
Web Scraping Basics: The Requests Library

2022-06-21 14:07:00 Naughty oranges

Installation

pip install requests
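
To verify the install, the library version can be printed from the command line (requests exposes it as requests.__version__):

python -c "import requests; print(requests.__version__)"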

Basic usage

Handling GET requests

import requests

url = 'https://www.httpbin.org/get'
params = {  # GET request parameters
    'name': 'germey',
    'age': 25
}
headers = {  # Request headers
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36',
}
# GET request parameters go in params; request headers go in headers (Cookie information also goes in headers)
resp = requests.get(url, params=params, headers=headers)
# If the response text appears garbled, set the character encoding here; common encodings are 'utf-8' and 'gbk'
resp.encoding = 'utf-8'

print(type(resp.status_code), resp.status_code)  # Response status code, int
print(type(resp.url), resp.url)  # Final URL, str
print(type(resp.cookies), resp.cookies)  # Cookies, requests.cookies.RequestsCookieJar (behaves like a dict)
print(type(resp.headers), resp.headers)  # Response headers, requests.structures.CaseInsensitiveDict
print(type(resp.text), resp.text)  # Response body as text, str
print(type(resp.content), resp.content)  # Raw response body, bytes
print(type(resp.json()), resp.json())  # JSON body parsed into a dict
print(type(resp.history), resp.history)  # Redirect history, list
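
Note that resp.json() raises an exception when the body is not valid JSON. A minimal defensive sketch (the exception subclasses ValueError, so catching ValueError works across requests versions):

import requests

resp = requests.get('https://www.httpbin.org/get')
try:
    data = resp.json()
except ValueError:  # raised when the body is not valid JSON
    data = None
print(data)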

Handling POST requests

import requests

url = 'https://www.httpbin.org/post'
data = {  # POST request parameters
    'name': 'germey',
    'age': 25
}
headers = {  # Request headers
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36',
}
# POST request parameters go in data; request headers go in headers (Cookie information also goes in headers)
resp = requests.post(url, data=data, headers=headers)
# If the response text appears garbled, set the character encoding here; common encodings are 'utf-8' and 'gbk'
resp.encoding = 'utf-8'

print(type(resp.status_code), resp.status_code)  # Response status code, int
print(type(resp.url), resp.url)  # Final URL, str
print(type(resp.cookies), resp.cookies)  # Cookies, requests.cookies.RequestsCookieJar (behaves like a dict)
print(type(resp.headers), resp.headers)  # Response headers, requests.structures.CaseInsensitiveDict
print(type(resp.text), resp.text)  # Response body as text, str
print(type(resp.content), resp.content)  # Raw response body, bytes
print(type(resp.json()), resp.json())  # JSON body parsed into a dict
print(type(resp.history), resp.history)  # Redirect history, list

Passing POST request parameters

# When the developer tools show the POST parameters as Form Data,
# and the request headers contain Content-Type: application/x-www-form-urlencoded,
# pass the parameters via data
data = {
    'user': 'admin',
    'password': '123'
}
resp = requests.post(url=url, data=data)

# When the developer tools show the POST parameters as Request Payload,
# and the request headers contain Content-Type: application/json,
# pass the parameters via json
data = {
    'user': 'admin',
    'password': '123'
}
resp = requests.post(url=url, json=data)
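
The difference shows up in what httpbin echoes back: form-encoded parameters appear under "form", while a JSON body appears under "json". A small sketch against https://www.httpbin.org/post:

import requests

url = 'https://www.httpbin.org/post'
payload = {'user': 'admin', 'password': '123'}

resp_form = requests.post(url, data=payload)  # sent as application/x-www-form-urlencoded
resp_json = requests.post(url, json=payload)  # sent as application/json

print(resp_form.json()['form'])  # {'user': 'admin', 'password': '123'}
print(resp_json.json()['json'])  # {'user': 'admin', 'password': '123'}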

Checking the response status code

import requests

url = 'https://www.baidu.com'

resp = requests.get(url)
resp.encoding = 'utf-8'
# requests.codes has built-in names for every response status code
if resp.status_code == requests.codes.ok:
    print(resp.text)

import requests
import pprint

# Printing the __dict__ attribute gives the complete mapping of status codes and their names
pprint.pprint(requests.codes.__dict__)
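
Alternatively, Response.raise_for_status() raises requests.exceptions.HTTPError for 4xx/5xx responses, which replaces the manual comparison:

import requests

resp = requests.get('https://www.httpbin.org/status/404')
try:
    resp.raise_for_status()  # raises HTTPError because the status code is 404
except requests.exceptions.HTTPError as e:
    print(e)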

Advanced usage

Uploading files

import requests

filename = 'a.jpg'
url = 'https://www.httpbin.org/post'
with open(filename, 'rb') as f:  #  Note that the file is opened in binary mode 
    files = {
        'file': f,
    }
    resp = requests.post(url, files=files)
    print(resp.text)
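
The value in the files dict can also be a tuple, which sets the filename and content type explicitly; a sketch (the MIME type here is an assumption about the file):

import requests

url = 'https://www.httpbin.org/post'
with open('a.jpg', 'rb') as f:
    files = {
        'file': ('a.jpg', f, 'image/jpeg'),  # (filename, file object, content type)
    }
    resp = requests.post(url, files=files)
    print(resp.text)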

Setting cookies

# Method 1:
import requests

url = 'https://www.baidu.com'
# The Cookie information can go straight into headers
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.81 Safari/537.36',
    'Cookie': 'BAIDUID=C9F55ED34B7A432880C20EC9B623FE45:FG=1; BIDUPSID=C9F55ED34B7A432880C20EC9B623FE45; PSTM=1562147626; __yjs_duid=1_f2e5c4c58408baab83c1ffaae4e236191620178752543; BDUSS=lU3T3huNE9zanFRS2xhLWkzSW1UZng3cTZHQ1ZIUEVkdEx1YTlPQ1hIRzJyd2RoRVFBQUFBJCQAAAAAAAAAAAEAAADF5e0EZGVtb25zY2cAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAALYi4GC2IuBgb; BDUSS_BFESS=lU3T3huNE9zanFRS2xhLWkzSW1UZng3cTZHQ1ZIUEVkdEx1YTlPQ1hIRzJyd2RoRVFBQUFBJCQAAAAAAAAAAAEAAADF5e0EZGVtb25zY2cAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAALYi4GC2IuBgb; BAIDUID_BFESS=6577419B064404B084AAE4A9EAC8B0F8:FG=1; BAIDU_WISE_UID=wapp_1630208716966_354; baikeVisitId=399f54c9-61e4-4041-8841-8d3142839673; channel=baidusearch; COOKIE_SESSION=23354665_1_6_3_21_14_1_1_3_3_0_4_3714278_0_0_0_1615865424_1588998461_1639220087%7C9%230_1_1588997783%7C1; sug=3; sugstore=0; ORIGIN=2; bdime=0',
}
resp = requests.get(url, headers=headers)
print(resp.text)
# Method 2:
import requests
from requests.cookies import RequestsCookieJar

url = 'https://www.baidu.com'
# Wrap the Cookie information in a requests.cookies.RequestsCookieJar object
cookie_str = 'BAIDUID=C9F55ED34B7A432880C20EC9B623FE45:FG=1; BIDUPSID=C9F55ED34B7A432880C20EC9B623FE45; PSTM=1562147626; __yjs_duid=1_f2e5c4c58408baab83c1ffaae4e236191620178752543; BDUSS=lU3T3huNE9zanFRS2xhLWkzSW1UZng3cTZHQ1ZIUEVkdEx1YTlPQ1hIRzJyd2RoRVFBQUFBJCQAAAAAAAAAAAEAAADF5e0EZGVtb25zY2cAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAALYi4GC2IuBgb; BDUSS_BFESS=lU3T3huNE9zanFRS2xhLWkzSW1UZng3cTZHQ1ZIUEVkdEx1YTlPQ1hIRzJyd2RoRVFBQUFBJCQAAAAAAAAAAAEAAADF5e0EZGVtb25zY2cAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAALYi4GC2IuBgb; BAIDUID_BFESS=6577419B064404B084AAE4A9EAC8B0F8:FG=1; BAIDU_WISE_UID=wapp_1630208716966_354; baikeVisitId=399f54c9-61e4-4041-8841-8d3142839673; channel=baidusearch; COOKIE_SESSION=23354665_1_6_3_21_14_1_1_3_3_0_4_3714278_0_0_0_1615865424_1588998461_1639220087%7C9%230_1_1588997783%7C1; sug=3; sugstore=0; ORIGIN=2; bdime=0'
jar = RequestsCookieJar()  # Create a RequestsCookieJar object
for item in cookie_str.split(';'):
    key, value = item.strip().split('=', 1)  # A cookie value may itself contain '=', so split only once
    jar.set(key, value)  # Add each cookie entry to the jar

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.81 Safari/537.36',
}

resp = requests.get(url, headers=headers, cookies=jar)
print(resp.text)
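
For simple cases, a plain dict can also be passed as the cookies argument; requests converts it into a cookie jar internally:

import requests

resp = requests.get('https://www.httpbin.org/cookies', cookies={'name': 'germey'})
print(resp.text)  # httpbin echoes back the cookies it received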

Session maintenance

The requests library's built-in Session object simulates a session and handles cookies automatically: it stores the cookies the server sends and attaches them to subsequent requests. Note that a session does not remember headers passed to individual requests, so either set them on the session itself (as below) or carry them manually on every request.

import requests

url1 = 'https://www.httpbin.org/cookies/set/number/123456789'
url2 = 'https://www.httpbin.org/cookies'
# Create a Session object
session = requests.Session()
# headers, proxies, etc. set on the session are reused by every request made through it
session.headers.update({
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.81 Safari/537.36',
})
# session.proxies = {'http': 'http://host:port', 'https': 'http://host:port'}  # optional
# Visit url1; the server sets a cookie, which the session stores
session.get(url1)
# The stored cookies can be read as a dict, or individual values fetched by key
print(session.cookies.get_dict())
print(session.cookies.get('number'))
# Cookies can also be added to the session manually
session.cookies['key_name'] = 'value'
# Visit url2, which echoes back the cookies the session sends
resp = session.get(url2)
resp = session.get(url2)
print(resp.text)
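
A Session can also be used as a context manager, which closes its underlying connections when the block exits:

import requests

with requests.Session() as session:
    session.get('https://www.httpbin.org/cookies/set/number/123456789')
    resp = session.get('https://www.httpbin.org/cookies')
    print(resp.text)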

SSL Certificate validation

import requests
import logging
from requests.packages import urllib3

url = 'https://ssr2.scrape.center/'
# Method 1 for suppressing the warning:
# urllib3.disable_warnings()
# Method 2 for suppressing the warning:
# logging.captureWarnings(True)

# Visiting a site whose SSL certificate is invalid or expired raises requests.exceptions.SSLError
# Pass verify=False to skip certificate verification; a warning will then be printed,
# which can be suppressed with either method above
resp = requests.get(url, verify=False)
print(resp.text)
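
Instead of disabling verification, verify can point to a CA bundle file, and cert can supply a client certificate; the paths below are placeholders:

import requests

url = 'https://ssr2.scrape.center/'
# verify accepts a path to a CA bundle instead of True/False (placeholder path)
resp = requests.get(url, verify='/path/to/ca-bundle.pem')
# a client certificate/key pair can be supplied via cert (placeholder paths)
resp = requests.get(url, cert=('/path/to/client.crt', '/path/to/client.key'))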

Timeouts

import requests

url = 'https://www.httpbin.org/get'

resp1 = requests.get(url, timeout=1)  # Raise an exception if no response arrives within 1 second
resp2 = requests.get(url, timeout=(5, 30))  # Set the connect and read timeouts separately
resp3 = requests.get(url, timeout=None)  # Wait indefinitely; None is also the default
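
When the timeout elapses, requests raises requests.exceptions.Timeout (a subclass of RequestException), which can be caught to retry or fail gracefully; a sketch using httpbin's delay endpoint:

import requests

try:
    resp = requests.get('https://www.httpbin.org/delay/5', timeout=1)  # server waits 5s, we allow 1s
except requests.exceptions.Timeout:
    print('request timed out')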

Identity Authentication

import requests
from requests.auth import HTTPBasicAuth

url = 'https://ssr3.scrape.center'

# Method 1:
auth = HTTPBasicAuth('admin', 'admin')  # Create an auth object with the username and password
resp1 = requests.get(url, auth=auth)  # Pass it as the auth argument

# Method 2 (simpler):
resp2 = requests.get(url, auth=('admin', 'admin'))  # Pass a tuple; it is wrapped in an HTTPBasicAuth object automatically
print(resp2.text)
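
requests.auth also provides other schemes such as HTTPDigestAuth, used the same way; a sketch against httpbin's digest-auth endpoint:

import requests
from requests.auth import HTTPDigestAuth

url = 'https://www.httpbin.org/digest-auth/auth/user/pass'
resp = requests.get(url, auth=HTTPDigestAuth('user', 'pass'))
print(resp.status_code)  # 200 when the credentials are accepted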

Using proxies

import requests

url = 'https://www.httpbin.org/get'

proxies = {  # Proxy configuration
    'http': 'http://124.64.8.50:8000',
    'https': 'http://124.64.8.50:8000'
}
resp = requests.get(url, proxies=proxies)  # Pass the proxies argument
print(resp.text)
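
If the proxy requires authentication, credentials can be embedded in the proxy URL; host, port, and credentials below are placeholders:

import requests

proxies = {
    'http': 'http://user:password@127.0.0.1:8000',
    'https': 'http://user:password@127.0.0.1:8000',
}
resp = requests.get('https://www.httpbin.org/get', proxies=proxies)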

Building a Request object manually

from requests import Request, Session

url = 'https://www.httpbin.org/post'
data = {
    'name': 'germey'
}
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.81 Safari/537.36',
}
session = Session()  # Use a session
"""The Request constructor is Request(method=None, url=None, headers=None, files=None, data=None, params=None, auth=None, cookies=None, hooks=None, json=None)"""
# Create the Request object
req = Request(method='POST', url=url, headers=headers, data=data)
# Prepare it into a PreparedRequest
prepped = session.prepare_request(req)
# Send the prepared request
resp = session.send(prepped)
print(resp.text)
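
The point of preparing the request yourself is that it can be inspected or modified before sending. Continuing the snippet above (the header name is purely illustrative):

# A prepared request exposes its final headers and body for inspection or tweaking
prepped.headers['X-Example'] = 'demo'  # illustrative header, not required by any API
resp = session.send(prepped, timeout=5)
print(resp.status_code)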

Related links

Original article: https://yzsam.com/2022/02/202202221429085244.html

Copyright notice
This article was written by Naughty oranges; please include a link to the original when reposting.