当前位置：网站首页>Lesson 3 urllib

Lesson 3 urllib

2022-06-25 20:43:00 【Osmanthus rice wine balls】

Lesson 3 : urllib

One 、 Fetching a web page's source code into a response object

import urllib.request

# Send a GET request. urlopen returns a response object; the `with` block
# closes the underlying connection once the body has been read.
with urllib.request.urlopen("http://www.baidu.com") as response:
    # read() returns raw bytes; decode them as UTF-8 so that Chinese
    # characters in the page source print correctly.
    print(response.read().decode('utf-8'))


# Send a POST request ( used to simulate a login with a user name and password ),
# using httpbin.org as the test server

import urllib.parse # Parser , Parsing key value pairs 

# Build the form body: urlencode turns the key/value pairs into a query
# string, which is then encoded to UTF-8 bytes before being sent.
data = urllib.parse.urlencode({"hello": "world"}).encode("utf-8")

# Passing `data` sends the form as the request body; the `with` block closes
# the connection once the response has been read.
with urllib.request.urlopen("http://httpbin.org/post", data=data) as response:
    print(response.read().decode('utf-8'))

Two 、 Timeout problem

import socket
import urllib.error

try:
    # Give up if the request takes longer than 0.01 seconds.
    with urllib.request.urlopen("http://httpbin.org/post", timeout=0.01) as response:
        print(response.read().decode('utf-8'))
except (urllib.error.URLError, socket.timeout):
    # A timeout while connecting is wrapped in URLError; one that happens
    # while reading the body is raised as a bare socket timeout, so both
    # are caught here.
    print("time out!")

Three 、 Request headers （ pretending to be a browser ）

url = "https://httpbin.org/post"

# Headers sent with the request. The "……" value is a placeholder: substitute
# the User-Agent string copied from a real browser.
headers = {
    "User-Agent": "……",
}

# Form body: URL-encoded key/value pairs, encoded to UTF-8 bytes.
data = urllib.parse.urlencode({"hello": "world"}).encode("utf-8")

# Bundle URL, body and headers into one Request object. HTTP method names are
# case-sensitive and Request sends the string as given, so it must be "POST".
req = urllib.request.Request(url=url, data=data, headers=headers, method="POST")

# The `with` block closes the connection once the response has been read.
with urllib.request.urlopen(req) as response:
    print(response.read().decode("utf-8"))

How to find the User-Agent ( a key/value pair in the request headers ):

Look it up in the Network panel of the browser's developer tools

[ Failed to transfer the external chain picture , The origin station may have anti-theft chain mechanism , It is suggested to save the pictures and upload them directly (img-yaNApJ6z-1644636635823)(C:\Users\ litchi \AppData\Roaming\Typora\typora-user-images\image-20220204161745986.png)]

Four 、 get data

# Crawl the pages
def getData(baseurl):
    """Request ten consecutive pages and return the collected data.

    Args:
        baseurl: URL prefix; the page offset (0, 25, ..., 225) is appended
            to it to form each page's URL.

    Returns:
        The ``datalist`` list. Nothing is appended to it yet, so it is
        currently always empty.
    """
    datalist = []
    for i in range(0, 10):  # fetch page information 10 times
        url = baseurl + str(i * 25)
        html = askURL(url)  # source of the page; not processed further yet
    return datalist

# Fetch the content of the page at the given URL
def askURL(url):
    """Return the decoded source of the page at ``url``.

    Args:
        url: Address to request.

    Returns:
        The response body decoded as UTF-8, or an empty string when the
        request fails with ``urllib.error.URLError``. In that case the
        error's ``code`` and/or ``reason`` are printed.
    """
    from urllib.error import URLError

    # Browser-like header information. The "……" value is a placeholder:
    # substitute the User-Agent string copied from a real browser.
    head = {
        "User-Agent": "……",
    }
    request = urllib.request.Request(url, headers=head)

    html = ""  # stays empty if the request fails, so the return is always bound
    try:
        # The `with` block closes the connection once the body has been read.
        with urllib.request.urlopen(request) as response:
            html = response.read().decode("utf-8")
    except URLError as e:
        if hasattr(e, "code"):
            print(e.code)  # error code, when the exception carries one
        if hasattr(e, "reason"):
            print(e.reason)  # reason for the failure
    return html