当前位置:网站首页>50 lines of code to crawl TOP500 books and import TXT documents

Crawling the TOP 500 books in 50 lines of code and exporting them to a TXT document

2022-06-26 18:17:00 Little fox dreams of going to fairy tale town

50 lines of code that crawl the Top 500 books and save them to a TXT file

import re   # Regular expressions , Text extraction 
import requests
import json

def main(page):
    """Crawl one ranking page and append its records to the output file.

    Args:
        page: Page number appended to the ranking URL (converted with
            ``str``).
    """
    # Every page is appended to the same text file.
    output_file = "Top500_book.txt"
    # The page number is the last component of the ranking URL.
    page_url = "http://bang.dangdang.com/books/fivestars/01.00.00.00.00.00-recent30-0-0-1-" + str(page)
    # Download and parse the page, then persist whatever was extracted.
    saveData(getData(page_url), output_file)

def getData(baseurl):
    """Download the page at *baseurl* and return its parsed records.

    Args:
        baseurl: URL of the page to fetch.

    Returns:
        Whatever ``parse_result`` produces for the text that ``askURL``
        returned for *baseurl*.
    """
    return parse_result(askURL(baseurl))
# Compiled once at import time. Raw strings keep ``\d`` and ``\s`` from being
# treated as (invalid) string escape sequences. Each fragment ends with the
# capture group named in its trailing comment; the concatenated pattern is
# the same text the function has always matched against.
_ITEM_PATTERN = re.compile(
    r'<li>.*?list_num.*?(\d+).</div>'                          # range
    r'.*?<img src="(.*?)"'                                     # iamge
    r'.*?class="name".*?title="(.*?)">'                        # title
    r'.*?class="star">.*?class="tuijian">(.*?)</span>'         # recommend
    r'.*?class="publisher_info">.*?target="_blank">(.*?)</a>'  # author
    r'.*?class="biaosheng">.*?<span>(.*?)</span></div>'        # times
    r'.*?<p><span\sclass="price_n">&yen;(.*?)</span>'          # price
    r'.*?</li>',
    re.S,
)


def parse_result(html):
    """Yield one dict per ``<li>`` entry matched in *html*.

    Args:
        html: Page source as a string. ``None`` or an empty string (for
            example after a failed download) produces no records instead
            of raising ``TypeError``.

    Yields:
        Dicts with the keys ``'range'``, ``'iamge'``, ``'title'``,
        ``'recommend'``, ``'author'``, ``'times'`` and ``'price'``; every
        value is the raw captured string.
    """
    if not html:
        return
    for item in _ITEM_PATTERN.findall(html):
        yield {
            'range': item[0],
            # NOTE: 'iamge' is a historical misspelling of 'image'; it is
            # kept so records already written to disk keep the same keys.
            'iamge': item[1],
            'title': item[2],
            'recommend': item[3],
            'author': item[4],
            'times': item[5],
            'price': item[6],
        }
def askURL(url, timeout=10):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The response text when the server answers with HTTP 200; ``None``
        for any other status code or when the request raises
        ``requests.RequestException`` (which includes timeouts).
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        return None
    if response.status_code == 200:
        return response.text
    # Non-200 answers are reported the same way as a failed request.
    return None
def saveData(datalst, savepath):
    """Append every record in *datalst* to *savepath* as one JSON line each.

    Args:
        datalst: Iterable of JSON-serializable records (a generator is
            fine; it is consumed once).
        savepath: Path of the UTF-8 text file to append to.

    The records are serialized before the file is opened, so the file is
    opened a single time per call rather than once per record, and an empty
    *datalst* leaves the filesystem untouched.
    """
    print("save....")
    # ensure_ascii=False keeps non-ASCII text readable in the output file.
    lines = [json.dumps(item, ensure_ascii=False) + '\n' for item in datalst]
    if not lines:
        return
    # The with-statement closes the file; no explicit close() is needed.
    with open(savepath, 'a', encoding='UTF-8') as f:
        f.writelines(lines)

if __name__ == '__main__':
    # Turn the pages: crawl ranking pages 1 through 25 in order.
    first_page, last_page = 1, 25
    for page_number in range(first_page, last_page + 1):
        main(page_number)

【 Running results 】
 Insert picture description here

原网站

版权声明
本文为[Little fox dreams of going to fairy tale town]所创,转载请带上原文链接,感谢
https://yzsam.com/2022/177/202206261807467730.html