当前位置:网站首页>Selenium crawls stocks in practice

Selenium crawls stocks in practice

2022-06-24 01:06:00 ruochen

selenium Library usage

The inspiration of the project

<img src="https://img-blog.csdnimg.cn/20201122112749272.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3RhbnN0eV96aA==,size_16,color_FFFFFF,t_70#pic_center" alt=" Insert picture description here ">

Today I was browsing a freelance coding marketplace for practical projects to practice on, and I found this one (the project itself has already closed).

The client had two more requirements, but they are similar to this one, so I won't write code for them here.

Necessary knowledge

1. The basics are covered in my earlier blog posts; have a look at those first.
2. Running Selenium headless (without a visible browser window)
# Chrome command-line switches for a headless session, applied in this order.
chrome_flags = (
    '--no-sandbox',  # avoids the "DevToolsActivePort file doesn't exist" startup error
    'window-size=1920x3000',  # browser resolution
    '--disable-gpu',  # Google's documentation recommends this switch to work around a bug
    '--hide-scrollbars',  # hide scroll bars, which helps with some unusual pages
    'blink-settings=imagesEnabled=false',  # skip image loading so pages load faster
    '--headless',  # no visible window; on Linux without display support Chrome fails to start otherwise
)
self.opt = Options()
for flag in chrome_flags:
    self.opt.add_argument(flag)
# To point Selenium at a specific browser binary, set for example:
# opt.binary_location = r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe"
# Create the headless browser object.
self.driver = Chrome(options=self.opt)

Because I wrap everything in a class, the snippet uses `self`; if you are not using a class, simply remove the `self.` prefix.

3. Saving data with xlwt
# Create a workbook (UTF-8 encoded) holding a single worksheet.
workbook = xlwt.Workbook(encoding='utf-8')
worksheet = workbook.add_sheet(' The latest stock price ')
# worksheet.write takes (row, column, value); row 0 is the header row.
for col, title in enumerate((" Stock code ", " Individual stock name ", " The latest price ")):
    worksheet.write(0, col, title)
# One spreadsheet row per stock, starting directly below the header.
for row, code in enumerate(number, start=1):
    worksheet.write(row, 0, code)
    worksheet.write(row, 1, name[row - 1])
    worksheet.write(row, 2, money[row - 1])
workbook.save(' The latest stock price .xls')

Complete code presentation

import time

import parsel
import xlwt
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
class get_gupaio():
    """Scrape stock codes, names and latest prices from the Eastmoney quote grid.

    A headless Chrome session is started on construction. ``run`` pages
    through the grid, collects the rows, prints them and writes them to an
    ``.xls`` workbook, then closes the browser.
    """

    # Absolute XPaths of the pager's page-number text box and of the link
    # that is clicked after a page number has been typed into it.
    PAGE_INPUT_XPATH = "/html/body/div[1]/div[2]/div[2]/div[5]/div/div[2]/div/input"
    PAGE_GO_XPATH = "/html/body/div[1]/div[2]/div[2]/div[5]/div/div[2]/div/a[3]"

    def __init__(self):
        """Start headless Chrome and load the stock grid page."""
        url = "http://quote.eastmoney.com/center/gridlist.html"
        self.opt = Options()
        self.opt.add_argument('--no-sandbox')  # avoids the "DevToolsActivePort file doesn't exist" startup error
        self.opt.add_argument('window-size=1920x3000')  # browser resolution
        self.opt.add_argument('--disable-gpu')  # Google's documentation recommends this switch to work around a bug
        self.opt.add_argument('--hide-scrollbars')  # hide scroll bars, which helps with some unusual pages
        self.opt.add_argument('blink-settings=imagesEnabled=false')  # skip image loading so pages load faster
        self.opt.add_argument('--headless')  # no visible window; needed on Linux hosts without display support
        # To use a specific browser binary, set self.opt.binary_location, e.g.
        # r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe"
        self.driver = Chrome(options=self.opt)
        # Element lookups wait up to 10 seconds before giving up.
        self.driver.implicitly_wait(10)
        try:
            self.driver.get(url)
        except Exception:
            # Do not leave a Chrome process behind if the first load fails.
            self.driver.quit()
            raise

    def get_one_page(self, content):
        """Extract the stock rows from one page of grid HTML.

        Rows are read one ``<tr>`` at a time so that the three returned lists
        always stay aligned, even when a row lacks a name or price cell.

        Args:
            content: HTML source of the grid page.

        Returns:
            A ``(number, name, money)`` tuple of equally long lists holding,
            per table row, the stock code, the stock name and the latest
            price. Rows without a stock code are skipped; a missing name or
            price is stored as an empty string.
        """
        sel = parsel.Selector(content)
        number = []
        name = []
        money = []
        for row in sel.xpath("//tbody/tr"):
            code = row.xpath("./td[2]/a/text()").get()
            if code is None:
                # Not a stock row (no code link in the second cell).
                continue
            number.append(code)
            name.append(row.xpath("./td[@class='mywidth']/a/text()").get(default=""))
            money.append(row.xpath("./td[5]/span/text()").get(default=""))
        return number, name, money

    def save_data(self, number, name, money):
        """Write the collected rows to an ``.xls`` workbook in the working directory.

        Args:
            number: Stock codes, one per row.
            name: Stock names, parallel to ``number``.
            money: Latest prices, parallel to ``number``.

        If the lists differ in length, only the rows present in all three
        are written.
        """
        workbook = xlwt.Workbook(encoding='utf-8')
        worksheet = workbook.add_sheet(' The latest stock price ')
        # worksheet.write takes (row, column, value); row 0 is the header.
        headers = (" Stock code ", " Individual stock name ", " The latest price ")
        for col, title in enumerate(headers):
            worksheet.write(0, col, title)
        for row, record in enumerate(zip(number, name, money), start=1):
            for col, value in enumerate(record):
                worksheet.write(row, col, value)
        workbook.save(' The latest stock price .xls')

    def run(self, pages=10):
        """Scrape the first ``pages`` grid pages, print and save the results.

        Args:
            pages: Number of grid pages to visit, starting from page 1.

        The browser is always closed when this method returns or raises.
        """
        number = []
        name = []
        money = []
        try:
            for page in range(1, pages + 1):
                # Type the page number into the pager box and trigger the jump.
                page_box = self.driver.find_element(By.XPATH, self.PAGE_INPUT_XPATH)
                page_box.clear()
                page_box.send_keys(str(page))
                self.driver.find_element(By.XPATH, self.PAGE_GO_XPATH).click()
                # Give the grid a moment to refresh before reading the page source.
                time.sleep(1)
                codes, names, prices = self.get_one_page(self.driver.page_source)
                number.extend(codes)
                name.extend(names)
                money.extend(prices)
            print(number)
            print(name)
            print(money)
            self.save_data(number, name, money)
        finally:
            self.driver.quit()
# Run the scraper only when this file is executed as a script, so that
# importing the module does not launch a browser.
if __name__ == "__main__":
    get_gupaio().run()
原网站

版权声明
本文为[ruochen]所创,转载请带上原文链接,感谢
https://yzsam.com/2021/11/20211120180829905l.html