当前位置:网站首页>selenium入门级项目 - 豆豆玩竞猜
selenium入门级项目 - 豆豆玩竞猜
2022-06-22 03:00:00 【BGONE】
在初步学习selenium的定位方法之后,我们就可以找些网站来测试了,这次我选择的网站是豆豆玩
测试目的
- 点掉首页弹窗
- 模拟登录,含简单的验证码识别
- 表格提交
- 获取15期结果,存进MySQL数据库
思路与主要代码
去JS弹窗
这个容易:按CSS类名定位弹窗上的按钮,点击关闭即可:
# Start Chrome through a local chromedriver and open the site's home page.
# The driver path is a raw string so its Windows backslashes are never
# interpreted as (invalid) escape sequences.
s = Service(r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe")
driver = webdriver.Chrome(options=chrome_options, service=s)
ddwURL = "http://www.doudouwan.net/"
driver.get(url=ddwURL)
# Fixed pause before looking for the popup button.
time.sleep(6)
# Click the first element carrying the "layui-layer-btn0" class to dismiss
# the home-page popup; skip the click when no such element is present
# instead of failing with IndexError.
popup_buttons = driver.find_elements(by=By.CLASS_NAME, value='layui-layer-btn0')
if popup_buttons:
    popup_buttons[0].click()
模拟登录
这里填写账号、密码的步骤不再赘述,重点讨论验证码识别。
网站的验证码文件是html格式,并且是随机的,网页每打开一次就会刷新一次,例如:2578
我的思路是:在网页上截取验证码所在区域的图片,保存到本地,然后进行OCR文字识别。
我们不研究识别的原理,现在有很多开源的库可以供我们使用,这里选择了ddddocr,除了带有广告,基本可以使用:
def validate(url):
    """Run ddddocr on the image file at *url* and return its classification.

    Despite the parameter name, *url* is used as a local file path: the
    file is opened in binary mode and its bytes are handed to the OCR
    engine.
    """
    recognizer = ddddocr.DdddOcr()
    with open(url, 'rb') as image_file:
        captcha_bytes = image_file.read()
    return recognizer.classification(captcha_bytes)
def snipScreent(url):
    """Open *url*, screenshot one image element and return the PNG path.

    The page is loaded with the module-level ``driver``, the window is
    resized to the document's scroll dimensions, and the ``img`` element
    at a fixed XPath is saved as ``D:\\image\\<unix-timestamp>.png``.

    Returns:
        The path the screenshot was written to.
    """
    driver.get(url)
    time.sleep(2)

    # Resize the browser window to the full scroll size of the document.
    page_width = driver.execute_script("return document.documentElement.scrollWidth")
    page_height = driver.execute_script("return document.documentElement.scrollHeight")
    driver.set_window_size(page_width, page_height)

    # Locate the image element that will be captured.
    target_img = driver.find_element(by='xpath', value='/html/body/div[3]/div/div[2]/dl/dd[3]/img')
    print('网页模块尺寸:height={},width={}'.format(target_img.size['height'], target_img.size['width']))

    # Name the file after the current Unix timestamp (whole seconds).
    stamp = int(time.time())
    pngPath = r'D:\image\%s.png' % stamp
    target_img.screenshot(pngPath)
    return pngPath
# Log in: capture the captcha image, type the credentials, OCR the captcha,
# type the recognised text and submit the form.
def _type_into(selector, text):
    """Send *text* to the element matched by the CSS *selector*."""
    driver.find_element(by=By.CSS_SELECTOR, value=selector).send_keys(text)


picPath = snipScreent(url=ddwURL)
_type_into("body > div.top > div > div.index_member.no_login > dl > dd.i.i_username > input", "bgone")
_type_into("body > div.top > div > div.index_member.no_login > dl > dd.i.i_password > input", "123456")
# The original listing also carried a disabled alternative that downloaded
# the captcha from http://doudouwan.net/register/register_ver_code.html.
num = validate(url=picPath)
_type_into("body > div.top > div > div.index_member.no_login > dl > dd.i.i_code > input", num)
time.sleep(3)
driver.find_element(
    by=By.CSS_SELECTOR,
    value="body > div.top > div > div.index_member.no_login > dl > dd.a > a.submit",
).click()
randomIdle()
表格提交
投注本身也不难,但我们需要写一个算法以尽量维持程序运行:
def throw():
    """Click the ``#revoke_<id> > a`` link for the table's sixth row, retrying forever.

    The id is the text content of the first cell in row 6 of the left-hand
    table. On any exception the error is printed, ``randomIdle()`` is
    called, the page is refreshed, and the whole lookup is attempted again;
    the function returns only after a click succeeds.
    """
    cell_selector = r"body > div.fun_main > div.fun_left > div.left_table > table > tbody > tr:nth-child(6) > td:nth-child(1)"
    while True:
        try:
            row_id = driver.find_element(by=By.CSS_SELECTOR, value=cell_selector).get_attribute("textContent")
            link_selector = "#revoke_%s > a" % row_id
            driver.find_element(by=By.CSS_SELECTOR, value=link_selector).click()
        except Exception as ep:
            # Best-effort: report, idle, reload, and try again.
            print(ep)
            randomIdle()
            driver.refresh()
        else:
            break
def bet(input=0):
    """Type *input* into five stake fields, click the submit link, return the remainder.

    Args:
        input: Amount sent to each of the five ``#tbLuck28Value*`` fields.

    Returns:
        ``int(getCoins()) - 5 * input``, where ``getCoins()`` is read
        before any field is filled.
    """
    coins_before = getCoins()
    stake_fields = (
        "#tbLuck28Value1",
        "#tbLuck28Value3",
        "#tbLuck28Value5",
        "#tbLuck28Value7",
        "#tbLuck28Value9",
    )
    for field_selector in stake_fields:
        driver.find_element(by=By.CSS_SELECTOR, value=field_selector).send_keys(input)
    submit_link = driver.find_element(by=By.XPATH, value="/html/body/div[3]/div[1]/div[5]/div/div[3]/div[2]/div/div[3]/a")
    submit_link.click()
    return int(coins_before) - 5 * input
def getTimer():
    """Report whether both on-page timers show their "closed" messages.

    Reads the text content of ``#bettingLottTime`` and ``#bettingOverTime``
    and returns True only when the latter contains "已停止参与" and the
    former contains "解谜中,请稍后"; otherwise returns False.
    """
    lottery_text = driver.find_element(by=By.CSS_SELECTOR, value="#bettingLottTime").get_attribute("textContent")
    closing_text = driver.find_element(by=By.CSS_SELECTOR, value="#bettingOverTime").get_attribute("textContent")
    return "已停止参与" in closing_text and "解谜中,请稍后" in lottery_text
# Main betting loop. Indentation was lost in this listing, so the comments
# below describe individual statements and do not assert the exact nesting.
# NOTE(review): `p` is never reassigned in the visible code, so the outer
# `while not p` has no visible exit condition - confirm against the full script.
p = False
while not p:
# Stake handed to bet() at the start of each outer pass.
initialC = 20
# NOTE(review): `mp` is set to True below but never read in the visible code.
mp = False
# `maxloop` and `guessURL90` are defined outside this listing.
for i in range(1,maxloop):
# time.sleep(50)
driver.get(url=guessURL90)
throw()
# Place the bet; `a` is bet()'s return value (coins read before betting minus 5 * stake).
a = bet(input=initialC)
# Wait for the draw: poll getTimer() every 2 seconds until it returns True.
timerAssert = getTimer()
while not timerAssert:
time.sleep(2)
timerAssert = getTimer()
time.sleep(5)
#
# Fetch the results: reload the page and let collection() scrape the table;
# `c` is collection()'s return value (In - Out).
driver.refresh()
c = collection()
print(c)
# When `c` is negative the stake is doubled; the loop is left if five times
# the new stake exceeds a + c.
if c < 0:
initialC = initialC * 2
if initialC*5 > (a+c):
break
time.sleep(5)
# When `c` is positive, flag it in `mp` and leave the loop.
elif c > 0:
mp = True
break
数据库操作
先在本地搭建MySQL服务器,用Navicat Premium 15建一张表ddw,字段为:Number、DateTime、Result、Coins、Hits、In、Out。
主键设在Number上;然后由脚本获取数据并存入:
class Sql():
    """Thin helper around PyMySQL for the ``ddw`` table of the ``ddw`` database."""

    def __init__(self, host="localhost", username="root", password="", database="ddw"):
        """Store the connection settings.

        The defaults match the values the original ``connectMysql`` had
        hard-coded, so ``Sql()`` keeps connecting to the same server.
        """
        self.host = host
        self.username = username
        self.password = password
        self.database = database

    def connectMysql(self):
        """Open and return a new PyMySQL connection that yields dict rows."""
        return pymysql.connect(host=self.host,
                               user=self.username,
                               password=self.password,
                               database=self.database,
                               charset='utf8mb4',
                               cursorclass=pymysql.cursors.DictCursor)

    def instertMysql(self, num, date, result, coin, hits, inn, out):
        """Insert one row into ``ddw.ddw``, commit, then read it back and print it.

        Args:
            num, date, result, coin, hits, inn, out: Values for the columns
                Number, DateTime, Result, Coins, Hits, In and Out, in that
                order.

        A fresh connection is opened for the call and always closed, even
        when a statement fails.
        """
        connection = self.connectMysql()
        try:
            with connection.cursor() as cursor:
                # Create the new record.
                sql = "INSERT INTO `ddw`.`ddw` (`Number`, `DateTime`, `Result`, `Coins`, `Hits`, `In`, `Out`) VALUES (%s, %s, %s, %s, %s, %s, %s)"
                cursor.execute(sql, (num, date, result, coin, hits, inn, out))
            # The connection does not autocommit, so persist the insert explicitly.
            connection.commit()
            with connection.cursor() as cursor:
                # Read the row back as a sanity check; a separate local is
                # used so the `result` argument is not overwritten.
                sql = "SELECT Number FROM ddw.ddw WHERE Number=%s"
                cursor.execute(sql, (num,))
                stored = cursor.fetchone()
                print(stored)
        finally:
            connection.close()

    def selectMysql(self, index=0):
        """Fetch, print and return the row with the highest ``Number``.

        Args:
            index: Value used for the query's LIMIT; a falsy value means 1.

        Returns:
            The first row of the result as a dict, or None if the table is
            empty.

        NOTE(review): only the first row is returned even when ``index`` is
        greater than 1, mirroring the original ``fetchone()`` call - confirm
        whether callers expect all ``index`` rows.
        """
        limit = int(index) if index else 1
        connection = self.connectMysql()
        try:
            with connection.cursor() as cursor:
                # Backticks make ORDER BY sort on the column; single quotes
                # would sort on a constant string. PyMySQL placeholders are
                # always %s, including for integers.
                sql = "select * from ddw ORDER BY `Number` DESC LIMIT %s"
                cursor.execute(sql, (limit,))
                result = cursor.fetchone()
                print(result)
                return result
        finally:
            connection.close()
# Scrape the cells of the results table, store rows that are not yet in the
# `ddw` table, and return In - Out.
# NOTE(review): indentation was lost in this listing. It is not clear whether
# `connection.close()` and `return In-Out` run inside the row loop (first row
# only) or after it (last row processed) - confirm against the original script.
def collection():
# The page's date text is prefixed with the current year below.
year = datetime.datetime.now().year
sql = Sql()
connection = sql.connectMysql()
# Every <td> of the table at this fixed XPath.
content = driver.find_elements(by=By.XPATH,value="/html/body/div[3]/div[1]/div[5]/table/tbody/tr/td")
# One list of text nodes per cell.
alist = []
for i in content:
html = etree.fromstring(i.get_attribute("innerHTML"), parser=etree.HTMLParser())
try:
alist.append(html.xpath("//text()"))
except Exception:
# Fall back to the raw innerHTML when the text nodes cannot be extracted.
alist.append([i.get_attribute("innerHTML")])
# Drop the first 36 cells and the last one; the remainder is read in groups of 7.
usefulContent = alist[36:-1]
# Offsets read within each group: 0 Number, 1 DateTime, 2 Result, 3 Coins,
# 4 Hits, 5 In and Out (two text nodes in the same cell).
# Target columns: `Number`, `DateTime`, `Result`, `Coins`, `Hits`, `In`, `Out`
for index in range(0,len(usefulContent),7):
# import pdb
# pdb.set_trace()
# n = index % 7
# Offset 0: Number as an integer.
Number= int(usefulContent[index][0])
# Offset 1: date text prefixed with "<year>-".
DateTime = str(year) + "-" + usefulContent[index+1][0]
# Offset 2: Result as an integer.
Result = int(usefulContent[index+2][0])
# Offset 3: Coins with the commas removed (kept as a string).
Coins = "".join(usefulContent[index+3][0].split(","))
# Offset 4: Hits with the commas removed, as an integer.
Hits = int("".join(usefulContent[index+4][0].split(",")))
# Offset 5: take the text after the last ":" in each of the two text nodes
# and strip the commas.
In = int("".join(usefulContent[index+5][0].split(":")[-1].split(",")))
Out = int("".join(usefulContent[index+5][1].split(":")[-1].split(",")))
with connection.cursor() as cursor:
try:
# Insert the row only when no row with this Number exists yet.
existOne = "SELECT Number FROM ddw.ddw WHERE Number=%s"
# NOTE(review): `(Number)` is a bare value, not a 1-tuple.
cursor.execute(existOne, (Number))
result = cursor.fetchone()
print(result)
if not result:
# instertMysql opens its own connection via connectMysql for the insert.
sql.instertMysql(Number, DateTime, Result, Coins, Hits, In, Out)
except Exception as ep:
# Errors are printed and otherwise ignored.
print(ep)
finally:
cursor.close()
connection.close()
# NOTE(review): `In` and `Out` are only bound inside the loop above.
return In-Out
结果展示:
总结
难度系数低,可以获取数据用于日后数据分析
边栏推荐
- Horizontal comparison of domestic API management platforms, which one is stronger?
- Using open source software to save an enterprise level map data platform solution
- Dynamically load assetdatabase of assets
- Sword finger offer 58 Symmetric binary tree
- Sword finger offer 12 Path in matrix
- C mapster object mapper learning
- Markdown is proficient in Elementary Grammar and is compatible with marktext
- Parallel search DSU
- 【 thesis 】 zero reference depth curve estimation for low light image enhancement
- Select for i/0 multiplexing
猜你喜欢

六、MySQL之数据定义语言(一)

HarmonyOS鸿蒙使用ORM Bee访问数据库实例
![[1. quick sort]](/img/3d/66ce309761d0d79a5d09718a67def8.png)
[1. quick sort]

Redis6.0新特性(下)

UnionPay payment return merchant nignx post request 405

uv_loop_init()流程

Typora + picGo 配置图床实现图片自动上传

Live broadcast on June 22 | zhanzhihui, South China Institute of Technology: evolutionary computing for expensive optimization

tag动态规划-刷题预备知识-1.动态规划五部曲解题法 + lt.509. 斐波那契数/ 剑指Offer 10.I + lt.70. 爬楼梯彻底解惑 + 面试真题扩展

Day14QProgressBar2021-10-17
随机推荐
[pit encountered in docekr learning]
uv_loop_init()流程
Unity3d C # generates non repeated random numbers in the interval
Try catch of Bash
【7. 高精度除法】
2022年买理财产品买三个月还是半年?
6、 MySQL data definition language (1)
[3. binary integer and floating point number]
Redis6.0新特性(下)
关于Map做状态映射的问题
【2. 归并排序】
xpm_memory_tdpram原语的完整使用实例
Basic structure and application of backlight module
tag动态规划-刷题预备知识-1.动态规划五部曲解题法 + lt.509. 斐波那契数/ 剑指Offer 10.I + lt.70. 爬楼梯彻底解惑 + 面试真题扩展
Right and left vertical time axis with serial number
An article thoroughly learns to draw data flow diagrams
Sword finger offer 56 Delete duplicate nodes of the linked list
Day18qt signal and slot 2021-10-29
Check information on the Internet after the college entrance examination, and pay attention to prevent websites without SSL certificates
Library management system (PHP final report)