Scraping college entrance examination score lines
2022-07-02 15:37:00 【jidawanghao】
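
The script below pulls college entrance examination (gaokao) score lines from eol.cn in two passes: it pages through the college-list API at api.eol.cn to collect each school's id and attributes (985/211/Double First-Class status, type, location), then uses that id to fetch the school's minimum admission scores for 2018-2021 from static-data.eol.cn, and finally writes both tables to Excel with pandas.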
import json
import time

import pandas as pd
import requests


class School:
    """Plain container for the attributes of one college."""
    school_id = ""
    type = ""
    name = ""
    province_name = ""
    city_name = ""
    f211 = ""
    f985 = ""
    dual_class_name = ""
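
A side note, not in the original: Python's dataclasses module expresses the same attribute bag more idiomatically, with typed fields and a generated constructor. A minimal sketch (SchoolRecord is a hypothetical alternative, kept separate so it does not clash with School above):

from dataclasses import dataclass

@dataclass
class SchoolRecord:
    school_id: str = ""
    type: str = ""
    name: str = ""
    province_name: str = ""
    city_name: str = ""
    f211: int = 0             # 1 if the school is in Project 211
    f985: int = 0             # 1 if the school is in Project 985
    dual_class_name: int = 0  # 1 if the school is Double First-Class
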
# Score-line data for each college over the years.
# Step 1: page through the college-list API to collect each school's id;
# step 2: use that id to fetch the school's admission data year by year.
def get_one_page(page_num):
    heads = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.106 Safari/537.36'}  # request header
    # College-list API; %s is the page number.
    url = 'https://api.eol.cn/gkcx/api/?access_token=&admissions=&central=&department=&dual_class=&f211=&f985=&is_doublehigh=&is_dual_class=&keyword=&nature=&page=%s&province_id=&ranktype=&request_type=1&school_type=&signsafe=&size=20&sort=view_total&top_school_id=[766,707]&type=&uri=apidata/api/gk/school/lists'
    rows = []  # one dict per (school, year) record
    for i in range(1, page_num):  # i is the page number, 1 .. page_num - 1
        response = requests.get(url % i, headers=heads)  # one page of the college list
        try:
            my_json = json.loads(response.text)['data']['item']  # list of schools on this page
        except (KeyError, TypeError, ValueError):
            continue  # keep the crawl running if a page comes back malformed
        for my in my_json:  # pick up each school's id and attributes
            print(my['school_id'])  # progress
            ss = School()
            ss.school_id = my['school_id']
            ss.name = my['name']
            ss.province_name = my['province_name']
            ss.city_name = my['city_name']
            ss.f211 = 1 if my['f211'] == 1 else 0
            ss.f985 = 1 if my['f985'] == 1 else 0
            ss.dual_class_name = 1 if my['dual_class_name'] == '双一流' else 0  # Double First-Class
            ss.type = my['type_name']
            # Score-line page per year and school; the trailing /35/2/1.json
            # segments are hard-coded filters (presumably province/subject/batch).
            urlkzx = 'https://static-data.eol.cn/www/2.0/schoolprovinceindex/%s/%s/35/2/1.json'
            for year in (2021, 2020, 2019, 2018):  # one request per year
                res = requests.get(urlkzx % (year, my['school_id']), headers=heads)
                try:
                    data = json.loads(res.text)['data']['item'][0]
                except (KeyError, IndexError, TypeError, ValueError):
                    continue  # no score data published for this school/year
                rows.append({
                    'School id': my['school_id'],
                    'School name': ss.name,
                    'Province': ss.province_name,
                    'City': ss.city_name,
                    'Double First-Class': ss.dual_class_name,
                    'f985': ss.f985,
                    'f211': ss.f211,
                    'School type': ss.type,
                    'Year': data['year'],
                    'Batch': data['local_batch_name'],
                    'Type': data['zslx_name'],
                    'Min score': data['min'],
                    'Min rank': data['min_section'],
                    'Batch score': data['proscore'],
                })
    # Build the frame once at the end; DataFrame.append is deprecated
    # (removed in pandas 2.0), so collect dicts and construct in one go.
    return pd.DataFrame(rows)
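
A quick smoke test before launching the full crawl: fetch a single page and eyeball the frame (a usage sketch, not part of the original script):

# range(1, 2) inside get_one_page means only page 1 is fetched.
sample = get_one_page(page_num=2)
print(sample.shape)
print(sample.head())
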
# Basic information for each college (no per-year score data).
def detail(page_num):
    heads = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.106 Safari/537.36'}  # request header
    # Same college-list API as in get_one_page; %s is the page number.
    url = 'https://api.eol.cn/gkcx/api/?access_token=&admissions=&central=&department=&dual_class=&f211=&f985=&is_doublehigh=&is_dual_class=&keyword=&nature=&page=%s&province_id=&ranktype=&request_type=1&school_type=&signsafe=&size=20&sort=view_total&top_school_id=[766,707]&type=&uri=apidata/api/gk/school/lists'
    rows = []
    for i in range(1, page_num):  # i is the page number
        response = requests.get(url % i, headers=heads)
        try:
            my_json = json.loads(response.text)['data']['item']  # list of schools on this page
        except (KeyError, TypeError, ValueError):
            continue
        for my in my_json:  # one basic-info row per school
            print(my['school_id'])  # progress
            rows.append({
                'School id': my['school_id'],
                'School name': my['name'],
                'Province': my['province_name'],
                'City': my['city_name'],
                'Double First-Class': 1 if my['dual_class_name'] == '双一流' else 0,
                'f985': 1 if my['f985'] == 1 else 0,
                'f211': 1 if my['f211'] == 1 else 0,
                'School type': my['type_name'],
            })
    return pd.DataFrame(rows)
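
Both functions fire requests in a tight loop with no pause between calls. A small randomized delay keeps the crawl polite; a minimal sketch (polite_get is a hypothetical helper, not part of the original script):

import random
import time

import requests

def polite_get(url, headers):
    # Sleep 0.5-1.5 s before each request so the API is not hammered.
    time.sleep(random.uniform(0.5, 1.5))
    return requests.get(url, headers=headers)

Swapping requests.get for polite_get inside the loops above would slow the crawl down but make it far friendlier to the server.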
def get_all_page(all_page_num):
    print(all_page_num)
    # Crawl the per-year score-line table.
    df_all = get_one_page(page_num=all_page_num)
    time.sleep(5)
    return df_all


def getdetail(all_page_num):
    print(all_page_num)
    # Crawl the basic-info table.
    d2 = detail(page_num=all_page_num)
    time.sleep(5)
    return d2


df_school = get_all_page(100)
dd = getdetail(100)
df_school.to_excel('data.xlsx', index=False)  # per-year score lines
dd.to_excel('dd.xlsx', index=False)           # basic college info
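
The run leaves two workbooks behind: data.xlsx with the per-year score lines and dd.xlsx with the basic college info. The two can be joined back together on the school id, e.g. (a usage sketch):

import pandas as pd

scores = pd.read_excel('data.xlsx')
info = pd.read_excel('dd.xlsx')
# Join per-year score lines with the basic-info table on the school id;
# overlapping columns from the info table get an "_info" suffix.
merged = scores.merge(info, on='School id', how='left', suffixes=('', '_info'))
print(merged.head())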