Crawling college entrance examination admission score lines
2022-07-02 15:37:00 【jidawanghao】
import json
import time

import pandas as pd
import requests
# Simple record type holding the fields we keep for each school.
class School:
    school_id = ""
    type = ""
    name = ""
    province_name = ""
    city_name = ""
    f211 = ""
    f985 = ""
    dual_class_name = ""
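The bare class above works, but the standard-library dataclasses module expresses the same record more idiomatically. A minimal sketch (field names copied from the class above; the int defaults reflect how the flags are actually set below):

from dataclasses import dataclass

@dataclass
class SchoolRecord:
    school_id: str = ''
    type: str = ''
    name: str = ''
    province_name: str = ''
    city_name: str = ''
    f211: int = 0
    f985: int = 0
    dual_class_name: int = 0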
# Per-year admission data for each college.
# First loop over the listing pages to collect each school's id, then use that
# id to fetch the score-line data year by year.
def get_one_page(page_num):
    # Request header; presents the crawler as an ordinary browser.
    heads = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.106 Safari/537.36'}
    url = 'https://api.eol.cn/gkcx/api/?access_token=&admissions=&central=&department=&dual_class=&f211=&f985=&is_doublehigh=&is_dual_class=&keyword=&nature=&page=%s&province_id=&ranktype=&request_type=1&school_type=&signsafe=&size=20&sort=view_total&top_school_id=[766,707]&type=&uri=apidata/api/gk/school/lists'
    df_all = pd.DataFrame()
    for i in range(1, page_num):  # i stands in for the page number (the site had about 143 pages at the time of writing)
        response = requests.get(url % i, headers=heads)  # fetch one listing page
        print(response.text)
        json_data = json.loads(response.text)  # parse the JSON payload
        # A try/except around these requests would keep the program from dying
        # mid-run; see the sketch after this function.
        my_json = json_data['data']['item']  # the list of schools on this page
        for my in my_json:  # pick up each school's id, name and flags
            ss = School()
            ss.school_id = my['school_id']
            ss.name = my['name']
            ss.province_name = my['province_name']
            ss.city_name = my['city_name']
            ss.f211 = 1 if my['f211'] == 1 else 0
            ss.f985 = 1 if my['f985'] == 1 else 0
            ss.dual_class_name = 1 if my['dual_class_name'] == '双一流' else 0  # '双一流' = "Double First-Class"
            ss.type = my['type_name']
            # One score-line JSON per year; the slots take the year and the
            # school id collected above.
            urlkzx = 'https://static-data.eol.cn/www/2.0/schoolprovinceindex/%s/%s/35/2/1.json'
            for year in (2021, 2020, 2019, 2018):
                res = requests.get(urlkzx % (year, my['school_id']), headers=heads)
                print(my['school_id'])
                json_data = json.loads(res.text)
                if json_data:  # the endpoint returns an empty body when there is no data
                    data = json_data['data']['item'][0]
                    df_one = pd.DataFrame({
                        'school_id': my['school_id'],
                        'school_name': ss.name,
                        'province': ss.province_name,
                        'city': ss.city_name,
                        'dual_class': ss.dual_class_name,
                        'f985': ss.f985,
                        'f211': ss.f211,
                        'school_type': ss.type,
                        'year': data['year'],
                        'batch': data['local_batch_name'],
                        'type': data['zslx_name'],
                        'min_score': data['min'],
                        'min_rank': data['min_section'],
                        'batch_score': data['proscore'],
                    }, index=[0])
                    print(df_one)
                    # DataFrame.append was removed in pandas 2.x; concat replaces it.
                    df_all = pd.concat([df_all, df_one], ignore_index=True)
    return df_all
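The try/except hinted at in the comments above is worth making real: the static-data endpoint can return empty or malformed bodies, and one bad response otherwise kills the whole crawl. A minimal sketch of such a guard (fetch_json is a hypothetical helper, not part of the original code), combining a timeout, an exception guard, and a polite pause:

def fetch_json(url, headers, pause=1.0):
    # Return the parsed JSON payload, or None if the request or parse fails.
    try:
        res = requests.get(url, headers=headers, timeout=10)
        res.raise_for_status()
        return json.loads(res.text)
    except (requests.RequestException, json.JSONDecodeError):
        return None
    finally:
        time.sleep(pause)  # throttle so the API is not hammered

Each requests.get / json.loads pair above could then become a single fetch_json call, skipping the row whenever it returns None.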
# Basic information for each college (no per-year scores).
def detail(page_num):
    heads = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.106 Safari/537.36'}
    url = 'https://api.eol.cn/gkcx/api/?access_token=&admissions=&central=&department=&dual_class=&f211=&f985=&is_doublehigh=&is_dual_class=&keyword=&nature=&page=%s&province_id=&ranktype=&request_type=1&school_type=&signsafe=&size=20&sort=view_total&top_school_id=[766,707]&type=&uri=apidata/api/gk/school/lists'
    d2 = pd.DataFrame()
    for i in range(1, page_num):  # i stands in for the page number
        response = requests.get(url % i, headers=heads)  # fetch one listing page
        print(response.text)
        json_data = json.loads(response.text)  # parse the JSON payload
        my_json = json_data['data']['item']  # the list of schools on this page
        for my in my_json:  # pick up each school's id, name and flags
            ss = School()
            ss.school_id = my['school_id']
            ss.name = my['name']
            ss.province_name = my['province_name']
            ss.city_name = my['city_name']
            ss.f211 = 1 if my['f211'] == 1 else 0
            ss.f985 = 1 if my['f985'] == 1 else 0
            ss.dual_class_name = 1 if my['dual_class_name'] == '双一流' else 0
            ss.type = my['type_name']
            df2 = pd.DataFrame({
                'school_id': my['school_id'],
                'school_name': ss.name,
                'province': ss.province_name,
                'city': ss.city_name,
                'dual_class': ss.dual_class_name,
                'f985': ss.f985,
                'f211': ss.f211,
                'school_type': ss.type,
            }, index=[0])
            print(df2)
            d2 = pd.concat([d2, df2], ignore_index=True)
    return d2
def get_all_page(all_page_num):
    print(all_page_num)
    # Collect the per-year score lines.
    df_all = get_one_page(page_num=all_page_num)
    time.sleep(5)
    return df_all
def getdetail(all_page_num):
    print(all_page_num)
    # Collect the basic school information.
    d2 = detail(page_num=all_page_num)
    time.sleep(5)
    return d2
if __name__ == '__main__':
    df_school = get_all_page(100)
    dd = getdetail(100)
    df_school.to_excel('data.xlsx', index=False)
    dd.to_excel('dd.xlsx', index=False)
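With data.xlsx written, the year-over-year trend the title promises is one pivot away. A minimal sketch, assuming the column names used in get_one_page above:

import pandas as pd

scores = pd.read_excel('data.xlsx')
# The API may deliver scores as strings; coerce to numbers before pivoting.
scores['min_score'] = pd.to_numeric(scores['min_score'], errors='coerce')
# One row per school, one column per year, holding the minimum admitted score.
trend = scores.pivot_table(index='school_name', columns='year', values='min_score')
print(trend.head())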