-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathScrap.py
141 lines (116 loc) · 3.72 KB
/
Scrap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# -*- coding: utf-8 -*-
"""
Created on Sat Dec 22 13:59:23 2018
@author: YURU
"""
import json
import requests
import pandas as pd
import js2py
# Department / course-category codes passed as 'deptId' to the course-search service.
deptId = (
    "GE",  # General Education Center
    "CE",  # College of Engineering
    "CB",  # College of Business
    "CS",  # College of Science
    "CH",  # College of Humanities and Social Sciences
    "CI",  # College of Information and Electrical Engineering
    "CD",  # College of Construction and Development
    "CF",  # College of Finance
    "NM",  # International School of Technology and Management
    "AS",  # School of Architecture
    "PC",  # Credit programs
    "XA",  # Foreign languages
    "XC",  # General-education core courses
    "XD",  # Physical-education electives
    "XE",  # Combined classes
    "XF",  # Centrally coordinated courses
    "XH",  # Military training
)
# Download the course list for every department code in `deptId`, for both
# values of `sms` (1 and 2) of academic year `year`, and collect one dict per
# course in `data`.
data = []
url = "https://coursesearch04.fcu.edu.tw/Service/Search.asmx/GetType1Result"
year = 106
for sms in (1, 2):
    for dept in deptId:
        params = {
            'baseOptions': {'lang': "cht", 'year': year, 'sms': sms},
            'typeOptions': {'degree': "1", 'deptId': dept, 'unitId': "*", 'classId': "*"},
        }
        # A timeout keeps one unresponsive request from hanging the whole run.
        r = requests.post(url, json=params, timeout=30)
        # Fail with a clear HTTP error instead of a confusing JSON/KeyError
        # further down when the service answers with an error page.
        r.raise_for_status()
        # The response body is JSON whose 'd' field is decoded as JSON a second
        # time; the course records are read from its 'items' list.
        payload = json.loads(json.loads(r.text)['d'])
        for d in payload['items']:
            d['year'] = year
            d['sms'] = sms
            d['deptId'] = dept
            # courseid = year + sms + cls_id + sub_id + scr_dup
            d['courseid'] = str(d['year']) + str(d['sms']) + d['cls_id'] + d['sub_id'] + d['scr_dup']
            print(d)
            data.append(d)
data
# Turn the list of course dicts in `data` into a DataFrame with one row per
# course and one column per key.
# pd.DataFrame aligns values by key (a record missing a key gets NaN rather
# than shifting its values into the wrong columns) and also accepts an empty
# `data` list, which indexing `data[0]` would not.
df = pd.DataFrame(data)
# Column names, in the order they appear in the table.
title = list(df.columns)
df
'''
http://service120.sds.fcu.edu.tw/W320104/W320104_stu_pre.aspx?courseid=1061CE0711127436001&lang=cht
http://service120.sds.fcu.edu.tw/W320104/action/getdata.aspx/getEvoluationInfo
courseid = year + sms + cls_id + sub_id + scr_dup
'''
# Request a course-evaluation page once so the server issues an
# ASP.NET session id, then wrap that id in a 'Cookie' request header.
evaluation_page = 'http://service120.sds.fcu.edu.tw/W320104/W320104_stu_pre.aspx?courseid=1061CE0711127436001&lang=cht'
r = requests.get(evaluation_page)
session_id = r.cookies['ASP.NET_SessionId']
cookie = {'Cookie': 'ASP.NET_SessionId=' + session_id}
# Write the course table collected so far to disk.
df.to_csv('106course.csv', encoding='utf-8', index=False)
# Convert JS to Python: translate DES.js into an importable DES.py module,
# then use its encMe() to encode every 'courseid' into the 'course_id' value
# that is sent to the evaluation endpoint.
js2py.translate_file('DES.js', 'DES.py')
from DES import *
course_id = []
for cid in df['courseid']:
    # Drop the first and last character of the string form of the result
    # (presumably the quotes js2py puts around a JS string -- TODO confirm).
    en = str(encMe(cid))[1:-1]
    print(en)
    course_id.append(en)
df['course_id'] = course_id
# Save again now that 'course_id' exists: the table is reloaded from this CSV
# before the scores are fetched, and that step reads df['course_id'].
df.to_csv('106course.csv', encoding='utf-8', index=False)
df
# One-off probe: obtain a session cookie, then query the evaluation endpoint
# for a single hard-coded course_id and look at the first entry of 'd'.
evaluation_page = 'http://service120.sds.fcu.edu.tw/W320104/W320104_stu_pre.aspx?courseid=1061CE0711127436001&lang=cht'
r = requests.get(evaluation_page)
session_id = r.cookies['ASP.NET_SessionId']
cookie = {'Cookie': 'ASP.NET_SessionId=' + session_id}
params = {"course_id": "e3e73945c1c6299996bbeff1916a72e8532af778ef114215ac81a6729f51590368e931f1f26b98c3"}
print(cookie)
print(params)
url = 'http://service120.sds.fcu.edu.tw/W320104/action/getdata.aspx/getEvoluationInfo'
r = requests.post(url, json=params, headers=cookie)
probe_payload = json.loads(r.text)
probe_payload['d'][0]
# Fetch the evaluation score ('tpa_score_new') for every course in the saved
# table, then keep only the courses that actually have a score and write them
# back to the CSV.
df = pd.read_csv('106course.csv')
df
tpa_score = []
url = 'http://service120.sds.fcu.edu.tw/W320104/action/getdata.aspx/getEvoluationInfo'
for i, cid in enumerate(df['course_id']):
    if i % 20 == 0:
        # Get a fresh session cookie every 20 courses.
        r = requests.get('http://service120.sds.fcu.edu.tw/W320104/W320104_stu_pre.aspx?courseid=1061CE0711127436001&lang=cht')
        cookie = {'Cookie': 'ASP.NET_SessionId=' + r.cookies['ASP.NET_SessionId']}
        print(cookie)
    params = {"course_id": cid}
    print(params)
    # '' means "no score"; it stays that way if all three attempts fail, so a
    # response left over from the cookie request or from the previous course
    # is never parsed as this course's result.
    s = ''
    for t in range(3):
        print(t)
        try:
            r = requests.post(url, json=params, headers=cookie, timeout=5)
        except requests.RequestException:
            # Only network-level failures are retried; anything else
            # (including Ctrl-C) propagates.
            print('timeout')
            continue
        if r.status_code == 200:
            s = json.loads(r.text)['d']
            break
    if s == '':
        # -1 marks a course without a score.
        s = -1
    else:
        s = s[0]['tpa_score_new']
    tpa_score.append(s)
    print(s)
df['tpa_score'] = tpa_score
# Drop the courses marked as missing. The marker is the integer -1; the string
# '-1' is excluded as well so that either spelling is filtered out.
has_score = [v != -1 and v != '-1' for v in tpa_score]
course106 = df.loc[has_score]
course106.to_csv('106course.csv', encoding='utf-8', index=False)