# coding=Big5
# 引入 requests 模組
import requests
# Download the index page with an HTTP GET and keep two local copies of it
# (decoded text and raw bytes).
#
# `url` is the page to fetch. `pt` is the same address with a trailing slash;
# the link-parsing code further down prepends it to the relative hrefs it finds.
url = "https://physexp.thu.edu.tw/~AP/YC/COD/SCRBK"
pt = 'https://physexp.thu.edu.tw/~AP/YC/COD/SCRBK/'

# NOTE: the second positional argument of requests.get() is `params` (the
# query string), not an encoding.  Passing 'utf-8' there requested
# "<url>?utf-8", so it is dropped here.  A timeout is supplied so that a
# stalled server raises instead of hanging the script indefinitely.
r = requests.get(url, timeout=30)
print('r.url=', r.url)

# Status code returned by the server.
print(r.status_code)
# Report whether the status code is OK.
if r.status_code == requests.codes.ok:
    print("OK")

# Page body as decoded text (s) and as raw bytes (b); L2 is the raw body
# split into lines, consumed by the link-parsing code below.
s = r.text
b = r.content
L2 = b.split(b'\n')

# Context managers guarantee the files are closed even if a write fails.
with open('COD1.html', 'w', encoding='UTF-8') as FW1:
    FW1.write(s)
with open('COD2.html', 'wb') as FW2:
    FW2.write(b)
print('FW2.write is done')
# Scan the raw page lines (L2) for PDF links.
#
# Every line containing b"href", b"HTML" and b"pdf" is copied to COD3.html
# (followed by "<br>") and then parsed:
#   - the text between the first pair of double quotes is taken as the href,
#   - Lpdf collects the download URL (pt + href),
#   - LNAM collects the second '/'-separated component of the href, which is
#     used later as the local file name.
# NOTE(review): this presumably expects hrefs shaped like "HTML/<file>.pdf";
# confirm against the actual page.
nn = 0
Lpdf = []
LNAM = []
# The context manager closes COD3.html even if parsing a line raises.
with open('COD3.html', 'wb') as FW3:
    for j in L2:
        if not (b"href" in j and b'HTML' in j and b'pdf' in j):
            continue
        nn += 1
        FW3.write(j + b'<br>\n')
        Lj = j.split(b'"')
        print('Lj=', Lj)
        # A matching line with no double-quoted value has no Lj[1];
        # skip it instead of dying with IndexError.
        if len(Lj) < 2:
            print('skip (no quoted href):', j)
            continue
        sj = Lj[1].decode("utf-8")
        tsj = sj.split('/')
        # Likewise an href without a '/' has no second path component.
        if len(tsj) < 2:
            print('skip (no file name in href):', sj)
            continue
        psj = pt + sj
        print(nn, psj, tsj[1])
        Lpdf.append(psj)
        LNAM.append(tsj[1])
print('FW3 is done')
print('LNAM=', LNAM)
# Download every collected PDF.  Lpdf holds the URLs and LNAM the matching
# local file names (same order), so the two lists are walked together.
for nn, (url, nam) in enumerate(zip(Lpdf, LNAM)):
    print(nn, url, nam)
    # NOTE: the second positional argument of requests.get() is `params`,
    # not an encoding; passing 'utf-8' requested "<url>?utf-8".  A timeout
    # keeps one stalled download from hanging the whole run.
    try:
        r = requests.get(url, timeout=60)
    except requests.RequestException as err:
        # One failed download should not abort the remaining ones.
        print('download failed:', url, err)
        continue
    print(r.status_code)
    if r.status_code != requests.codes.ok:
        # Do not save an error page under a .pdf file name.
        print('skip (HTTP status not OK):', url)
        continue
    print("OK")
    # Only the raw bytes are needed; the unused text decoding of the
    # response was removed.
    b2 = r.content
    with open(nam, 'wb') as FW5:
        FW5.write(b2)