"""
作者:acha
时间:2021-2-16
功能:模拟登录古诗文网
"""
import requests
from lxml import etree
from 爬虫.chaojiying_Python.chaojiying import Chaojiying_Client
# Request headers: send a desktop Chrome User-Agent with every request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36"
}
# A single Session is shared by every request below so that cookies persist
# between the login page, the captcha image and the login POST.
sess = requests.Session()


def _first_match(nodes, what):
    """Return the first XPath result, or raise an error naming what is missing.

    nodes: the list returned by ``tree.xpath(...)``.
    what:  human-readable name of the element, used in the error message.
    Raises RuntimeError when the list is empty, instead of a bare IndexError.
    """
    if not nodes:
        raise RuntimeError('{} not found on the login page'.format(what))
    return nodes[0]


# Handle the request parameters that change on every page load.
# 1. Fetch the login page and find the captcha image it references.
login_url = 'https://so.gushiwen.org/user/login.aspx?from=http://so.gushiwen.org/user/collect.aspx'
# timeout: without one, requests waits indefinitely on an unresponsive server.
page_text = sess.get(url=login_url, headers=headers, timeout=10).text
tree = etree.HTML(page_text)
# The src attribute of #imgCode is appended to the site origin to build the
# captcha image URL.
img_path = 'https://so.gushiwen.org' + _first_match(
    tree.xpath('//*[@id="imgCode"]/@src'), 'captcha image (#imgCode)')
img_data = sess.get(url=img_path, headers=headers, timeout=10).content  # raw image bytes
# Save the captcha image locally so it can be read back for recognition.
with open('./code.jpg', 'wb') as fp:
    fp.write(img_data)
# 2. Extract the hidden form fields from the page source.
__VIEWSTATE = _first_match(
    tree.xpath('//*[@id="__VIEWSTATE"]/@value'), '__VIEWSTATE field')
__VIEWSTATEGENERATOR = _first_match(
    tree.xpath('//*[@id="__VIEWSTATEGENERATOR"]/@value'), '__VIEWSTATEGENERATOR field')
# Captcha recognition
def imgcode(file_path):
    """Send the image at *file_path* to Chaojiying and return the recognized text.

    file_path: path of the captcha image file to read.
    Returns the ``'pic_str'`` entry of the ``PostPic`` response; the value is
    also printed.

    The three ``Chaojiying_Client`` arguments are placeholders (user name,
    password, software ID) and must be replaced with real account values.
    """
    chaojiying = Chaojiying_Client('用户名', '密码', '软件ID')
    # Use a context manager so the file handle is closed after reading.
    with open(file_path, 'rb') as image_file:
        im = image_file.read()
    # 1004 is the captcha type code passed to the service.
    # NOTE(review): its exact meaning is defined by Chaojiying — confirm.
    code = chaojiying.PostPic(im, 1004)['pic_str']
    print(code)
    return code
# Recognize the captcha image that was saved to disk earlier in the script.
code_result = imgcode('code.jpg')
# Login endpoint of gushiwen.org (the "from" redirect target is URL-encoded).
post_url = 'https://so.gushiwen.org/user/login.aspx?from=http%3a%2f%2fso.gushiwen.org%2fuser%2fcollect.aspx'
# Form fields for the login POST.
# __VIEWSTATE / __VIEWSTATEGENERATOR use the values parsed from the login page
# fetched in this run, not a copy pasted from an earlier browser session.
# NOTE(review): the account e-mail and password are hard-coded here; consider
# loading them from configuration or the environment instead.
data = {
    "__VIEWSTATE": __VIEWSTATE,
    "__VIEWSTATEGENERATOR": __VIEWSTATEGENERATOR,
    # The field name is "from" and its value is the full URL; the original
    # split "from: http://..." at the wrong colon.
    "from": "http://so.gushiwen.cn/user/collect.aspx",
    "email": "wz.0527@qq.com",
    "pwd": "qwerqwer",
    "code": code_result,
    "denglu": "登录",
}
# Send the login request on the same session that fetched the login page and
# captcha; the timeout keeps the script from hanging on an unresponsive server.
response = sess.post(url=post_url, headers=headers, data=data, timeout=10)
# Body of the response to the login request (the script does not check
# whether the login actually succeeded).
page_text = response.text
# Save the returned page for inspection.
with open('gushiwen.html', 'w', encoding='utf-8') as fp:
    fp.write(page_text)
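# Stray "评论" ("Comments") label left over from the web page this script was copied from; not part of the program.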