# 模拟登录古诗文网 (simulated login to the Gushiwen website)

"""
作者：acha
时间：2021-2-16
功能：模拟登录古诗文网
"""

import requests
from lxml import etree
from 爬虫.chaojiying_Python.chaojiying import Chaojiying_Client
# Request headers sent with every request made by this script.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/87.0.4280.141 Safari/537.36"
    ),
}

# One shared session so that the page fetch, the captcha fetch and the later
# login POST all go through the same requests.Session object.
sess = requests.Session()

# Step 1: fetch the login page and parse it into an element tree.
login_url = 'https://so.gushiwen.org/user/login.aspx?from=http://so.gushiwen.org/user/collect.aspx'
login_page = sess.get(login_url, headers=headers)
page_text = login_page.text
tree = etree.HTML(page_text)

# Step 2: the captcha <img> carries a site-relative src; prefix the host to
# obtain the full image URL, then download the image bytes.
captcha_src = tree.xpath('//*[@id="imgCode"]/@src')[0]
img_path = 'https://so.gushiwen.org' + captcha_src
captcha_response = sess.get(img_path, headers=headers)
img_data = captcha_response.content

# Step 3: persist the captcha image locally so it can be sent for recognition.
with open('./code.jpg', 'wb') as captcha_file:
    captcha_file.write(img_data)

# Step 4: read the hidden form fields whose values change between page loads.
viewstate_values = tree.xpath('//*[@id="__VIEWSTATE"]/@value')
__VIEWSTATE = viewstate_values[0]
generator_values = tree.xpath('//*[@id="__VIEWSTATEGENERATOR"]/@value')
__VIEWSTATEGENERATOR = generator_values[0]


# 识别验证码
def imgcode(file_path):
    """Recognize the captcha image stored at ``file_path``.

    The image bytes are submitted through ``Chaojiying_Client.PostPic`` and
    the recognized text is printed and returned.

    Args:
        file_path: Path of the captcha image file; it is read as raw bytes.

    Returns:
        The ``'pic_str'`` entry of the result returned by ``PostPic``.

    Raises:
        OSError: If the image file cannot be opened or read.
    """
    chaojiying = Chaojiying_Client('用户名', '密码', '软件ID')
    # Read inside a context manager so the file handle is always closed.
    with open(file_path, 'rb') as image_file:
        im = image_file.read()
    # NOTE(review): 1004 is presumably the Chaojiying captcha-type code —
    # confirm against the service's type table.
    code = chaojiying.PostPic(im, 1004)['pic_str']
    print(code)
    return code


# Solve the captcha image that was saved to disk earlier in this script.
code_result = imgcode('code.jpg')
# Login endpoint; the `from` query parameter is the URL-encoded page to
# return to after logging in.
post_url = 'https://so.gushiwen.org/user/login.aspx?from=http%3a%2f%2fso.gushiwen.org%2fuser%2fcollect.aspx'
# Form fields submitted with the login request.
# NOTE(review): the account credentials are hard-coded below; consider
# loading them from configuration or the environment instead.
data = {
    # These hidden fields change between page loads, so send the values parsed
    # from the login page fetched in this session instead of captured ones.
    "__VIEWSTATE": __VIEWSTATE,
    "__VIEWSTATEGENERATOR": __VIEWSTATEGENERATOR,
    # The field name is `from`; its value is the full redirect URL.
    "from": "http://so.gushiwen.cn/user/collect.aspx",
    "email": "wz.0527@qq.com",
    "pwd": "qwerqwer",
    "code": code_result,
    "denglu": "登录",
}
# Send the login request through the same session that fetched the login
# page and the captcha.
response = sess.post(url=post_url, headers=headers, data=data)
# Source of the page returned by the login request (the post-login page when
# the login succeeded).
page_text = response.text
# Save the returned page for inspection.
with open('gushiwen.html', 'w', encoding='utf-8') as fp:
    fp.write(page_text)