爬虫笔记速查
正则万能匹配
re.findall("txt(.*?)txt", r.text, re.DOTALL)
ddddocr
ocr = ddddocr.DdddOcr()
code = ocr.classification(r.content)
HTTP/2
pip3 install 'httpx[http2]'
import httpx
client = httpx.Client(http2=True)
r = client.get(url, headers=headers, cookies=cookies)
print(r.text)
requests.session
# 先实例化一个对象
session = requests.session()
# 后面用法和直接使用requests一样了
response = session.get(url) # get请求
response = session.post(url, json=json_data) # post请求
result = response.json()
HTTPS 证书验证
requests.get(url, verify=False) # 不验证证书
import warnings
warnings.filterwarnings("ignore") # 关闭所有警告(包括 verify=False 触发的 InsecureRequestWarning)
HTTP Basic Auth 认证
from requests.auth import HTTPBasicAuth
requests.get(url, auth=HTTPBasicAuth("user", "password"))
禁止自动重定向(如 302)
requests.get(url, allow_redirects=False)
License:
CC BY 4.0