requests库
请求
GET
原型
def get(url, params=None, **kwargs):
简单使用
import requests


# A simple GET request
def main():
    """Fetch the Baidu home page and print the types of the key response fields.

    Prints the type of the response object, of its status code and of its
    text body, followed by the cookie jar returned with the response.
    """
    # The host was misspelled as "wwww.baidu.com"; use the canonical name.
    response = requests.get('http://www.baidu.com/')
    print(type(response))
    print(type(response.status_code))
    print(type(response.text))
    print(response.cookies)


if __name__ == '__main__':
    main()
输出
<class 'requests.models.Response'> <class 'int'> <class 'str'> <RequestsCookieJar[<Cookie BDORZ=27315 for .baidu.com/>]>
参数列表
- url 请求地址
- params 参数
- headers 协议头
带参数GET
import requests


def main():
    """Send a GET request carrying query parameters and print the response body."""
    query = dict(name='Alice', age=20)
    reply = requests.get("http://httpbin.org/get", params=query)
    print(reply.text)


if __name__ == '__main__':
    main()
输出
{ "args": { "age": "20", "name": "Alice" }, "headers": { "Accept": "*/*", "Accept-Encoding": "gzip, deflate", "Host": "httpbin.org", "User-Agent": "python-requests/2.21.0" }, "origin": "60.221.192.206, 60.221.192.206", "url": "https://httpbin.org/get?name=Alice&age=20" }
返回JSON格式
import requests


def main():
    """Request httpbin's /get endpoint and print the body decoded from JSON."""
    reply = requests.get("http://httpbin.org/get")
    payload = reply.json()
    print(payload)


if __name__ == '__main__':
    main()
输出
{ "args": {}, "headers": { "Accept": "*/*", "Accept-Encoding": "gzip, deflate", "Host": "httpbin.org", "User-Agent": "python-requests/2.21.0" }, "origin": "60.221.192.206, 60.221.192.206", "url": "https://httpbin.org/get" }
抓取一个页面
import requests


def main():
    """Fetch the Zhihu explore page with a browser User-Agent and print its HTML."""
    # The original dict literal was never closed, which made the whole script
    # a syntax error; the closing brace is restored here.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36 ',
    }
    response = requests.get("https://www.zhihu.com/explore", headers=headers)
    print(response.text)


if __name__ == '__main__':
    main()
获取数据流
import requests


def main():
    """Download GitHub's favicon and save the raw bytes to ``favicon.ico``."""
    response = requests.get("https://github.com/favicon.ico")
    # ``content`` holds the undecoded body as bytes, so the file is opened in
    # binary mode. (The original spelled the attribute ``conte=nt``.)
    with open('favicon.ico', 'wb') as f:
        f.write(response.content)


if __name__ == '__main__':
    main()
Cookies
import requests


def main():
    """Request a Zhihu profile page using cookies parsed from a raw header string.

    The browser-style ``name=value; name=value`` string is split into
    individual cookies, loaded into a ``RequestsCookieJar`` and sent together
    with a desktop User-Agent header. The response body is printed.
    """
    # NOTE(review): this looks like a captured login session; confirm it is
    # expired and consider loading such values from configuration instead.
    raw_cookies = '_xsrf=GnQV3DAEm4T0wUHlReVtKsTKm8VDQTRF; _zap=ca623bfe-abc7-4fc2-95e4-825f28ad6d2c; d_c0="AMCqHY_YhQ-PTuC1elqVNP-gSqxJR4KHotA=|1559460286"; ISSW=1; capsion_ticket="2|1:0|10:1562331090|14:capsion_ticket|44:MWM1OTRkYmJjODZjNDlhNmJjN2RhZGYwYTBjOTc0Mjg=|a9e01bcde1a2f7087b5beb60095087e7b42780919d0461f5dfeb5ec1fafd481d"; z_c0="2|1:0|10:1562331112|4:z_c0|92:Mi4xbkk0UUJRQUFBQUFBd0tvZGo5aUZEeVlBQUFCZ0FsVk42SlVNWGdCa1RNMkQ1VFg1ZG1QNWViaE4ybFBLNzJFUENn|0071cb81df80bf1ea5996eee2ff3a432f4eb13dcaae596f9cfdbc6b3f3b11190"; tst=r; q_c1=282aa6826ede4be68565f796cf2fdf70|1562331225000|1562331225000; tgw_l7_route=116a747939468d99065d12a386ab1c5f'
    jar = requests.cookies.RequestsCookieJar()
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.108 Safari/537.36',
    }
    for pair in raw_cookies.split(';'):
        # Pairs are separated by "; ", so strip the padding; otherwise every
        # cookie name after the first would start with a space.
        pair = pair.strip()
        if not pair:
            continue
        # Split on the first "=" only: cookie values may contain "=" themselves.
        key, _, value = pair.partition('=')
        jar.set(key, value)
    response = requests.get(
        'https://www.zhihu.com/people/luzhenfang/activities',
        cookies=jar,
        headers=headers,
    )
    print(response.text)


if __name__ == '__main__':
    main()
POST
原型
def post(url, data=None, json=None, **kwargs):
简单使用
import requests


def main():
    """POST a small form to httpbin and print the echoed response body."""
    form = dict(name='Alice', age='20')
    reply = requests.post('http://httpbin.org/post', data=form)
    print(reply.text)


if __name__ == '__main__':
    main()
输出
{ "args": {}, "data": "", "files": {}, "form": { "age": "20", "name": "Alice" }, "headers": { "Accept": "*/*", "Accept-Encoding": "gzip, deflate", "Content-Length": "17", "Content-Type": "application/x-www-form-urlencoded", "Host": "httpbin.org", "User-Agent": "python-requests/2.21.0" }, "json": null, "origin": "60.221.192.206, 60.221.192.206", "url": "https://httpbin.org/post" }
上传文件
import requests


def main():
    """Upload ``favicon.ico`` to httpbin as a multipart file and print the reply."""
    # Keep the file open only for the duration of the upload; the original
    # opened it without ever closing the handle.
    with open('favicon.ico', 'rb') as icon:
        files = {'file': icon}
        response = requests.post('http://httpbin.org/post', files=files)
    print(response.text)


if __name__ == '__main__':
    main()
输出
{ "args": {}, "data": "", "files": { "file": "data:application/octet-stream;文件的base64 编码数据" }, "form": {}, "headers": { "Accept": "*/*", "Accept-Encoding": "gzip, deflate", "Content-Length": "6665", "Content-Type": "multipart/form-data; boundary=c6d8330f2fa3f5827552271ecf5c44ed", "Host": "httpbin.org", "User-Agent": "python-requests/2.21.0" }, "json": null, "origin": "60.221.192.206, 60.221.192.206", "url": "https://httpbin.org/post" }
Other
requests.XXX('http://xxxx/')
XXX 可以是
- get 获取页面
- post 数据提交
- put 传送数据取代指定文档的内容
- delete 请求删除
- head 获取报头
- options 查看服务器性能
- connect 代替客户访问(注意:requests 并没有提供 requests.connect() 这个快捷函数)
import requests


def main():
    """Issue one request per HTTP verb helper against the matching httpbin endpoint."""
    calls = (
        (requests.post, 'http://httpbin.org/post'),
        (requests.put, 'http://httpbin.org/put'),
        (requests.delete, 'http://httpbin.org/delete'),
        (requests.head, 'http://httpbin.org/get'),
        (requests.options, 'http://httpbin.org/get'),
    )
    # Responses are discarded; the point is only to show the available helpers.
    for send, url in calls:
        send(url)


if __name__ == '__main__':
    main()
响应
当我们发送完请求后,得到的结果自然是响应. 我们可以通过 text 和 content 获取响应内容,此外还有很多属性和方法可以用来获取其他信息,比如状态码、响应头、cookies 等
import requests


def main():
    """Print the type and value of a response's status code, headers and cookies."""
    reply = requests.get('http://jianshu.com/')
    for field in (reply.status_code, reply.headers, reply.cookies):
        print(type(field))
        print(field)


if __name__ == '__main__':
    main()
输出
<class 'int'> 403 <class 'requests.structures.CaseInsensitiveDict'> {'Server': 'Tengine', 'Content-Type': 'text/html', 'Transfer-Encoding': 'chunked', 'Connection': 'keep-alive', 'Date': 'Sun, 21 Jul 2019 05:24:28 GMT', 'Vary': 'Accept-Encoding', 'Strict-Transport-Security': 'max-age=31536000; includeSubDomains; preload', 'Content-Encoding': 'gzip', 'x-alicdn-da-ups-status': 'endOs,0,403', 'Via': 'cache8.l2nu16-1[22,0], cache1.cn483[37,0]', 'Timing-Allow-Origin': '*', 'EagleId': '3cdfd94115636866688226745e'} <class 'requests.cookies.RequestsCookieJar'> <RequestsCookieJar[]>
会话维持
先正常访问
import requests


def main():
    """Set a cookie in one standalone request, then read cookies in another."""
    set_url = 'http://httpbin.org/cookies/set/number/123456789'
    read_url = 'http://httpbin.org/cookies'

    # Ask the server to set a cookie.
    requests.get(set_url)

    # Read the cookies back with a second, independent request.
    reply = requests.get(read_url)
    print(reply.text)


if __name__ == '__main__':
    main()
输出
{ "cookies": {} }
我们并没有取到cookies
也就是这两次请求相当于打开了两个不同的浏览器
Session对象
import requests


def main():
    """Set and then read a cookie through one shared ``Session`` object."""
    browser = requests.Session()
    # Both requests go through the same session object.
    browser.get('http://httpbin.org/cookies/set/number/123456789')
    reply = browser.get('http://httpbin.org/cookies')
    print(reply.text)


if __name__ == '__main__':
    main()
输出
{ "cookies": { "number": "123456789" } }
这次我们成功获取到了cookies
因为 session 为我们保留了会话
SSL证书
import requests


def main():
    """Request the 12306 site over HTTPS and print the status code."""
    target = 'https://www.12306.cn'
    reply = requests.get(target)
    print(reply.status_code)


if __name__ == '__main__':
    main()
运行上述代码后会出现SSLError 错误 ,那么如何避免这个错误呢? 很简单 把verify参数设置为 False 即可
import requests


def main():
    """Request the 12306 site with certificate verification switched off."""
    target = 'https://www.12306.cn'
    # verify=False skips TLS certificate verification for this request.
    reply = requests.get(target, verify=False)
    print(reply.status_code)


if __name__ == '__main__':
    main()
捕获警告
import logging

import requests


def main():
    """Request 12306 without certificate verification, routing warnings to logging."""
    # Redirect warnings issued via the ``warnings`` module into ``logging``.
    logging.captureWarnings(True)
    target = 'https://www.12306.cn'
    reply = requests.get(target, verify=False)
    print(reply.status_code)


if __name__ == '__main__':
    main()
代理设置
proxies 参数
import requests


def main():
    """Send a request to Taobao through per-scheme HTTP proxies.

    The proxy addresses are placeholders and must be replaced with reachable
    proxies before the script can succeed.
    """
    proxies = {
        'http': 'http://10.10.1.10:3128',
        'https': 'http://10.10.1.10:1080',
    }
    # The host was misspelled as "wwww.taobao.com"; use the same host as the
    # other Taobao examples.
    requests.get('https://www.taobao.com', proxies=proxies)


if __name__ == '__main__':
    main()
当然 requests 还支持socks 协议代理
pip3 install 'requests[socks]'
import requests


def main():
    """Send a request to Taobao through a SOCKS5 proxy for both schemes."""
    # Placeholder proxy URL, used unchanged for http and https traffic.
    socks_proxy = 'socks5://user:password@host:port'
    routing = dict(http=socks_proxy, https=socks_proxy)
    requests.get("https://www.taobao.com", proxies=routing)


if __name__ == '__main__':
    main()
超时设置
timeout 参数
import requests


def main():
    """Request Taobao with a one-second timeout and print the status code."""
    limit_seconds = 1
    reply = requests.get("https://www.taobao.com", timeout=limit_seconds)
    print(reply.status_code)


if __name__ == '__main__':
    main()
通过这样的方式,我们可以将超时时间设置为1秒,如果1秒内没有响应,那就抛出异常。
实际上,请求分为两个阶段,即连接(connect)和读取(read)。
上面设置的
timeout
将分别用作连接和读取这二者各自的timeout
(而不是二者的总和)。如果要为二者指定不同的值,就可以传入一个 (connect, read) 元组:
# The timeout tuple must have exactly two items: (connect timeout, read timeout).
# requests rejects a three-item tuple with ValueError.
response = requests.get('https://www.taobao.com', timeout=(5, 30))
如果想永久等待,可以直接将
timeout
设置为None
,或者不设置直接留空,因为默认是None
。
response = requests.get('https://www.taobao.com', timeout=None)
# 或者不加参数
response = requests.get('https://www.taobao.com')
身份认证
import requests
from requests.auth import HTTPBasicAuth


def main():
    """Call a local service with HTTP Basic authentication and print the status code."""
    response = requests.get(
        'http://localhost:5000',
        auth=HTTPBasicAuth('username', 'password'),
    )
    # The original printed ``r.status_code``, but no name ``r`` exists here.
    print(response.status_code)


if __name__ == '__main__':
    main()
如果用户名和密码都正确的话,请求时就会自动认证成功,返回200状态码;否则认证失败,返回401状态码。
当然我们也可以直接传入一个元组,它会默认使用
HTTPBasicAuth
这个类来认证。
response = requests.get('http://localhost:5000', auth=('username', 'password'))
print(response.status_code)
因此上面代码可以简写为
import requests
from requests.auth import HTTPBasicAuth


def main():
    """Call a local service with Basic auth given as a ``(user, password)`` tuple."""
    response = requests.get('http://localhost:5000', auth=('username', 'password'))
    # The original printed ``r.status_code``, but no name ``r`` exists here.
    print(response.status_code)


if __name__ == '__main__':
    main()
此外,requests还提供了其他认证方式,如OAuth认证,不过此时需要安装oauth包,安装命令如下
pip3 install requests_oauthlib
使用OAuth1认证的方法如下
import requests
from requests.auth import HTTPBasicAuth
# OAuth1 lives in the separately installed requests_oauthlib package; the
# original used the name without importing it.
from requests_oauthlib import OAuth1


def main():
    """Call Twitter's verify_credentials endpoint using OAuth1 authentication.

    The four credential strings are placeholders and must be replaced with
    real application and user tokens.
    """
    url = 'https://api.twitter.com/1.1/account/verify_credentials.json'
    auth = OAuth1(
        'YOUR_APP_KEY',
        'YOUR_APP_SECRET',
        'USER_OAUTH_TOKEN',
        'USER_OAUTH_TOKEN_SECRET',
    )
    requests.get(url, auth=auth)


if __name__ == '__main__':
    main()
Prepared Request
from requests import Request, Session


def main():
    """Build a POST as a ``Request``, prepare it on a session, send it, print the body."""
    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36'
    pending = Request(
        'POST',
        'http://httpbin.org/post',
        data={'name': 'germey'},
        headers={'User-Agent': user_agent},
    )

    client = Session()
    # prepare_request() turns the Request into the prepared form that send() takes.
    prepared = client.prepare_request(pending)
    reply = client.send(prepared)
    print(reply.text)


if __name__ == '__main__':
    main()
输出
{ "args": {}, "data": "", "files": {}, "form": { "name": "germey" }, "headers": { "Accept": "*/*", "Accept-Encoding": "gzip, deflate", "Content-Length": "11", "Content-Type": "application/x-www-form-urlencoded", "Host": "httpbin.org", "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36" }, "json": null, "origin": "218.26.55.243, 218.26.55.243", "url": "https://httpbin.org/post" }
这里我们引入了
Request
,然后用url
、data
和headers
参数构造了一个Request
对象,这时需要再调用Session
的prepare_request()
方法将其转换为一个Prepared Request对象,然后调用send()
方法,同样实现了POST方法.