Requests库 简单使用

后端 / 2020-03-10

requests库

请求

GET

原型


def get(url, params=None, **kwargs):

简单使用


import requests 

# 一个简单的GET请求
def main():
    """Send a plain GET request and print the types of common response fields."""
    # The host is "www.baidu.com" (three w's).
    response = requests.get('http://www.baidu.com/')
    print(type(response))
    print(type(response.status_code))
    print(type(response.text))
    print(response.cookies)

if __name__ == '__main__':
	main()

输出

<class 'requests.models.Response'>
<class 'int'>
<class 'str'>
<RequestsCookieJar[<Cookie BDORZ=27315 for .baidu.com/>]>

参数列表


  • url 请求地址
  • params 参数
  • headers 协议头

带参数GET


import requests

def main():
    """GET httpbin with query parameters and print the echoed response body."""
    query = {'name': 'Alice', 'age': 20}
    reply = requests.get("http://httpbin.org/get", params=query)
    print(reply.text)

if __name__ == '__main__':
	main()

输出

{
"args": {
"age": "20", 
"name": "Alice"
}, 
"headers": {
"Accept": "*/*", 
"Accept-Encoding": "gzip, deflate", 
"Host": "httpbin.org", 
"User-Agent": "python-requests/2.21.0"
}, 
"origin": "60.221.192.206, 60.221.192.206", 
"url": "https://httpbin.org/get?name=Alice&age=20"
}

返回JSON格式


import requests

def main():
    """Fetch httpbin's /get endpoint and print the body decoded from JSON."""
    decoded = requests.get("http://httpbin.org/get").json()
    print(decoded)

if __name__ == '__main__':
	main()

输出

{
"args": {},
"headers": {
   "Accept": "*/*",
   "Accept-Encoding": "gzip, deflate",
   "Host": "httpbin.org",
   "User-Agent": "python-requests/2.21.0"
},
"origin": "60.221.192.206, 60.221.192.206",
"url": "https://httpbin.org/get"
}

抓取一个页面


import requests

def main():
    """Fetch the Zhihu explore page while sending a browser-like User-Agent."""
    headers = {
        # Adjacent string literals are concatenated, keeping the long
        # User-Agent value on readable lines without embedding a newline.
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36'
        ),
    }
    response = requests.get("https://www.zhihu.com/explore", headers=headers)
    print(response.text)

if __name__ == '__main__':
	main()

获取数据流


import requests

def main():
    """Download GitHub's favicon and save the raw bytes to favicon.ico."""
    response = requests.get("https://github.com/favicon.ico")
    # .content is the undecoded response body (bytes), hence the 'wb' mode.
    with open('favicon.ico', 'wb') as f:
        f.write(response.content)

if __name__ == '__main__':
	main()

Cookies

百度百科

import requests
def main():
    """Request a Zhihu profile page using cookies copied from a browser session."""
    # NOTE(review): this looks like a captured login cookie string; substitute
    # your own value and avoid publishing live session cookies.
    raw_cookies = '_xsrf=GnQV3DAEm4T0wUHlReVtKsTKm8VDQTRF; _zap=ca623bfe-abc7-4fc2-95e4-825f28ad6d2c; d_c0="AMCqHY_YhQ-PTuC1elqVNP-gSqxJR4KHotA=|1559460286"; ISSW=1; capsion_ticket="2|1:0|10:1562331090|14:capsion_ticket|44:MWM1OTRkYmJjODZjNDlhNmJjN2RhZGYwYTBjOTc0Mjg=|a9e01bcde1a2f7087b5beb60095087e7b42780919d0461f5dfeb5ec1fafd481d"; z_c0="2|1:0|10:1562331112|4:z_c0|92:Mi4xbkk0UUJRQUFBQUFBd0tvZGo5aUZEeVlBQUFCZ0FsVk42SlVNWGdCa1RNMkQ1VFg1ZG1QNWViaE4ybFBLNzJFUENn|0071cb81df80bf1ea5996eee2ff3a432f4eb13dcaae596f9cfdbc6b3f3b11190"; tst=r; q_c1=282aa6826ede4be68565f796cf2fdf70|1562331225000|1562331225000; tgw_l7_route=116a747939468d99065d12a386ab1c5f'
    jar = requests.cookies.RequestsCookieJar()
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.108 Safari/537.36',
    }
    for pair in raw_cookies.split(';'):
        # Pairs are separated by "; ", so trim the leftover space from the
        # name; split on the first "=" only because values may contain "=".
        key, value = pair.strip().split('=', 1)
        jar.set(key, value)
    response = requests.get(
        'https://www.zhihu.com/people/luzhenfang/activities',
        cookies=jar,
        headers=headers,
    )
    print(response.text)

if __name__ == '__main__':
	main()

POST

原型


def post(url, data=None, json=None, **kwargs):

简单使用

import requests

def main():
    """POST a small form to httpbin and print what the server echoes back."""
    form = {'name': 'Alice', 'age': '20'}
    print(requests.post('http://httpbin.org/post', data=form).text)

if __name__ == '__main__':
	main()

输出

{
"args": {}, 
"data": "", 
"files": {}, 
"form": {
"age": "20", 
"name": "Alice"
}, 
"headers": {
"Accept": "*/*", 
"Accept-Encoding": "gzip, deflate", 
"Content-Length": "17", 
"Content-Type": "application/x-www-form-urlencoded", 
"Host": "httpbin.org", 
"User-Agent": "python-requests/2.21.0"
}, 
"json": null, 
"origin": "60.221.192.206, 60.221.192.206", 
"url": "https://httpbin.org/post"
}

上传文件

import requests

def main():
    """Upload favicon.ico to httpbin as a file field and print the reply."""
    # Keep the file open only for the duration of the upload so the handle
    # is closed even if the request raises.
    with open('favicon.ico', 'rb') as f:
        response = requests.post('http://httpbin.org/post', files={'file': f})
    print(response.text)

if __name__ == '__main__':
	main()

输出

{
"args": {}, 
"data": "", 
"files": {
"file": "data:application/octet-stream;文件的base64 编码数据"
}, 
"form": {}, 
"headers": {
"Accept": "*/*", 
"Accept-Encoding": "gzip, deflate", 
"Content-Length": "6665", 
"Content-Type": "multipart/form-data; boundary=c6d8330f2fa3f5827552271ecf5c44ed", 
"Host": "httpbin.org", 
"User-Agent": "python-requests/2.21.0"
}, 
"json": null, 
"origin": "60.221.192.206, 60.221.192.206", 
"url": "https://httpbin.org/post"
}



Other

requests.XXX('http://xxxx/')

XXX 可以是

  • get 获取页面
  • post 数据提交
  • put 传送数据以取代指定文档
  • delete 请求删除
  • head 获取报头
  • options 查看服务器性能
  • connect 代替客户访问(注意:requests 并没有提供 requests.connect() 这个快捷函数)
import requests

def main():
    """Call the post, put, delete, head and options helpers against httpbin."""
    calls = (
        (requests.post, 'http://httpbin.org/post'),
        (requests.put, 'http://httpbin.org/put'),
        (requests.delete, 'http://httpbin.org/delete'),
        (requests.head, 'http://httpbin.org/get'),
        (requests.options, 'http://httpbin.org/get'),
    )
    # Responses are intentionally discarded; only the calls are demonstrated.
    for send, url in calls:
        send(url)

if __name__ == '__main__':
	main()

响应

当我们发送完请求后,得到的结果自然是响应。我们可以通过 text 和 content 获取响应内容,此外还有很多属性和方法可以用来获取其他信息,比如状态码、响应头、cookies 等

import requests


def main():
    """Print the type and value of a response's status code, headers and cookies."""
    response = requests.get('http://jianshu.com/')
    for field in (response.status_code, response.headers, response.cookies):
        print(type(field))
        print(field)

if __name__ == '__main__':
	main()

输出

<class 'int'>
403
<class 'requests.structures.CaseInsensitiveDict'>
{'Server': 'Tengine', 'Content-Type': 'text/html', 'Transfer-Encoding': 'chunked', 'Connection': 'keep-alive', 'Date': 'Sun, 21 Jul 2019 05:24:28 GMT', 'Vary': 'Accept-Encoding', 'Strict-Transport-Security': 'max-age=31536000; includeSubDomains; preload', 'Content-Encoding': 'gzip', 'x-alicdn-da-ups-status': 'endOs,0,403', 'Via': 'cache8.l2nu16-1[22,0], cache1.cn483[37,0]', 'Timing-Allow-Origin': '*', 'EagleId': '3cdfd94115636866688226745e'}
<class 'requests.cookies.RequestsCookieJar'>
<RequestsCookieJar[]>

会话维持

先正常访问

import requests

def main():
    """Set a cookie with one standalone request, then read cookies with another."""
    # First call asks httpbin to set a cookie; its response is discarded.
    requests.get('http://httpbin.org/cookies/set/number/123456789')
    # Second call prints the cookies httpbin received on this new request.
    print(requests.get('http://httpbin.org/cookies').text)

if __name__ == '__main__':
	main()

输出

{
"cookies": {}
}

我们并没有取到cookies

也就是这两次请求相当于打开了两个不同的浏览器

Session对象


import requests


def main():
    """Set and read back a cookie through a single Session."""
    # Using the session as a context manager closes it when the block exits.
    with requests.Session() as session:
        session.get('http://httpbin.org/cookies/set/number/123456789')
        response = session.get('http://httpbin.org/cookies')
    print(response.text)


if __name__ == '__main__':
	main()

输出

{
"cookies": {
"number": "123456789"
}
}



这次我们成功获取到了cookies

因为 session 为我们保留了会话

SSL证书

import requests


def main():
    """Request an HTTPS site with default settings and print the status code."""
    response = requests.get('https://www.12306.cn')
    print(response.status_code)


if __name__ == '__main__':
    main()

运行上述代码后会出现 SSLError 错误,那么如何避免这个错误呢?很简单,把 verify 参数设置为 False 即可。需要注意的是,verify=False 会关闭证书校验,只适合在测试时临时使用,正式环境不建议这样做。

import requests


def main():
    """Request an HTTPS site with certificate verification turned off."""
    # verify=False skips TLS certificate validation; use it for testing only.
    response = requests.get('https://www.12306.cn', verify=False)
    print(response.status_code)


if __name__ == '__main__':
    main()

捕获警告

import requests
import logging

def main():
    """Make an unverified HTTPS request with warnings redirected to logging."""
    # Route messages issued via the warnings module into the logging system
    # instead of printing them to stderr.
    logging.captureWarnings(True)
    response = requests.get('https://www.12306.cn', verify=False)
    print(response.status_code)


if __name__ == '__main__':
    main()

代理设置

proxies 参数

import requests

def main():
    """Send a request through HTTP proxies chosen per URL scheme."""
    # Keys are the scheme of the target URL; values are the proxy to use.
    proxies = {
        'http': 'http://10.10.1.10:3128',
        'https': 'http://10.10.1.10:1080',
    }
    requests.get('https://www.taobao.com', proxies=proxies)


if __name__ == '__main__':
    main()

当然 requests 还支持socks 协议代理

pip3 install 'requests[socks]'

import requests

def main():
    """Send a request through a SOCKS5 proxy."""
    # Placeholder credentials/host/port: replace with real proxy settings.
    proxies = {
        'http': 'socks5://user:password@host:port',
        'https': 'socks5://user:password@host:port',
    }
    requests.get("https://www.taobao.com", proxies=proxies)


if __name__ == '__main__':
    main()

超时设置

timeout 参数

import requests

def main():
    """Request a page with a one-second timeout and print the status code."""
    response = requests.get("https://www.taobao.com", timeout=1)
    print(response.status_code)


if __name__ == '__main__':
    main()

通过这样的方式,我们可以将超时时间设置为1秒,如果1秒内没有响应,那就抛出异常。

实际上,请求分为两个阶段,即连接(connect)和读取(read)。

上面设置的timeout会分别作为连接和读取这两个阶段各自的超时时间,而不是二者的总和。

如果要分别指定,就可以传入一个元组:

# The tuple form is (connect timeout, read timeout), both in seconds.
response = requests.get('https://www.taobao.com', timeout=(5, 30))

如果想永久等待,可以直接将timeout设置为None,或者不设置直接留空,因为默认是None

response = requests.get('https://www.taobao.com', timeout=None)
# Or simply omit the timeout argument
response = requests.get('https://www.taobao.com')

身份认证

import requests
from requests.auth import HTTPBasicAuth

def main():
    """Request a local server using HTTP Basic authentication."""
    response = requests.get(
        'http://localhost:5000',
        auth=HTTPBasicAuth('username', 'password'),
    )
    print(response.status_code)


if __name__ == '__main__':
    main()

如果 用户和密码都正确的话 请求时就会自动认证成功,返回200状态码,否则返回失败 401状态码。

当然我们也可以直接传入一个元组,他会默认使用HTTPBasicAuth这个类来认证。

response = requests.get('http://localhost:5000', auth=('username', 'password'))
print(response.status_code)

因此上面代码可以简写为

import requests
from requests.auth import HTTPBasicAuth

def main():
    """Request a local server, passing Basic-auth credentials as a tuple."""
    response = requests.get('http://localhost:5000', auth=('username', 'password'))
    print(response.status_code)


if __name__ == '__main__':
    main()

此外,requests还提供了其他认证方式,如OAuth认证,不过此时需要安装oauth包,安装命令如下

pip3 install requests_oauthlib

使用OAuth1认证的方法如下

import requests
from requests.auth import HTTPBasicAuth

def main():
    """Call a Twitter API endpoint authenticated with OAuth1 credentials."""
    # OAuth1 comes from the requests_oauthlib package (installed separately),
    # not from requests.auth.
    from requests_oauthlib import OAuth1

    url = 'https://api.twitter.com/1.1/account/verify_credentials.json'
    auth = OAuth1('YOUR_APP_KEY', 'YOUR_APP_SECRET',
                  'USER_OAUTH_TOKEN', 'USER_OAUTH_TOKEN_SECRET')
    requests.get(url, auth=auth)


if __name__ == '__main__':
    main()

Prepared Request

from requests import Request, Session

def main():
    """Build a Request, prepare it through a Session, send it and print the reply."""
    url = 'http://httpbin.org/post'
    data = {
        'name': 'germey'
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36'
    }
    session = Session()
    req = Request('POST', url, data=data, headers=headers)
    # prepare_request turns the Request into a PreparedRequest that
    # session.send can transmit.
    prepped = session.prepare_request(req)
    response = session.send(prepped)
    print(response.text)


if __name__ == '__main__':
    main()

输出

{
"args": {}, 
"data": "", 
"files": {}, 
"form": {
"name": "germey"
}, 
"headers": {
"Accept": "*/*", 
"Accept-Encoding": "gzip, deflate", 
"Content-Length": "11", 
"Content-Type": "application/x-www-form-urlencoded", 
"Host": "httpbin.org", 
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36"
}, 
"json": null, 
"origin": "218.26.55.243, 218.26.55.243", 
"url": "https://httpbin.org/post"
}

这里我们引入了Request,然后用url、data和headers参数构造了一个Request对象,这时需要再调用Session的prepare_request()方法将其转换为一个Prepared Request对象,然后调用send()方法,同样实现了POST请求。