yield scrapy.Request(...,dont_filter=True)
这里的 dont_filter=True 很重要:它表示该请求不经过 Scrapy 的去重过滤器,因此同一个 URL 可以被重复抓取。
示例代码
import scrapy
import json
from alipan.items import MzPicItem
class MzpicSpider(scrapy.Spider):
    """Spider that polls a single JSON endpoint over and over.

    Each successful response yields one ``MzPicItem`` carrying the image URL,
    followed by a new request for the same endpoint.
    """

    name = 'mzpic'
    start_urls = ['xxxxxxxx']

    def parse(self, response):
        """Turn one JSON response into an item and schedule the next poll.

        Yields:
            An ``MzPicItem`` whose ``url`` field is ``data.imgurl`` from the
            payload, then a ``scrapy.Request`` for ``start_urls[0]``.

        A malformed payload or a ``code`` other than ``"200"`` is logged and
        ends the polling loop: no item and no follow-up request are produced.
        """
        try:
            payload = json.loads(response.text)
            status = payload["code"]
            # NOTE(review): the comparison is against the string "200" --
            # confirm the API really returns the code as a string.
            if status != "200":
                self.logger.error("错误的请求:%s", status)
                return
            img_url = payload["data"]["imgurl"]
        except (ValueError, KeyError, TypeError) as exc:
            # ValueError covers json.JSONDecodeError; KeyError/TypeError cover
            # a payload that does not have the expected shape.
            self.logger.error("%s", exc)
            return

        item = MzPicItem()
        item["url"] = img_url
        yield item

        # Fetch the same URL again. dont_filter=True is required here because
        # the duplicate filter would otherwise drop a request for a URL that
        # has already been seen.
        yield scrapy.Request(
            self.start_urls[0],
            callback=self.parse,
            dont_filter=True,
        )