Make Scrapy Re-crawl the Same Page

Backend / 2021-07-28
yield scrapy.Request(..., dont_filter=True)

dont_filter=True is the important part here: it tells Scrapy not to filter this URL as a duplicate. By default, Scrapy's scheduler drops any request whose URL it has already seen, so without this flag a second request to the same page would never be sent.
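
If you want every request in the project to skip deduplication, not just one, the dupe filter can also be swapped out in settings.py. A minimal sketch (BaseDupeFilter is Scrapy's built-in no-op filter; use it with care, since nothing will then stop an accidental infinite loop):

# settings.py
# Replace the default RFPDupeFilter with the no-op BaseDupeFilter,
# so no request is ever dropped as a duplicate.
DUPEFILTER_CLASS = 'scrapy.dupefilters.BaseDupeFilter'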

Example code

import scrapy
import json

from alipan.items import MzPicItem


class MzpicSpider(scrapy.Spider):
    name = 'mzpic'
    start_urls = ['xxxxxxxx']  # placeholder URL from the original post

    def parse(self, response):
        try:
            obj = json.loads(response.text)
            is_ok = obj["code"]
            if is_ok != "200":
                raise Exception("错误的请求:" + str(is_ok))
            item = MzPicItem()
            item["url"] = obj["data"]["imgurl"]
            yield item
            # Re-crawl: schedule the same URL again; dont_filter=True lets
            # the scheduler accept a URL it has already seen
            yield scrapy.Request(self.start_urls[0], callback=self.parse, dont_filter=True)

        except Exception as e:
            # Log the failure; the re-crawl loop stops here, since no
            # new Request is yielded
            self.logger.error(e)
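
One caveat with this pattern: parse re-schedules itself on every successful response, so the spider hits the endpoint in a tight loop. Pairing it with a download delay is a sensible precaution (a sketch; the values below are illustrative assumptions, not from the original post):

# settings.py
DOWNLOAD_DELAY = 1               # wait about 1 second between requests
RANDOMIZE_DOWNLOAD_DELAY = True  # jitter each delay between 0.5x and 1.5x

RANDOMIZE_DOWNLOAD_DELAY is on by default; it spreads the requests out so the repeated hits look less like a fixed-interval hammering loop.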