scrapy-playwright:- Downloader/handlers: scrapy.exceptions.NotSupported: AsyncioSelectorReactor
Solution 1:
The developers of scrapy_playwright suggest setting DOWNLOAD_HANDLERS and TWISTED_REACTOR directly in your script's settings. A similar comment is provided here.
Here's a working script implementing just this:
import scrapy
from scrapy.crawler import CrawlerProcess
from scrapy_playwright.page import PageCoroutine


class ProductSpider(scrapy.Spider):
    name = 'product'

    def start_requests(self):
        yield scrapy.Request(
            'https://shoppable-campaign-demo.netlify.app/#/',
            callback=self.parse,
            meta={
                'playwright': True,
                'playwright_include_page': True,
                # wait until the product listing has been rendered by the browser
                'playwright_page_coroutines': [
                    PageCoroutine("wait_for_selector", "div#productListing"),
                ],
            },
        )

    async def parse(self, response):
        # parse the rendered content
        container = response.xpath("(//div[@class='col-md-6'])[1]")
        for items in container:
            yield {
                'products': items.xpath("(//h3[@class='card-title'])[1]//text()").get()
            }


if __name__ == "__main__":
    process = CrawlerProcess(
        settings={
            # scrapy-playwright requires the asyncio-based Twisted reactor
            "TWISTED_REACTOR": "twisted.internet.asyncioreactor.AsyncioSelectorReactor",
            # route both schemes through scrapy-playwright's download handler
            "DOWNLOAD_HANDLERS": {
                "https": "scrapy_playwright.handler.ScrapyPlaywrightDownloadHandler",
                "http": "scrapy_playwright.handler.ScrapyPlaywrightDownloadHandler",
            },
            "CONCURRENT_REQUESTS": 32,
            # export scraped items as JSON lines
            "FEED_URI": 'Products.jl',
            "FEED_FORMAT": 'jsonlines',
        }
    )
    process.crawl(ProductSpider)
    process.start()
And we get the following output:
{'products': 'Oxford Loafers'}
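If you prefer running the spider inside a regular Scrapy project rather than a standalone script, the same two settings can live in the project's settings.py. Below is a minimal sketch (not part of the original answer), assuming a standard project layout with the spider saved under the spiders/ directory:

# settings.py -- minimal sketch for a regular Scrapy project

# Route both schemes through scrapy-playwright's download handler
DOWNLOAD_HANDLERS = {
    "http": "scrapy_playwright.handler.ScrapyPlaywrightDownloadHandler",
    "https": "scrapy_playwright.handler.ScrapyPlaywrightDownloadHandler",
}

# scrapy-playwright requires the asyncio-based Twisted reactor
TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"

With that in place, the spider can be run with scrapy crawl product -o Products.jl, and the .jl extension gives the same JSON lines output as the script above.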