# Option: set up Scrapy inside a virtualenv-managed environment.
# Install the virtualenv tool with pip3.
pip3 install virtualenv
# Create an environment directory named "myvenv" in the current directory.
virtualenv myvenv
# Run the environment's activate script. The backslash path under Scripts\
# is Windows-style -- NOTE(review): on a POSIX shell the equivalent is
# presumably `. myvenv/bin/activate`; confirm for the target platform.
.\myvenv\Scripts\activate
# Install Scrapy with whichever pip is now first on the PATH.
pip install Scrapy
# Option: set up Scrapy inside a conda-managed environment.
# Create a conda environment named "venv" pinned to Python 3.7.
conda create --name venv python=3.7
# Switch the current shell session to that environment.
conda activate venv
# Install the scrapy package from the conda-forge channel.
conda install -c conda-forge scrapy
# Generate a new Scrapy project skeleton named "tutorial".
scrapy startproject tutorial
# Run the spider named "quotes".
# NOTE(review): presumably this must be run from inside the project directory
# after a spider called "quotes" has been written -- neither step is shown here.
scrapy crawl quotes
# Open the interactive Scrapy shell on the first page of the quotes site.
# The URL is single-quoted so the shell passes it through untouched.
scrapy shell 'http://quotes.toscrape.com/page/1/'
# The lines below are Python expressions to type at the Scrapy shell prompt
# once it has started. They are NOT command-line arguments: left on the
# command line, their unquoted parentheses are a shell syntax error.
#   >>> response.css('title')
#   >>> response.css('title::text').getall()
#   >>> response.xpath('//title/text()').get()
# Run the "quotes" spider and store the scraped items in quotes.json.
# NOTE(review): Scrapy documents -o as appending to an existing file, so
# re-running against an existing quotes.json may leave invalid JSON -- confirm
# against the installed Scrapy version (newer releases also offer -O to overwrite).
scrapy crawl quotes -o quotes.json
# Two ways of reading the href of the link inside <li class="next">.
# Each is a separate expression and has to sit on its own line.

# 1) Select the attribute with the ::attr() CSS extension and take the first match.
response.css('li.next a::attr(href)').get()

# 2) Select the <a> element itself and look the attribute up via .attrib.
response.css('li.next a').attrib['href']
for href in response.css('li.next a::attr(href)'): yield response.follow(href, callback=self.parse)
# Run the "quotes" spider with the spider argument tag=humor (passed via -a)
# and store the scraped items in quotes-humor.json.
scrapy crawl quotes -o quotes-humor.json -a tag=humor
def __init__(self, *args, **kwargs):
    """Initialise the spider and derive ``start_urls`` from ``category``.

    Args:
        *args: positional arguments forwarded unchanged to the parent
            initialiser.
        **kwargs: keyword arguments forwarded unchanged to the parent
            initialiser. Must contain ``category``, the path segment that
            is appended to the site root.

    Raises:
        ValueError: if no ``category`` keyword argument was supplied.
    """
    super(QDQSpider, self).__init__(*args, **kwargs)

    category = kwargs.get('category')
    if category is None:
        # Without this guard the concatenation below fails with an opaque
        # "can only concatenate str" TypeError that never names the argument.
        raise ValueError(
            "missing required spider argument 'category' "
            "(pass it with -a category=<name>)"
        )

    self.start_urls = [
        "http://es.qdq.com/" + category + "/"
    ]
def process_spider_input(response, spider):
    """Print a short message saying whether the response status is 200.

    Args:
        response: object exposing a ``status`` attribute that is compared
            with 200.
        spider: not used by this function; kept so the signature is unchanged.

    Returns:
        None. The function only prints; despite the wording of the failure
        message, it does not itself retry anything.
    """
    if response.status == 200:
        # do what ever you want
        print('OK 200')
    else:
        print('error on request. Retry')