Last active
August 17, 2020 10:42
-
-
Save ishritam/a19910a32a1c0b6ecc4eac20072a2d3c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scrapy | |
import re | |
import json | |
from pprint import pprint | |
from string import ascii_lowercase | |
import redis | |
class MgLVSpider(scrapy.Spider):
    """Enumerate 1mg drug-SKU listing pages (one alphabetical prefix per
    letter a-z) and push each SKU's full product URL onto a Redis list
    named ``'urls'`` for later consumption.
    """

    # Name used to invoke the spider (``scrapy crawl mg_lv``).
    name = 'mg_lv'

    # Connection to a local Redis server where scraped URLs are stored.
    # NOTE(review): ``clint`` looks like a typo for ``client``; the original
    # name is kept so external code referring to it keeps working, with a
    # correctly-spelled alias added.
    clint = redis.Redis(host="127.0.0.1", port=6379, charset="utf-8",
                        decode_responses=True)
    client = clint  # preferred, correctly-spelled alias

    # Base URL prepended to each SKU slug to form the full product URL.
    allowed_domain = 'https://www.1mg.com'

    def start_requests(self):
        """Yield one request per (prefix letter, page number) pair.

        Requests are yielded lazily instead of first materializing the
        full list of ~7000 URLs in memory.
        """
        # Number of result pages per prefix letter. These were taken
        # manually from the site; they could also be extracted via XPath.
        pages = [726, 207, 664, 369, 299, 259, 230, 102, 145, 43, 122, 320,
                 438, 305, 330, 429, 31, 396, 384, 421, 71, 191, 53, 49, 16,
                 197]
        for prefix, page_count in zip(ascii_lowercase, pages):
            for page in range(1, page_count + 1):
                url = (f'https://www.1mg.com/pharmacy_api_gateway/v4/'
                       f'drug_skus/by_prefix?prefix_term={prefix}'
                       f'&page={page}&per_page=30')
                yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Push the full product URL of every SKU on this page to Redis.

        The API returns JSON with ``data.skus``, a list of SKU objects each
        carrying a ``slug`` path component.
        """
        result = json.loads(response.body)
        # Iterate the SKU objects directly instead of indexing by position.
        for sku in result['data']['skus']:
            self.clint.lpush('urls', self.allowed_domain + sku['slug'])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment