## Scraping Naukri.com job counts for data-science skills
#### Selenium is used because a plain request could not retrieve the rendered HTML element, so a real browser is driven instead
from selenium import webdriver
from bs4 import BeautifulSoup
import time
import pandas as pd
from selenium.webdriver.chrome.options import Options
# Request headers (unused by the Selenium flow below; presumably left over from the direct-requests attempt mentioned above)
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36",
    "X-Requested-With": "XMLHttpRequest"
}
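# Why Selenium rather than requests: the job-count <span> scraped below is not
# present in the raw HTML that a plain GET returns, which is why the earlier
# requests-based attempt failed. A hypothetical sketch of that attempt, for
# reference only (not executed here):
#
#   import requests
#   raw_html = requests.get('https://www.naukri.com/data-science-python-jobs?k=data%20science%2C%20', headers=header).text
#   BeautifulSoup(raw_html, 'html5lib').find('span', class_='fleft grey-text mr-5 fs12')  # -> None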
# search terms (skills) to look up; the empty string queries plain data-science jobs with no extra skill
search_terms = [
    "",
    "Python",
    "SQL",
    "R",
    "Spark",
    "Hadoop",
    "Java",
    "Tableau",
    "AWS",
    "SAS",
    "Hive",
    "Scala",
    "Excel",
    "TensorFlow",
    "C++",
    "Azure",
    "NoSQL",
    "Linux",
    "C",
    "Matlab",
    "Scikit-learn",
    "Pandas",
    "Git",
    "Keras",
    "Javascript",
    "Pig",
    "Hbase",
    "Google Cloud",
    "Docker",
    "NumPy",
    "PyTorch",
    "C#",
    "SPSS",
    "MySQL",
    "Perl",
    "Cassandra",
    "MongoDB",
    "GCP",
    "Kubernetes",
    "D3",
    "Databricks",
    "postgresql",
    "Caffe",
    "Airflow",
    "Alteryx",
    "BigQuery",
    # "Fastai",
]
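# Hypothetical helper (an assumption, not wired into the loop below): terms such
# as "C++", "C#", and "Google Cloud" contain spaces or characters that are not
# URL-path safe, so a slug step like this may be needed before interpolating
# them into the Naukri URL.
from urllib.parse import quote

def term_to_slug(term):
    # lower-case, hyphenate spaces, and percent-encode anything else
    return quote(term.lower().replace(' ', '-'), safe='-')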
naukri_list = []

# one headless Chrome session is reused for every search term and closed at the end
CHROMEDRIVER_PATH = '/usr/local/bin/chromedriver'
options = Options()
options.headless = True
driver = webdriver.Chrome(CHROMEDRIVER_PATH, options=options)

for term in search_terms:
    url = f'https://www.naukri.com/data-science-{term}-jobs?k=data%20science%2C%20'
    try:
        print(url)
        driver.get(url)
        time.sleep(2)  # give the page a moment to render the job-count element
        soup = BeautifulSoup(driver.page_source, 'html5lib')
        # print(soup.prettify())
        try:
            # the last token of the count span's text is treated as the number of matching jobs
            data = soup.find('span', class_='fleft grey-text mr-5 fs12').get_text().strip().split()[-1]
            print(data, term)
            naukri_list.append(data)
        except (AttributeError, IndexError):
            # the count element was not found on the page
            naukri_list.append('0')
    except Exception as e:
        print(f'error: {e}')
        naukri_list.append('0')  # keep the results aligned with search_terms

driver.quit()

print(naukri_list)
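
# A minimal follow-up sketch (assumption: this is why pandas is imported above,
# though it is never used otherwise): pair each skill with its scraped count.
counts = pd.DataFrame({'skill': search_terms, 'job_count': naukri_list})
# the scraped counts may contain thousands separators (e.g. "1,234"), so coerce to numbers
counts['job_count'] = pd.to_numeric(
    counts['job_count'].str.replace(',', '', regex=False), errors='coerce'
).fillna(0).astype(int)
print(counts.sort_values('job_count', ascending=False).head(10))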