Created
January 28, 2020 07:07
-
-
Save PandaWhoCodes/7d9b5ca0b81716568371cb6715271eb7 to your computer and use it in GitHub Desktop.
Indeed job scraping (old code)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re
from urllib.parse import urlencode, urljoin

import requests
from bs4 import BeautifulSoup
# Site that is searched; also the base against which result links are resolved.
BASE_URL = "http://www.indeed.co.in"

# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10


def build_search_url(jobkey, region):
    """Return the search URL for a job keyword and a location.

    Both values are URL-encoded, so spaces become ``+`` and characters such
    as ``&``, ``#`` or ``+`` inside the user's input cannot corrupt the query
    string.

    Args:
        jobkey: Job title/keyword entered by the user.
        region: Geographic region entered by the user.

    Returns:
        The complete search URL as a string.
    """
    query = urlencode({"q": jobkey, "l": region})
    return "{0}/jobs?{1}".format(BASE_URL, query)


def extract_jobs(soup):
    """Collect ``[title, url]`` pairs from the job-title headings in *soup*.

    Every ``<h2 class="jobtitle">`` element is inspected and the ``title``
    and ``href`` attributes of its first link are read directly from the
    parsed tag instead of being regex-matched out of the serialized HTML.

    Args:
        soup: Parsed result page (a ``BeautifulSoup`` document).

    Returns:
        A list of ``[title, absolute_url]`` lists, in page order.
    """
    alljobs = []
    for heading in soup.find_all("h2", {"class": "jobtitle"}):
        link = heading.find("a")
        title = link.get("title") if link is not None else None
        href = link.get("href") if link is not None else None
        if title is None or href is None:
            # Best effort: report the malformed heading and keep going.
            print("Skipping a job heading without a title or link")
            continue
        # urljoin keeps absolute links intact and prefixes relative ones.
        alljobs.append([title, urljoin(BASE_URL, href)])
    return alljobs


def main():
    """Prompt for a keyword and location, then print ``title::url`` per job."""
    jobkey = input("Keyword: ")  # Get the job title/keyword input from user as string
    region = input("Location: ")  # Get the geographic region from user as string
    url = build_search_url(jobkey, region)  # create the url, including the search terms
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(response.text, "lxml")
    for title, job_url in extract_jobs(soup):
        print(title + "::" + job_url)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment