Last active
September 10, 2016 03:15
-
-
Save Akash-Ansari/ba35a830cd4e15a471aea96ba7db24a2 to your computer and use it in GitHub Desktop.
A Simple Code to Extract Agricultural Jobs Information from bdjobs.com
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
###################################### Scraping Bdjobs.com ##############################################
# Scrapes the agricultural job listing page of bdjobs.com and collects, for
# every listing, the company name, job title, education, experience and
# deadline into the data frame `Agro_Jobs`.
#
# For selecting css elements, I have used the Chrome Extension "SelectorGadget" along
# with the "rvest" (version: 0.3.2) and "xml2" packages.
# Machine Info :
# R version 3.2.3 (2015-12-10)
# Platform: x86_64-w64-mingw32/x64 (64-bit)
# Running under: Windows 7 x64 (build 7600)
# To check yours type version OR sessionInfo()

# Install "rvest" only when it is missing, so re-running the script does not
# reinstall it every time. Installing it also installs xml2 as a dependency.
if (!requireNamespace("rvest", quietly = TRUE)) {
  install.packages("rvest")
}
# Loading "rvest" makes read_html(), html_nodes() and html_text() available.
library(rvest)
# Check the package version if you have already installed "rvest"
packageVersion("rvest")

# Collapse every run of whitespace and double quotes into a single space and
# strip the padding that would otherwise remain at both ends of the text.
clean_text <- function(x) {
  trimws(gsub("[\"[:space:]]+", " ", x))
}

# Return the text of every node of `doc` matched by the CSS selector `css`.
# With `clean = FALSE` the text is returned exactly as extracted.
scrape_field <- function(doc, css, clean = TRUE) {
  txt <- html_text(html_nodes(doc, css))
  if (clean) {
    txt <- clean_text(txt)
  }
  txt
}

# Site Name: Bdjobs
page <- read_html("http://jobs.bdjobs.com/jobsearch.asp?fcatId=26&icatId=")

# Company Name (kept exactly as scraped)
CompanyName <- scrape_field(page, ".comp-name-text", clean = FALSE)
CompanyName

# Job Title
JobTitle <- scrape_field(page, ".job-title-text")
JobTitle

# Education
Education <- scrape_field(page, ".edu-text-d")
Education

# Experience
Experience <- scrape_field(page, ".exp-text-d")
Experience

# Deadline
DeadLine <- scrape_field(page, ".dead-text-d")
DeadLine

# data.frame() silently recycles shorter columns when the lengths divide each
# other, which would pair values from different listings. Fail loudly instead
# whenever the selectors did not match the same number of nodes.
n_found <- vapply(
  list(
    CompanyName = CompanyName,
    JobTitle = JobTitle,
    Education = Education,
    Experience = Experience,
    DeadLine = DeadLine
  ),
  length,
  integer(1)
)
if (length(unique(n_found)) != 1L) {
  stop(
    "The selectors matched different numbers of nodes (",
    paste(names(n_found), n_found, sep = " = ", collapse = ", "),
    "), so the columns cannot be aligned row by row.",
    call. = FALSE
  )
}

# Keep the scraped text as character columns on every R version
# (R < 4.0 would otherwise convert them to factors).
Agro_Jobs <- data.frame(
  CompanyName, JobTitle, Education, Experience, DeadLine,
  stringsAsFactors = FALSE
)

# View() needs an interactive session; print the table otherwise.
if (interactive()) {
  View(Agro_Jobs)
} else {
  print(Agro_Jobs)
}
# USE RSTUDIO !
# It will ease up your Workflow.
Thank you vai.
New in github. Trying to explore its features.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Awesome 👍