Created
October 18, 2018 10:06
-
-
Save hrbrmstr/dadf8fedb21c32fd67eed8b76ffd696d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(rvest)
library(dplyr)

# Download and parse the bid-results listing page.
pg <- read_html("https://bidplus.gem.gov.in/bidresultlists")

# Every element with class "block" on the page; the fields below are pulled
# out of each of these.
blocks <- html_nodes(pg, ".block")

# html_node() (singular) returns exactly one result per input node and a
# missing node when nothing matches, so every vector built below has one entry
# per element of `blocks` (NA where a field is absent). Using html_nodes()
# here would silently drop blocks lacking a field and shift later rows.
items_and_quantity <- html_node(
  blocks,
  xpath = ".//div[@class='col-block' and contains(., 'Item(s)')]"
)

# Text of the <span> that follows the "Item(s)" label.
items <- html_node(
  items_and_quantity,
  xpath = ".//strong[contains(., 'Item(s)')]/following-sibling::span"
) %>%
  html_text(trim = TRUE)

# Text of the <span> that follows the "Quantity" label, converted to numeric.
quantity <- html_node(
  items_and_quantity,
  xpath = ".//strong[contains(., 'Quantity')]/following-sibling::span"
) %>%
  html_text(trim = TRUE) %>%
  as.numeric()

# Full text of the "Department Name And Address" column: newlines become "|"
# separators, then whitespace adjacent to a separator is collapsed.
department_name_and_address <- html_node(
  blocks,
  xpath = ".//div[@class='col-block' and contains(., 'Department Name And Address')]"
) %>%
  html_text(trim = TRUE) %>%
  gsub("\n", "|", .) %>%
  gsub("[[:space:]]*\\||\\|[[:space:]]*", "|", .)
# One header node per block. html_node() returns a missing node when a block
# has no "div.block_header", which keeps the vectors below the same length as
# `blocks` instead of dropping that block.
block_header <- html_node(blocks, "div.block_header")

# Bid number: text of the "bid_no" paragraph with everything up to the last
# ": " removed.
bid_no <- html_node(
  block_header,
  xpath = ".//p[contains(@class, 'bid_no')]"
) %>%
  html_text(trim = TRUE) %>%
  gsub("^.*: ", "", .)

# Status: text of the <span> that follows the bold "Status" label.
status <- html_node(
  block_header,
  xpath = ".//p/b[contains(., 'Status')]/following-sibling::span"
) %>%
  html_text(trim = TRUE)

# Start and end dates are kept as the character strings shown on the page.
start_date <- html_node(
  blocks,
  xpath = ".//strong[contains(., 'Start Date')]/following-sibling::span"
) %>%
  html_text(trim = TRUE)

end_date <- html_node(
  blocks,
  xpath = ".//strong[contains(., 'End Date')]/following-sibling::span"
) %>%
  html_text(trim = TRUE)
# Collect the scraped vectors as named columns.
fields <- list(
  bid_no = bid_no,
  status = status,
  start_date = start_date,
  end_date = end_date,
  items = items,
  quantity = quantity,
  department_name_and_address = department_name_and_address
)

# data.frame() silently recycles a shorter vector whenever its length divides
# the longest one, which would pair values with the wrong bids. Fail loudly
# instead if the fields do not all have the same number of entries.
field_lengths <- lengths(fields)
if (length(unique(field_lengths)) != 1L) {
  stop(
    "Scraped fields have differing lengths: ",
    paste0(names(field_lengths), "=", field_lengths, collapse = ", "),
    call. = FALSE
  )
}

xdf <- as.data.frame(fields, stringsAsFactors = FALSE)

# Flag bids whose number contains the "/RA/" segment.
xdf$is_ra <- grepl("/RA/", xdf$bid_no)

str(xdf)
## 'data.frame': 10 obs. of 8 variables:
## $ bid_no : chr "GEM/2018/B/93066" "GEM/2018/B/93082" "GEM/2018/B/93105" "GEM/2018/B/93999" ...
## $ status : chr "Not Evaluated" "Not Evaluated" "Not Evaluated" "Not Evaluated" ...
## $ start_date : chr "25-09-2018 03:53:pm" "27-09-2018 09:16:am" "25-09-2018 05:08:pm" "26-09-2018 05:21:pm" ...
## $ end_date : chr "18-10-2018 03:00:pm" "18-10-2018 03:00:pm" "18-10-2018 03:00:pm" "18-10-2018 03:00:pm" ...
## $ items : chr "automotive chassis fitted with engine" "automotive chassis fitted with engine" "automotive chassis fitted with engine" "Storage System" ...
## $ quantity : num 1 1 1 2 90 1 981 6 4 376
## $ department_name_and_address: chr "Department Name And Address:||Ministry Of Steel Na Kirandul Complex N/a" "Department Name And Address:||Ministry Of Steel Na Kirandul Complex N/a" "Department Name And Address:||Ministry Of Steel Na Kirandul Complex N/a" "Department Name And Address:||Maharashtra Energy Department Maharashtra Bhusawal Tps N/a" ...
## $ is_ra : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Sir, would it be possible for me to ask you a question, and would you be able to help me with it?