Created
September 20, 2017 15:20
-
-
Save shafayeatsumit/a23a985c25d45c0d3dee77673720c868 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Imports and browser start-up for the scraping script below.
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
import time
import re

# Alternative kept for reference: attach to a Selenium hub instead of starting
# a local browser.
#driver = webdriver.Remote(command_executor='http://127.0.0.1:4444/wd/hub', desired_capabilities=DesiredCapabilities.CHROME)
driver = webdriver.Firefox()
print("Fetching Data...")
# Load the page that lists all therapeutic areas; the rest of the script
# refers to it as the home page.
driver.get("https://eservice.hsa.gov.sg/prism/ct_r/enquiry.do?action=getAllTherapeuticArea")
def extract_detail(driver):
    """Placeholder for parsing the detail page currently loaded in ``driver``.

    The extraction is not implemented yet: ``driver`` is ignored and the
    function always returns ``None``.
    """
    # TODO: return extracted data
    return None
# Crawl position, kept outside the loop because every iteration ends by
# reloading the home page:
#   track[0] - index of the link to open on the home page
#   track[1] - index of the link to open on the second-level page
#   track[2] - pagination counter for the second-level page
track = [0, 0, 0]

# Locators and URL used by both branches of the loop.
home_url = "https://eservice.hsa.gov.sg/prism/ct_r/enquiry.do?action=getAllTherapeuticArea"
count_cell_xpath = '//*[@id="page"]/form/table[3]/tbody/tr/td/table[4]/tbody/tr/td[1]'
next_page_xpath = '//*[@id="page"]/form/table[3]/tbody/tr/td/table[4]/tbody/tr/td[2]/a[1]'
second_level_xpath = '//*[@id="page"]/form/table[3]/tbody/tr/td/table[3]/tbody'

table = driver.find_element_by_xpath("//table[@class='fmTbl']")
table_rows = table.find_elements_by_xpath(".//tr/td/a")
# NOTE(review): the nesting below was reconstructed from a copy of the script
# that had lost its indentation -- confirm it against the author's version.
# `row` itself is never used; the loop only runs once per link found above and
# the actual position comes from `track`.
for row in table_rows:
    # The home page was reloaded at the end of the previous iteration, so the
    # links are looked up again before clicking.
    table = driver.find_element_by_xpath("//table[@class='fmTbl']")
    table_rows = table.find_elements_by_xpath(".//tr/td/a")
    table_rows[track[0]].click()
    time.sleep(10)

    # First number shown in the pager cell. The arithmetic below treats it as
    # an entry count with 10 entries per page -- TODO confirm.
    page_count = driver.find_element_by_xpath(count_cell_xpath).text
    page_count = int(re.search(r'\d+', page_count).group())
    number_of_clicks = page_count // 10
    print("page count", page_count)  #25

    if page_count > 10 and track[2] > 0:
        # Multi-page listing that has already been visited at least once.
        if track[2] < number_of_clicks:
            driver.find_element_by_xpath(next_page_xpath).click()
            time.sleep(10)
            table_second_level = driver.find_element_by_xpath(second_level_xpath)
            # Search inside the second-level table only; searching from
            # `driver` would collect every tr/td link in the document.
            table_second_rows = table_second_level.find_elements_by_xpath('.//tr/td/a')
            if track[1] <= 10:
                # The original printed `e` here without any `except`, which
                # raised NameError; handle the click like the branch below.
                try:
                    table_second_rows[track[1]].click()
                except Exception as e:
                    print("end of cycle", e)
                track[1] = track[1] + 1
                if track[1] == 10:
                    # Wrap around for the next page of results.
                    track[1] = 0
                time.sleep(10)
                #need to handle pagination
                # going to last page (detail parsing)
                data = extract_detail(driver)
                driver.get(home_url)
        else:
            # No further pages to click through: move on to the next home
            # page link.
            track[0] = track[0] + 1
    else:
        table_second_level = driver.find_element_by_xpath(second_level_xpath)
        table_second_rows = table_second_level.find_elements_by_xpath('.//tr/td/a')
        if track[1] <= 10:
            try:
                table_second_rows[track[1]].click()
            except Exception as e:
                # Running past the last link ends this listing; advance to the
                # next home page link.
                print("ending of the cycle", e)
                track[0] = track[0] + 1
            track[1] = track[1] + 1
            time.sleep(10)
            data = extract_detail(driver)
            #need to handle pagination
            # going to last page (detail parsing)
            driver.get(home_url)

    print("track value before", track[0])
    #track[0] = track[0] + 1
    print("track value after", track[0])
    if page_count > 10:
        track[2] = track[2] + 1
# table = driver.find_element_by_xpath("//table[@class='fmTbl']")
# for row in table.find_elements_by_xpath(".//tr/td/a"):
# row.click()
# time.sleep(20)
# #get table element in second level
# table_second_level = driver.find_elements_by_xpath('//*[@id="page"]/form/table[3]/tbody/tr/td/table[3]/tbody')[0]
# print(table_second_level)
# for row in table_second_level.find_elements_by_xpath(".//tr/td/a"):
# row.click()
# time.sleep(10)
# driver.find_element_by_xpath('//*[@id="page"]/form/table[3]/tbody/tr/td/table[5]/tbody/tr/td/div/a/b').click()
# time.sleep(10)
# driver.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
from selenium import webdriver
import time
import re
import math
# Start a local Firefox session and open the landing page; `root_url` is
# reused later to return to this page after each drill-down.
driver = webdriver.Firefox()
root_url = "https://eservice.hsa.gov.sg/prism/ct_r/enquiry.do?action=getAllTherapeuticArea"
driver.get(root_url)
# Links inside the 'fmTbl' table of the landing page.
# NOTE(review): the attribute is written `@Class` with a capital C here --
# confirm the browser's XPath engine matches it against the page's markup.
page1_rows = driver.find_elements_by_xpath("//table[@Class='fmTbl']/tbody/tr/td/a")
def find_number_of_items(driver, start_url=None, delay=10):
    """Open every link of the landing-page table and record its item count.

    ``driver`` is expected to be showing the landing page already. For each
    link in the 'fmTbl' table the function clicks it, waits, reads the first
    number in the pager cell of the page that opens, and then reloads the
    landing page.

    Args:
        driver: Selenium WebDriver (or any object offering the same
            ``find_element(s)_by_xpath`` and ``get`` methods).
        start_url: Page to reload after each link; defaults to the
            module-level ``root_url``.
        delay: Seconds to wait after each click (default 10, as before).

    Returns:
        A list with one single-entry dict ``{link text: count}`` per link, in
        table order.

    Raises:
        AttributeError: if the pager cell contains no digits.
    """
    if start_url is None:
        start_url = root_url
    links_xpath = "//table[@Class='fmTbl']/tbody/tr/td/a"
    count_xpath = '//*[@id="page"]/form/table[3]/tbody/tr/td/table[4]/tbody/tr/td[1]'

    item_number_list = []
    # Count the links up front. The original read `page1_rows` here, but that
    # name is assigned inside this function and was therefore an unbound
    # local at this point (UnboundLocalError).
    link_total = len(driver.find_elements_by_xpath(links_xpath))
    for row in range(link_total):
        # The page is reloaded on every pass, so fetch the links again
        # instead of reusing elements from the previous page load.
        page1_rows = driver.find_elements_by_xpath(links_xpath)
        name = page1_rows[row].text
        page1_rows[row].click()
        time.sleep(delay)
        count_text = driver.find_element_by_xpath(count_xpath).text
        items_by_categ = {name: int(re.search(r'\d+', count_text).group())}
        driver.get(start_url)
        print(items_by_categ)
        item_number_list.append(items_by_categ)
    return item_number_list
# NOTE(review): the scraped result is immediately replaced by hard-coded
# counts, and the inner loop below only covers rows 15-24 -- both look like
# debugging leftovers; confirm before relying on this script.
lis = find_number_of_items(driver)
lis = [7,25,13,17]

category_links_xpath = "//table[@Class='fmTbl']/tbody/tr/td/a"
# The original locator started with `//[`, which is not valid XPath; `//*[`
# matches the form used for the same page elsewhere in this file.
next_page_xpath = '//*[@id="page"]/form/table[3]/tbody/tr/td/table[4]/tbody/tr/td[2]/a[1]'
# Page size assumed by the pagination arithmetic below -- TODO confirm.
rows_per_page = 9

# NOTE(review): nesting reconstructed from a copy that had lost its
# indentation -- confirm against the author's version.
# The first entry is skipped, as in the original `lis[1:]`. enumerate() keeps
# the index correct even when two entries hold the same count (lis.index()
# would return the first match).
for indx, val in enumerate(lis[1:], 1):
    page1_rows = driver.find_elements_by_xpath(category_links_xpath)
    page1_rows[indx].click()
    time.sleep(10)
    #for row in range(val):
    for row in range(15,25):
        page2_rows = driver.find_elements_by_xpath(category_links_xpath)
        print("row number %s" % row)
        # Position of the row on its page. Kept separate from `row`, because
        # overwriting `row` made the end-of-listing check below unreachable.
        row_on_page = row
        #this would handle the pagination
        if row > rows_per_page:
            print("inside if")
            # `row/9` was Python 2 integer division; `//` keeps that result
            # on Python 3 as well.
            click_nxtpage_count = row // rows_per_page
            print("nxt page count %s" % click_nxtpage_count)
            for _ in range(click_nxtpage_count):
                #need to change the next page clickable
                #driver.find_elements_by_xpath("//[contains(text(), 'My Button')]")
                driver.find_element_by_xpath(next_page_xpath).click()
                time.sleep(10)
            page2_rows = driver.find_elements_by_xpath(category_links_xpath)
            row_on_page = row % rows_per_page
        page2_rows[row_on_page].click()
        time.sleep(10)
        driver.get(root_url)
        if val - 1 == row:
            # Last row of this listing: leave the browser on the landing page
            # for the next outer iteration.
            break
        # Re-open the same listing for the next row.
        page1_rows = driver.find_elements_by_xpath(category_links_xpath)
        page1_rows[indx].click()
        time.sleep(10)