Skip to content

Instantly share code, notes, and snippets.

@grole01
Created April 5, 2021 08:46
Show Gist options
  • Save grole01/406c7ce797f2814bdf8f981056ef627d to your computer and use it in GitHub Desktop.
Save grole01/406c7ce797f2814bdf8f981056ef627d to your computer and use it in GitHub Desktop.
google_sheets
{
"type": "service_account",
"project_id": "testscraper-307218",
"private_key_id": "5b7a4bdc6d21048934083bb4f623b9940985f953",
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCzY34Od+MONfE3\nc82y7+7Ga7FjSPjXSce8oBLg2pQ/l7JsyI+LcHO1XKIXIkdhm08pWyyw4PourRus\n7/vEKGps0Q431xDedXE22vIFqsZz2i9LtFFREtEWd+YJIRsJtQCP+bGOa3v+0Q60\nTzmhaOP7/0Np26N2OTR1WRXrCfZOMr6YSoVvMbzTjjuADPmz4wHgSiZmqyv+8lVn\nC+bEFGMiMkTbMVLzjnDdjPPyXMAdTKAHwjaZFqsDiQfn9wFyzHwH9FGm4WtcNq7l\nI7PLx6G4nH/IGRsQEnqFWuipkj8bhT3m88uMcxjgkAJD+Zl60PXG88xo54O20gA9\nXqYXChbzAgMBAAECggEADwifL4Pj7HSgLYnaoEyz/GmuvmexgGFiMtr7b2ACHT46\nVBw5eMceJgHdt7CbaNZaO64KFcdqUBpuf6obtbm+tDXk9NBk99pseDDAfbHChe0F\nb65smNh0hhHa88LpgCvCIKQdZEElGBrZxPSqxhiDRW2EGc1av/VVVyGge50kc4hw\nWlpdtuENtaDWvyoKz5ix1W3otCn7vFA99mj6mbBXtAj/2Fdkr316SiHc3atkSBSX\nAqhniBl3C6+IaVMB2c1dsZI1tN9xH+pKc8kk/WSGMiSqAb1uIlnB8IXN5jCPD1Zf\nq5Eg5+8zqbjSKv3+F5ZiQ2lnnXQM7yLF0M/sh2MBgQKBgQDviM4+yvMltR/+mSwA\n0xT8bwpChVR9qDAQFhPuk+c6JY+tl80i/S+c0j4GXYgZVxhiELRWJp5YIZHGtj53\n7Mf4kAFQzJOoUV6pex1WPJU3SkAt4xSgQw1BfUno2ZAGFV0HnmdsQ4SQaxPASxJQ\n8IC+lPmMK40FOHpgQbB+RvOpQQKBgQC/uEZgKDmXUfTI42yGY4IGtesxW+BfddFo\nArpdlNOWcQz+FeNytsVJzN/TDP7x512YyB0cCdRrUKd399eeQix/apeSYdcHmu2p\njpYTbHqyVd2YPfIDJYXqisIpT8cP/wVgz3a9C+3iO/IQG5ruejnhcQqYK0jDm6yF\nM5QNpmCfMwKBgQDMPvDuJguSWZFAfMdCpLo+TUiIVDwpMWJaNRkl08PUlvaeZNQs\nQ4t3wuXUc6h7QwKti+TZmPBn4af1sqwtgh85WUFl7irx2QBIr4viXqI1lQCIoBqU\nVNhWwUhmXi0qosjCmFVDwYalxkf/LYQetBRzbYAj/3atvuzKavjdlA5sgQKBgGuN\n131OU7C+e5LPcyyZWnKlUAJqxp8HG6k8Do09Vs5JMb7BTM4oFtnmI5Lk8YmIAIwb\nh6kZIp/pqWm2ErJn8Uqx48JtPRK5Kac7pJnU+6hcX2pvJfv56vy/R1mhtc0VhlDf\nUn+P165Zbtz2f55V127P2d+wOKv0A37ehqp5si7dAoGAWnmmrRhK5shVSFsjiEzZ\nfz2icyrPyP29MSiTzHxF5OlZxVoHQ7S02SFvnTbCC9X9v4QI64BZFPUHFh84AVtk\ntX03JbcS5sPbZWbxd84scnV71Tju9M6IE6avT06pHIlsHaLgh6RCuOU4JYFPLRIb\n0N5C+RMRjtZMJcq40YAt8Gg=\n-----END PRIVATE KEY-----\n",
"client_email": "[email protected]",
"client_id": "114963129894349703611",
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://oauth2.googleapis.com/token",
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/testscraper%40testscraper-307218.iam.gserviceaccount.com"
}
import selenium
from selenium import webdriver
from bs4 import BeautifulSoup
from parsel import Selector
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time
import gspread
import pandas as pd
# Page listing the staff of the department to scrape.
SGDI_URL = "https://www.sgdi.gov.sg/ministries/mci/departments/au"

# Column order of the header row written to the worksheet.
SHEET_COLUMNS = ["Name", "Tel", "Email"]


def join_text_fragments(fragments):
    """Concatenate text fragments in order and normalise whitespace.

    The phone number on the page is split across a plain text node and a
    ``<u>`` element.  The previous code combined them with
    ``a.join(b)``, which inserts ``a`` between every character of ``b``
    instead of concatenating, and raised when either node was missing.

    Args:
        fragments: Iterable of strings (``None`` entries are skipped).

    Returns:
        The fragments joined in the given order, with leading/trailing
        whitespace removed and internal whitespace runs collapsed to a
        single space.  Returns ``""`` when there is nothing to join.
    """
    combined = "".join(piece for piece in fragments if piece)
    return " ".join(combined.split())


def parse_contacts(page_source):
    """Extract one contact record per staff list item from *page_source*.

    Args:
        page_source: HTML text of the staff listing page.

    Returns:
        A list of dicts with the keys ``"Name"``, ``"Tel"`` and ``"Email"``.
        Missing nodes produce empty strings rather than ``None``.
    """
    contacts = []
    for item in Selector(text=page_source).xpath("/html/body/form/div[6]//ul/li"):
        contact = {
            "Name": item.xpath(".//span[1]/div[2]/text()").extract_first(default=""),
            # ``//text()`` yields every text node of the phone cell in
            # document order, so the plain and underlined parts are joined
            # in the order they appear on the page.
            "Tel": join_text_fragments(
                item.xpath(".//span[2]/div[1]//text()").extract()
            ),
            "Email": item.xpath(".//span[2]/div[2]/text()").extract_first(default=""),
        }
        print(contact)
        contacts.append(contact)
    return contacts


def main():
    """Scrape the staff listing and write it to the ``scrapetosheet`` sheet."""
    # NOTE(review): newer Selenium releases expect the driver path to be passed
    # through a Service object rather than positionally - confirm against the
    # installed Selenium version.
    driver = webdriver.Chrome(ChromeDriverManager().install())
    try:
        driver.get(SGDI_URL)
        page_source = driver.page_source
    finally:
        # quit() ends the whole browser session, not just the current window,
        # and runs even when loading the page fails.
        driver.quit()

    # Explicit columns keep the header row intact when nothing was scraped.
    df = pd.DataFrame(parse_contacts(page_source), columns=SHEET_COLUMNS)

    gc = gspread.service_account(filename="creds.json")
    sh = gc.open("scrapetosheet").sheet1
    sh.update([df.columns.values.tolist()] + df.values.tolist())


if __name__ == "__main__":
    main()
# Sample script in the shape of the PyCharm new-project template.
# See PyCharm help at https://www.jetbrains.com/help/pycharm/


def print_hi(name):
    """Print a greeting for *name* to standard output."""
    greeting = f'Hi, {name}'
    print(greeting)


# Greet a fixed name only when this file is executed directly.
if __name__ == '__main__':
    print_hi('PyCharm')
import gspread
# Authenticate with the service-account key stored in creds.json.
gc = gspread.service_account(filename="creds.json")

# Open the spreadsheet by title and keep a handle on its first worksheet.
spreadsheet = gc.open("scrapetosheet")
sh = spreadsheet.sheet1

# Manual smoke test, left disabled: sh.update("A1", "test")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment