Created
June 1, 2023 21:03
-
-
Save josepsmartinez/fe75898ea183fb73082461f8c4f3dfd1 to your computer and use it in GitHub Desktop.
simbora
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import io | |
import PyPDF2 # pip install PyPDF2 | |
from seleniumwire import webdriver # pip install selenium-wire | |
def payload_from_gene_data(dataset: list[str], signature: str, cutoff_1, cutoff_2) -> dict[str, str]:
    """Build the form payload for one survival-analysis request.

    The returned dict is meant to be passed as ``data=`` to the POST against
    the GEPIA2 ``survival_zf.php`` endpoint used further down in this script.

    Args:
        dataset: Dataset names (e.g. ``['BRCA', 'COAD']``); sent as a single
            newline-separated string.
        signature: Gene (signature) to query, e.g. ``'ERBB2'``.
        cutoff_1: Value sent as ``groupcutoff1`` (converted with ``str``).
        cutoff_2: Value sent as ``groupcutoff2`` (converted with ``str``).

    Returns:
        A dict of form fields; every value is a string.
    """
    return {
        'methodoption': 'os',
        'dataset': '\n'.join(dataset),
        # Send the caller's gene; the literal string 'signature' used to be
        # sent here, which silently ignored the `signature` argument.
        'signature': signature,
        'highcol': '#ff0000',
        'lowcol': '#0000ff',
        'groupcutoff1': str(cutoff_1),
        'groupcutoff2': str(cutoff_2),
        'axisunit': 'month',
        'ifhr': 'hr',
        'ifconf': 'conf',
        'signature_norm': '',
        'is_sub': 'false',
        'subtype': '',
    }
# HTTP headers sent with every request made through `requests` in this script.
# The values describe a Chrome browser issuing an XMLHttpRequest from the
# GEPIA2 site (see 'User-Agent', 'Origin' and 'X-Requested-With').
headers = {
    "Accept": "application/json, text/javascript, */*; q=0.01",
    "Accept-Language": "en-US,en;q=0.9",
    "Cache-Control": "no-cache",
    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
    "Origin": "http://gepia2.cancer-pku.cn",
    "Pragma": "no-cache",
    "Proxy-Connection": "keep-alive",
    "Referer": "http://gepia2.cancer-pku.cn/",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/113.0.0.0 Safari/537.36"
    ),
    "X-Requested-With": "XMLHttpRequest",
}
# One entry per survival query to run; add more dicts here to request more
# analyses. Each dict holds the keyword arguments of payload_from_gene_data.
_gene_queries = (
    {'dataset': ['BRCA', 'COAD'], 'signature': 'ERBB2', 'cutoff_1': 50, 'cutoff_2': 50},
)
all_genes_data = [payload_from_gene_data(**query) for query in _gene_queries]
# Load the GEPIA2 survival page in a real Chrome session and copy whatever
# cookies the browser ends up with into a `requests` cookie jar, so the plain
# HTTP calls made later can send the same cookies.
driver = webdriver.Chrome(seleniumwire_options={
    'suppress_connection_errors': False,
})
try:
    driver.get('http://gepia2.cancer-pku.cn/#survival')
    cookies = requests.cookies.RequestsCookieJar()
    for cookie in driver.get_cookies():
        cookies.set(cookie['name'], cookie['value'], domain=cookie['domain'], path=cookie['path'])
finally:
    # The browser is only needed to collect cookies; close it even if the page
    # load fails so no Chrome/chromedriver process is left running.
    driver.quit()
# breakpoint() | |
# For every prepared payload: ask the server to render the survival plot as a
# PDF, download that PDF, and extract the text of its first page.
for data in all_genes_data:
    # Generate the PDF on the server.
    response = requests.post(
        'http://gepia2.cancer-pku.cn/assets/PHP4/survival_zf.php',
        data=data,
        verify=False,
        cookies=cookies,
        headers=headers)
    response.raise_for_status()

    # Download the PDF.
    # SECURITY: the body used to be passed to eval(), which would execute
    # arbitrary code sent by the server; it is parsed as JSON instead.
    # NOTE(review): assumes the endpoint answers with valid JSON containing
    # an "outdir" key -- confirm against a live response.
    response_dict = response.json()
    response = requests.get(
        f'http://gepia2.cancer-pku.cn/tmp/{response_dict["outdir"]}',
        cookies=cookies,
        headers=headers)
    response.raise_for_status()
    pdf_bytes = response.content

    # TODO: process the PDF
    # `bytes` is immutable and has no append(); concatenate a bytes literal.
    # NOTE(review): the marker is added only when the tail of the file lacks
    # one, to avoid a duplicate end-of-file marker -- confirm this matches
    # what the server actually returns.
    if b'%%EOF' not in pdf_bytes[-1024:]:
        pdf_bytes += b'%%EOF\r\n'
    read_pdf = PyPDF2.PdfReader(io.BytesIO(pdf_bytes))
    # `pages[...]` / `extract_text()` are the PdfReader-era replacements for
    # the removed `getPage()` / `extractText()` methods.
    page = read_pdf.pages[0]
    page_content = page.extract_text()
    # Debugging stop to inspect `page_content` for the current query.
    breakpoint()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment