Skip to content

Instantly share code, notes, and snippets.

@eliasdabbas
Last active April 29, 2023 19:03
Show Gist options
  • Save eliasdabbas/79b7e625d819cd7b5b140111cd071247 to your computer and use it in GitHub Desktop.
Save eliasdabbas/79b7e625d819cd7b5b140111cd071247 to your computer and use it in GitHub Desktop.
import advertools as adv
import pandas as pd
# Show every column when a DataFrame is displayed (None removes the limit).
pd.set_option('display.max_columns', None)
# Mapping of cancer name -> cancer type (body system / category).
# Copied from https://en.wikipedia.org/wiki/List_of_cancer_types
#
# The keys are used verbatim to build search queries, so they must be unique
# and free of stray whitespace.
cancers = {
    # Bone and muscle sarcoma
    "Chondrosarcoma": "Bone and muscle sarcoma",
    "Ewing's sarcoma": "Bone and muscle sarcoma",
    # NOTE(review): differs from the next key only by letter case, so it
    # yields a near-identical query -- confirm whether both are wanted.
    "osteosarcoma": "Bone and muscle sarcoma",
    "Osteosarcoma": "Bone and muscle sarcoma",
    "Rhabdomyosarcoma": "Bone and muscle sarcoma",
    "Leiomyosarcoma": "Bone and muscle sarcoma",
    "Myxosarcoma": "Bone and muscle sarcoma",
    # Brain and nervous system
    "Astrocytoma": "Brain and nervous system",
    "Brainstem glioma": "Brain and nervous system",
    "Pilocytic astrocytoma": "Brain and nervous system",
    "Ependymoma": "Brain and nervous system",
    "Primitive neuroectodermal tumor": "Brain and nervous system",
    "Cerebellar astrocytoma": "Brain and nervous system",
    "Cerebral astrocytoma": "Brain and nervous system",
    "Glioblastoma": "Brain and nervous system",
    "Glioma": "Brain and nervous system",
    "Medulloblastoma": "Brain and nervous system",
    "Neuroblastoma": "Brain and nervous system",
    "Oligodendroglioma": "Brain and nervous system",
    "Pineal astrocytoma": "Brain and nervous system",
    "Pituitary adenoma": "Brain and nervous system",
    "hypothalamic glioma": "Brain and nervous system",
    # Breast
    "Breast cancer": "Breast",
    "Inflammatory breast cancer": "Breast",
    "Invasive lobular carcinoma": "Breast",
    "Tubular carcinoma": "Breast",
    "Invasive cribriform carcinoma": "Breast",
    "Medullary carcinoma": "Breast",
    "Male breast cancer": "Breast",
    "Phyllodes tumor": "Breast",
    # Endocrine system
    "Adrenocortical carcinoma": "Endocrine system",
    "Islet cell carcinoma": "Endocrine system",
    "Parathyroid cancer": "Endocrine system",
    "Pheochromocytoma": "Endocrine system",
    "Thyroid cancer": "Endocrine system",
    # Eye
    "Uveal melanoma": "Eye",
    "Retinoblastoma": "Eye",
    "Optic nerve glioma": "Eye",
    # Gastrointestinal
    "Anal cancer": "Gastrointestinal",
    "Appendix cancer": "Gastrointestinal",
    "Cholangiocarcinoma": "Gastrointestinal",
    "Carcinoid tumor, gastrointestinal": "Gastrointestinal",
    "Colon cancer": "Gastrointestinal",
    "Extrahepatic bile duct cancer": "Gastrointestinal",
    "Gallbladder cancer": "Gastrointestinal",
    "stomach cancer": "Gastrointestinal",
    "Gastrointestinal carcinoid tumor": "Gastrointestinal",
    "Gastrointestinal stromal tumor": "Gastrointestinal",
    "Hepatocellular cancer": "Gastrointestinal",
    "Pancreatic cancer, islet cell": "Gastrointestinal",
    "Rectal cancer": "Gastrointestinal",
    "Small intestine cancer": "Gastrointestinal",
    # Genitourinary and gynecologic
    "Bladder cancer": "Genitourinary and gynecologic",
    "Cervical cancer": "Genitourinary and gynecologic",
    "Endometrial cancer": "Genitourinary and gynecologic",
    "Extragonadal germ cell tumor": "Genitourinary and gynecologic",
    "Ovarian cancer": "Genitourinary and gynecologic",
    "Ovarian epithelial cancer": "Genitourinary and gynecologic",
    "Ovarian germ cell tumor": "Genitourinary and gynecologic",
    "Penile cancer": "Genitourinary and gynecologic",
    "Kidney cancer": "Genitourinary and gynecologic",
    "Renal cell carcinoma": "Genitourinary and gynecologic",
    "Renal pelvis cancer": "Genitourinary and gynecologic",
    "Prostate cancer": "Genitourinary and gynecologic",
    "Testicular cancer": "Genitourinary and gynecologic",
    "Gestational trophoblastic tumor": "Genitourinary and gynecologic",
    "Urethral cancer": "Genitourinary and gynecologic",
    "Uterine sarcoma": "Genitourinary and gynecologic",
    "Vaginal cancer": "Genitourinary and gynecologic",
    "Vulvar cancer": "Genitourinary and gynecologic",
    "Wilms tumor": "Genitourinary and gynecologic",
    "nephroblastoma": "Genitourinary and gynecologic",
    # Head and neck
    "Esophageal cancer": "Head and neck",
    "Head and neck cancer": "Head and neck",
    "Nasopharyngeal carcinoma": "Head and neck",
    "Oral cancer": "Head and neck",
    "Oropharyngeal cancer": "Head and neck",
    "Paranasal sinus and nasal cavity cancer": "Head and neck",
    "Pharyngeal cancer": "Head and neck",
    "Salivary gland cancer": "Head and neck",
    "Hypopharyngeal cancer": "Head and neck",
    # Hematopoietic
    "Acute biphenotypic leukemia": "Hematopoietic",
    "Acute eosinophilic leukemia": "Hematopoietic",
    "Acute lymphoblastic leukemia": "Hematopoietic",
    "Acute myeloid leukemia": "Hematopoietic",
    "Acute myeloid dendritic cell leukemia": "Hematopoietic",
    "AIDS-related lymphoma": "Hematopoietic",
    "Anaplastic large cell lymphoma": "Hematopoietic",
    "Angioimmunoblastic T-cell lymphoma": "Hematopoietic",
    "B-cell prolymphocytic leukemia": "Hematopoietic",
    "Burkitt's lymphoma": "Hematopoietic",
    "Chronic lymphocytic leukemia": "Hematopoietic",
    "Chronic myelogenous leukemia": "Hematopoietic",
    "Cutaneous T-cell lymphoma": "Hematopoietic",
    "Diffuse large B-cell lymphoma": "Hematopoietic",
    "Follicular lymphoma": "Hematopoietic",
    "Hairy cell leukemia": "Hematopoietic",
    "Hepatosplenic T-cell lymphoma": "Hematopoietic",
    "Hodgkin's lymphoma": "Hematopoietic",
    "Intravascular large B-cell lymphoma": "Hematopoietic",
    "Large granular lymphocytic leukemia": "Hematopoietic",
    "Lymphoplasmacytic lymphoma": "Hematopoietic",
    "Lymphomatoid granulomatosis": "Hematopoietic",
    "Mantle cell lymphoma": "Hematopoietic",
    "Marginal zone B-cell lymphoma": "Hematopoietic",
    "Mast cell leukemia": "Hematopoietic",
    "Mediastinal large B cell lymphoma": "Hematopoietic",
    "Multiple myeloma": "Hematopoietic",
    "Myelodysplastic syndromes": "Hematopoietic",
    "Mucosa-associated lymphoid tissue lymphoma": "Hematopoietic",
    "Mycosis fungoides": "Hematopoietic",
    "Nodal marginal zone B cell lymphoma": "Hematopoietic",
    "Non-Hodgkin lymphoma": "Hematopoietic",
    "Precursor B lymphoblastic leukemia": "Hematopoietic",
    "Primary central nervous system lymphoma": "Hematopoietic",
    "Primary cutaneous follicular lymphoma": "Hematopoietic",
    "Primary cutaneous immunocytoma": "Hematopoietic",
    "Primary effusion lymphoma": "Hematopoietic",
    "Plasmablastic lymphoma": "Hematopoietic",
    "Sézary syndrome": "Hematopoietic",
    "Splenic marginal zone lymphoma": "Hematopoietic",
    "T-cell prolymphocytic leukemia": "Hematopoietic",
    # Skin
    "Basal cell carcinoma": "Skin",
    "Squamous cell carcinoma": "Skin",
    "Squamous cell skin cancer": "Skin",
    # Trailing space removed from this key so the generated queries do not
    # contain a double space.
    "Skin adnexal tumors": "Skin",
    "Melanoma": "Skin",
    # Listed once only: this key used to appear a second time under
    # "Endocrine system", but a dict literal keeps just the last value for a
    # repeated key, so that entry was silently discarded.
    "Merkel cell carcinoma": "Skin",
    "Keratoacanthoma": "Skin",
    # Thoracic and respiratory
    "Adenocarcinoma of the lung": "Thoracic and respiratory",
    "Bronchial adenomas": "Thoracic and respiratory",
    "Small cell lung cancer": "Thoracic and respiratory",
    "Mesothelioma": "Thoracic and respiratory",
    # NOTE(review): the next two names look truncated (possibly
    # "Non-small cell lung ..."); confirm against the source list.
    "cell lung cancer": "Thoracic and respiratory",
    "cell lung carcinoma": "Thoracic and respiratory",
    "Pleuropulmonary blastoma": "Thoracic and respiratory",
    "Laryngeal cancer": "Thoracic and respiratory",
    "Thymoma and thymic carcinoma": "Thoracic and respiratory",
    "Squamous-cell carcinoma of the lung": "Thoracic and respiratory",
}
# Build one search query per cancer name for each of the two intents.
symptoms = [f'{name} symptoms' for name in cancers]
treatment = [f'{name} treatment' for name in cancers]

# `cx` and `key` are not defined anywhere in this snippet; both must be set
# before this point or the calls below raise NameError. `cx` is the ID of a
# Google custom search engine (https://cse.google.com/cse/).
# NOTE(review): `key` is presumably the matching Google API key -- confirm.
symptoms_serp = adv.serp_goog(q=symptoms, cx=cx, key=key)
treatment_serp = adv.serp_goog(q=treatment, cx=cx, key=key)
# serp_heatmap function definition: https://bit.ly/3vIb2yK
# Draw one heatmap per distinct value of serp['cancer_type'], titled with the
# cancer type and the number of distinct search terms in that subset.
# `serp` and `serp_heatmap` are not defined in this snippet.
# NOTE(review): the original indentation was lost; the trailing blank-line
# output is assumed to belong to the loop body (spacing between figures).
cancer_types = serp['cancer_type']
for cancertype in cancer_types.unique():
    df = serp[cancer_types.eq(cancertype)]
    fig = serp_heatmap(df)
    keyword_count = df['searchTerms'].nunique()
    fig.layout.title = f"Cancer type: <b>{cancertype} ({keyword_count} keywords)</b>"
    fig.show()
    # Two empty lines after each figure.
    print('\n')
# Top N domains per SERP rank.
#
# Counts how many times each domain (`displayLink`) appears at each `rank` in
# `serp`, keeps the N most frequent domains for every rank, and renders the
# result as a styled table with an in-cell bar for the counts.
# `serp` is not defined in this snippet. The expression is left bare, so the
# styled table is only rendered when this is the last expression of a
# notebook cell.
N = 5
(serp
 .groupby(['rank', 'displayLink'])
 ['displayLink']
 .count()
 # Name the counts column while resetting the index; this avoids a clash
 # with the 'displayLink' index level of the same name.
 .reset_index(name='count')
 .rename(columns={'displayLink': 'domain'})
 .sort_values(['rank', 'count'], ascending=[True, False])
 .reset_index(drop=True)
 # Rows are sorted by count within each rank, so head(N) keeps the top N.
 .groupby('rank').head(N)
 .style.bar(subset=['count'], color='darkgray')
 .hide(axis='index')
 # Build the caption from N so it stays correct when N is changed.
 .set_caption(f'<h2>Top {N} appearances per SERP rank</h2>'))
@eliasdabbas
Copy link
Author

eliasdabbas commented Apr 28, 2022

@Rafalbala
Copy link

NameError Traceback (most recent call last)
in
152 treatment = [f'{cancer} treatment' for cancer in cancers.keys()]
153
--> 154 symptoms_serp = adv.serp_goog(cx=cx, key=key, q=symptoms)
155 treatment_serp = adv.serp_goog(cx=cx, key=key, q=treatment)
156

NameError: name 'cx' is not defined

@eliasdabbas
Copy link
Author

`NameError: name 'cx' is not defined` means you need to create a custom search engine and get its ID.
That ID should be saved in a variable called `cx`.

https://cse.google.com/cse/

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment