Last active
April 29, 2023 19:03
-
-
Save eliasdabbas/79b7e625d819cd7b5b140111cd071247 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import advertools as adv | |
import pandas as pd | |
pd.options.display.max_columns = None | |
# Cancer name -> cancer type (body system).
# Copied from https://en.wikipedia.org/wiki/List_of_cancer_types
#
# Each key is used verbatim to build the search queries, so keys must be
# unique and free of stray whitespace.
cancers = {
    "Chondrosarcoma": "Bone and muscle sarcoma",
    "Ewing's sarcoma": "Bone and muscle sarcoma",
    # NOTE(review): "osteosarcoma" and "Osteosarcoma" differ only by case and
    # yield near-identical queries -- confirm whether both are intended.
    "osteosarcoma": "Bone and muscle sarcoma",
    "Osteosarcoma": "Bone and muscle sarcoma",
    "Rhabdomyosarcoma": "Bone and muscle sarcoma",
    "Leiomyosarcoma": "Bone and muscle sarcoma",
    "Myxosarcoma": "Bone and muscle sarcoma",
    "Astrocytoma": "Brain and nervous system",
    "Brainstem glioma": "Brain and nervous system",
    "Pilocytic astrocytoma": "Brain and nervous system",
    "Ependymoma": "Brain and nervous system",
    "Primitive neuroectodermal tumor": "Brain and nervous system",
    "Cerebellar astrocytoma": "Brain and nervous system",
    "Cerebral astrocytoma": "Brain and nervous system",
    "Glioblastoma": "Brain and nervous system",
    "Glioma": "Brain and nervous system",
    "Medulloblastoma": "Brain and nervous system",
    "Neuroblastoma": "Brain and nervous system",
    "Oligodendroglioma": "Brain and nervous system",
    "Pineal astrocytoma": "Brain and nervous system",
    "Pituitary adenoma": "Brain and nervous system",
    "hypothalamic glioma": "Brain and nervous system",
    "Breast cancer": "Breast",
    "Inflammatory breast cancer": "Breast",
    "Invasive lobular carcinoma": "Breast",
    "Tubular carcinoma": "Breast",
    "Invasive cribriform carcinoma": "Breast",
    "Medullary carcinoma": "Breast",
    "Male breast cancer": "Breast",
    "Phyllodes tumor": "Breast",
    "Adrenocortical carcinoma": "Endocrine system",
    "Islet cell carcinoma": "Endocrine system",
    "Parathyroid cancer": "Endocrine system",
    "Pheochromocytoma": "Endocrine system",
    "Thyroid cancer": "Endocrine system",
    # "Merkel cell carcinoma" used to be listed here as well; that entry was
    # silently overridden by the later "Skin" entry (duplicate dict key), so
    # only the effective "Skin" entry is kept below.
    "Uveal melanoma": "Eye",
    "Retinoblastoma": "Eye",
    "Optic nerve glioma": "Eye",
    "Anal cancer": "Gastrointestinal",
    "Appendix cancer": "Gastrointestinal",
    "Cholangiocarcinoma": "Gastrointestinal",
    "Carcinoid tumor, gastrointestinal": "Gastrointestinal",
    "Colon cancer": "Gastrointestinal",
    "Extrahepatic bile duct cancer": "Gastrointestinal",
    "Gallbladder cancer": "Gastrointestinal",
    "stomach cancer": "Gastrointestinal",
    "Gastrointestinal carcinoid tumor": "Gastrointestinal",
    "Gastrointestinal stromal tumor": "Gastrointestinal",
    "Hepatocellular cancer": "Gastrointestinal",
    "Pancreatic cancer, islet cell": "Gastrointestinal",
    "Rectal cancer": "Gastrointestinal",
    "Small intestine cancer": "Gastrointestinal",
    "Bladder cancer": "Genitourinary and gynecologic",
    "Cervical cancer": "Genitourinary and gynecologic",
    "Endometrial cancer": "Genitourinary and gynecologic",
    "Extragonadal germ cell tumor": "Genitourinary and gynecologic",
    "Ovarian cancer": "Genitourinary and gynecologic",
    "Ovarian epithelial cancer": "Genitourinary and gynecologic",
    "Ovarian germ cell tumor": "Genitourinary and gynecologic",
    "Penile cancer": "Genitourinary and gynecologic",
    "Kidney cancer": "Genitourinary and gynecologic",
    "Renal cell carcinoma": "Genitourinary and gynecologic",
    "Renal pelvis cancer": "Genitourinary and gynecologic",
    "Prostate cancer": "Genitourinary and gynecologic",
    "Testicular cancer": "Genitourinary and gynecologic",
    "Gestational trophoblastic tumor": "Genitourinary and gynecologic",
    "Urethral cancer": "Genitourinary and gynecologic",
    "Uterine sarcoma": "Genitourinary and gynecologic",
    "Vaginal cancer": "Genitourinary and gynecologic",
    "Vulvar cancer": "Genitourinary and gynecologic",
    "Wilms tumor": "Genitourinary and gynecologic",
    "nephroblastoma": "Genitourinary and gynecologic",
    "Esophageal cancer": "Head and neck",
    "Head and neck cancer": "Head and neck",
    "Nasopharyngeal carcinoma": "Head and neck",
    "Oral cancer": "Head and neck",
    "Oropharyngeal cancer": "Head and neck",
    "Paranasal sinus and nasal cavity cancer": "Head and neck",
    "Pharyngeal cancer": "Head and neck",
    "Salivary gland cancer": "Head and neck",
    "Hypopharyngeal cancer": "Head and neck",
    "Acute biphenotypic leukemia": "Hematopoietic",
    "Acute eosinophilic leukemia": "Hematopoietic",
    "Acute lymphoblastic leukemia": "Hematopoietic",
    "Acute myeloid leukemia": "Hematopoietic",
    "Acute myeloid dendritic cell leukemia": "Hematopoietic",
    "AIDS-related lymphoma": "Hematopoietic",
    "Anaplastic large cell lymphoma": "Hematopoietic",
    "Angioimmunoblastic T-cell lymphoma": "Hematopoietic",
    "B-cell prolymphocytic leukemia": "Hematopoietic",
    "Burkitt's lymphoma": "Hematopoietic",
    "Chronic lymphocytic leukemia": "Hematopoietic",
    "Chronic myelogenous leukemia": "Hematopoietic",
    "Cutaneous T-cell lymphoma": "Hematopoietic",
    "Diffuse large B-cell lymphoma": "Hematopoietic",
    "Follicular lymphoma": "Hematopoietic",
    "Hairy cell leukemia": "Hematopoietic",
    "Hepatosplenic T-cell lymphoma": "Hematopoietic",
    "Hodgkin's lymphoma": "Hematopoietic",
    "Intravascular large B-cell lymphoma": "Hematopoietic",
    "Large granular lymphocytic leukemia": "Hematopoietic",
    "Lymphoplasmacytic lymphoma": "Hematopoietic",
    "Lymphomatoid granulomatosis": "Hematopoietic",
    "Mantle cell lymphoma": "Hematopoietic",
    "Marginal zone B-cell lymphoma": "Hematopoietic",
    "Mast cell leukemia": "Hematopoietic",
    "Mediastinal large B cell lymphoma": "Hematopoietic",
    "Multiple myeloma": "Hematopoietic",
    "Myelodysplastic syndromes": "Hematopoietic",
    "Mucosa-associated lymphoid tissue lymphoma": "Hematopoietic",
    "Mycosis fungoides": "Hematopoietic",
    "Nodal marginal zone B cell lymphoma": "Hematopoietic",
    "Non-Hodgkin lymphoma": "Hematopoietic",
    "Precursor B lymphoblastic leukemia": "Hematopoietic",
    "Primary central nervous system lymphoma": "Hematopoietic",
    "Primary cutaneous follicular lymphoma": "Hematopoietic",
    "Primary cutaneous immunocytoma": "Hematopoietic",
    "Primary effusion lymphoma": "Hematopoietic",
    "Plasmablastic lymphoma": "Hematopoietic",
    "Sézary syndrome": "Hematopoietic",
    "Splenic marginal zone lymphoma": "Hematopoietic",
    "T-cell prolymphocytic leukemia": "Hematopoietic",
    "Basal cell carcinoma": "Skin",
    "Squamous cell carcinoma": "Skin",
    "Squamous cell skin cancer": "Skin",
    # Trailing space removed from this key so the generated queries do not
    # contain a double space.
    "Skin adnexal tumors": "Skin",
    "Melanoma": "Skin",
    "Merkel cell carcinoma": "Skin",
    "Keratoacanthoma": "Skin",
    "Adenocarcinoma of the lung": "Thoracic and respiratory",
    "Bronchial adenomas": "Thoracic and respiratory",
    "Small cell lung cancer": "Thoracic and respiratory",
    "Mesothelioma": "Thoracic and respiratory",
    # NOTE(review): the next two names look truncated (possibly a lost
    # "Non-small" prefix) -- confirm against the source list before changing.
    "cell lung cancer": "Thoracic and respiratory",
    "cell lung carcinoma": "Thoracic and respiratory",
    "Pleuropulmonary blastoma": "Thoracic and respiratory",
    "Laryngeal cancer": "Thoracic and respiratory",
    "Thymoma and thymic carcinoma": "Thoracic and respiratory",
    "Squamous-cell carcinoma of the lung": "Thoracic and respiratory",
}
# Build one "<cancer> symptoms" and one "<cancer> treatment" query per cancer.
symptoms = [f'{cancer} symptoms' for cancer in cancers]
treatment = [f'{cancer} treatment' for cancer in cancers]

# `cx` (the custom search engine ID) and `key` are not defined in this file;
# both must be assigned before this point or the calls raise NameError.
symptoms_serp = adv.serp_goog(cx=cx, key=key, q=symptoms)
treatment_serp = adv.serp_goog(cx=cx, key=key, q=treatment)

# The analysis that follows reads a single `serp` frame with a `cancer_type`
# column, so combine both result sets and label every row through its query.
# Assumes `searchTerms` holds each query exactly as submitted -- TODO confirm.
query_to_type = {
    f'{cancer} {suffix}': cancer_type
    for cancer, cancer_type in cancers.items()
    for suffix in ('symptoms', 'treatment')
}
serp = pd.concat([symptoms_serp, treatment_serp], ignore_index=True)
serp['cancer_type'] = serp['searchTerms'].map(query_to_type)
# serp_heatmap function definition: https://bit.ly/3vIb2yK
# Render one heatmap per cancer type, titled with the type and the number of
# distinct keywords found for it.
for cancer_type in serp['cancer_type'].unique():
    type_rows = serp[serp['cancer_type'].eq(cancer_type)]
    heatmap = serp_heatmap(type_rows)
    keyword_count = type_rows['searchTerms'].nunique()
    heatmap.layout.title = f"Cancer type: <b>{cancer_type} ({keyword_count} keywords)</b>"
    heatmap.show()
    # Two empty lines after each figure (assumed to belong to the loop body;
    # the original indentation was lost).
    print("\n")
# Top N domains per SERP rank: count how often each domain appears at each
# rank, keep the N most frequent per rank, and render a styled table.
N = 5
(serp
 .groupby(['rank', 'displayLink'])
 .size()  # appearances of each domain at each rank
 .reset_index(name='count')
 .rename(columns={'displayLink': 'domain'})
 .sort_values(['rank', 'count'], ascending=[True, False])
 .reset_index(drop=True)
 .groupby('rank').head(N)
 .style.bar(subset=['count'], color='darkgray')
 .hide(axis='index')
 # Caption follows N instead of a hard-coded "5".
 .set_caption(f'<h2>Top {N} appearances per SERP rank</h2>'))
NameError: name 'cx' is not defined
You need to create a custom search engine and get its ID.
That ID should be saved in a variable called cx.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Raw data: https://bit.ly/3LrX4YF