Skip to content

Instantly share code, notes, and snippets.

@eliasdabbas
Created May 22, 2024 11:19
Show Gist options
  • Save eliasdabbas/137c0b2f896444bf122924785521f9fa to your computer and use it in GitHub Desktop.
Save eliasdabbas/137c0b2f896444bf122924785521f9fa to your computer and use it in GitHub Desktop.
Create and SEO crawler in list mode working on Google Sheets
import pandas as pd
xpath = pd.read_html('https://advertools.readthedocs.io/en/master/advertools.code_recipes.spider_strategies.html')
df = xpath[2].iloc[:, [1, 2]]
df = df.assign(expression=[f'=textjoin("@@",100,IMPORTXML(A2,"{expression}"))' for expression in df['XPath Expression']])
display(df[['Suggested Name', 'expression']].T)
df[['Suggested Name', 'expression']].T.to_clipboard(index=False)
@eliasdabbas
Copy link
Author

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
title meta_desc viewport charset h1 h2 h3 h4 h5 h6 canonical alt_href alt_hreflang og_props og_content twtr_names twtr_content iframe_src gtm_script gtm_noscript link_rel_rel link_rel_href link_rel_stylesheet css_links nav_links_text nav_links_href header_links_text header_links_href footer_links_text footer_links_href js_script_src js_script_text script_src canonical_parent
=textjoin("@@",100,IMPORTXML(A2,"//title/text()")) =textjoin("@@",100,IMPORTXML(A2,"//meta[@name='description']/@content")) =textjoin("@@",100,IMPORTXML(A2,"//meta[@name='viewport']/@content")) =textjoin("@@",100,IMPORTXML(A2,"//meta[@charset]/@charset")) =textjoin("@@",100,IMPORTXML(A2,"//h1/text()")) =textjoin("@@",100,IMPORTXML(A2,"//h2/text()")) =textjoin("@@",100,IMPORTXML(A2,"//h3/text()")) =textjoin("@@",100,IMPORTXML(A2,"//h4/text()")) =textjoin("@@",100,IMPORTXML(A2,"//h5/text()")) =textjoin("@@",100,IMPORTXML(A2,"//h6/text()")) =textjoin("@@",100,IMPORTXML(A2,"//link[@rel='canonical']/@href")) =textjoin("@@",100,IMPORTXML(A2,"//link[@rel='alternate']/@href")) =textjoin("@@",100,IMPORTXML(A2,"//link[@rel='alternate']/@hreflang")) =textjoin("@@",100,IMPORTXML(A2,"//meta[starts-with(@Property, 'og:')]/@Property")) =textjoin("@@",100,IMPORTXML(A2,"//meta[starts-with(@Property, 'og:')]/@content")) =textjoin("@@",100,IMPORTXML(A2,"//meta[starts-with(@name, 'twitter:')]/@name")) =textjoin("@@",100,IMPORTXML(A2,"//meta[starts-with(@name, 'twitter:')]/@content")) =textjoin("@@",100,IMPORTXML(A2,"//iframe/@src")) =textjoin("@@",100,IMPORTXML(A2,"//script[contains(@src, 'googletagmanager.com/gtm.js?id=')]/@src")) =textjoin("@@",100,IMPORTXML(A2,"//iframe[contains(@src, 'googletagmanager.com/ns.html?id=')]/@src")) =textjoin("@@",100,IMPORTXML(A2,"//link[@rel]/@rel")) =textjoin("@@",100,IMPORTXML(A2,"//link[@rel]/@href")) =textjoin("@@",100,IMPORTXML(A2,"//link[@rel='stylesheet']/@href")) =textjoin("@@",100,IMPORTXML(A2,"//link[contains(@href, '.css')]/@href")) =textjoin("@@",100,IMPORTXML(A2,"//nav//a/text()")) =textjoin("@@",100,IMPORTXML(A2,"//nav//a/@href")) =textjoin("@@",100,IMPORTXML(A2,"//header//a/text()")) =textjoin("@@",100,IMPORTXML(A2,"//header//a/@href")) =textjoin("@@",100,IMPORTXML(A2,"//footer//a/text()")) =textjoin("@@",100,IMPORTXML(A2,"//footer//a/@href")) =textjoin("@@",100,IMPORTXML(A2,"//script[@type='text/javascript']/@src")) =textjoin("@@",100,IMPORTXML(A2,"//script[@type='text/javascript']/text()")) =textjoin("@@",100,IMPORTXML(A2,"//script//@src")) =textjoin("@@",100,IMPORTXML(A2,"name(//link[@rel='canonical']/..)"))

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment