This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import plotly.graph_objects as go | |
import pandas as pd | |
def serp_heatmap(df, num_domains=10, select_domain=None): | |
df = df.rename(columns={'domain': 'displayLink', | |
'searchTerms': 'keyword'}) | |
top_domains = df['displayLink'].value_counts()[:num_domains].index.tolist() | |
top_domains = df['displayLink'].value_counts()[:num_domains].index.tolist() | |
top_df = df[df['displayLink'].isin(top_domains) & df['displayLink'].ne('')] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from urllib.parse import urlsplit | |
import advertools as adv | |
sites = [ | |
'https://www.who.int', | |
'https://www.nytimes.com', | |
'https://www.washingtonpost.com', | |
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# !pip install --upgrade transformers plotly pandas | |
import plotly.graph_objects as go | |
import pandas as pd | |
pd.options.display.max_columns = None | |
from transformers import pipeline | |
unmasker = pipeline('fill-mask', model='bert-base-uncased') | |
results = [] | |
cars = ['mercedes', 'audi', 'bmw', 'volkswagen', 'ford', 'toyota', |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import networkx as nx | |
import pandas as pd | |
def score_links(links_file, domain): | |
"""Score a network on links based on their importance and centrality. | |
links_file: Path to the file containing the links (needs "Source" and | |
"Destination" columns), e.g. Screaming Frog's outlinks file. | |
domain: Domain used to filter the links, making sure they all point to it. | |
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ipaddress | |
import requests | |
import pandas as pd | |
def bot_ip_addresses(): | |
bots_urls = { | |
'google': 'https://developers.google.com/search/apis/ipranges/googlebot.json', | |
'bing': 'https://www.bing.com/toolbox/bingbot.json' | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Utility functions to create a WXR file for WordPress. | |
""" | |
import datetime | |
import os | |
from lxml import etree as ET | |
from lxml.etree import CDATA | |
# XML namespaces declarations |