This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import plotly.express as px | |
# Line chart of the ``stocks`` frame: the 'date' column goes on the x axis and
# every column after the first is drawn as its own line on the y axis.
fig = px.line(
    data_frame=stocks,
    x='date',
    y=stocks.columns[1:],
    labels={'y': 'stock price'},
    color_discrete_sequence=cat_scale,
    template='plotly_white',
    height=600,
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import plotly.express as px | |
import adviz | |
import numpy as np | |
import pandas as pd | |
population = pd.read_html('https://en.wikipedia.org/wiki/List_of_countries_and_dependencies_by_population') | |
pop = population[0][['Location', 'Population']].copy() | |
flags = [] | |
for country in pop['Location']: | |
try: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import datetime | |
import pandas as pd | |
today = datetime.datetime.now(datetime.UTC).strftime('%Y_%m_%d') | |
url_status_time = pd.concat( | |
pd.read_json(f'/path/to/status_codes/{file}', | |
lines=True) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime | |
import advertools as adv | |
today = datetime.datetime.now(datetime.UTC).strftime("%Y_%m_%d") | |
sitemap = adv.sitemap_to_df("https://example/sitemap.xml") | |
adv.crawl_headers( | |
sitemap["loc"], |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
# Parse every HTML table on the advertools "spider strategies" recipe page.
xpath = pd.read_html(
    'https://advertools.readthedocs.io/en/master/advertools.code_recipes.spider_strategies.html'
)

# Keep the second and third columns (positions 1 and 2) of the third table.
df = xpath[2].iloc[:, [1, 2]]

# Wrap each XPath expression in a formula string that calls textjoin and
# IMPORTXML against cell A2.
df = df.assign(
    expression=[
        f'=textjoin("@@",100,IMPORTXML(A2,"{xp}"))'
        for xp in df['XPath Expression']
    ]
)

# Show the transposed name/formula table, then copy the same transposed view
# to the clipboard without its index labels.
display(df[['Suggested Name', 'expression']].T)
df[['Suggested Name', 'expression']].T.to_clipboard(index=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create anchors for all letters:
import string

# Emit one <h3> row holding an in-page link ("#A" ... "#Z") for each letter.
print('<h3>' + ' '.join([f'<a href="#{letter}">{letter}</a>' for letter in string.ascii_uppercase]) + '</h3>')

# Assuming you have a DataFrame (players_df) with the columns "full_name" and "loc":
for letter in string.ascii_uppercase:
    # Build the mask from the same frame that is being filtered. The original
    # took the mask from a differently named frame (players_static), which only
    # works if both frames share an identical index.
    # NOTE(review): confirm players_df is the intended frame name.
    df = players_df[players_df['full_name'].str[0].eq(letter)]
    print()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime | |
import pandas as pd | |
lastmod = datetime.datetime.now(datetime.UTC).strftime('%Y-%m-%d') | |
url_list = [ | |
'https://nbastats.pro/player/Eric_Moreland', | |
'https://nbastats.pro/player/Marc_Iavaroni', | |
'https://nbastats.pro/player/Keith_Tower', | |
'https://nbastats.pro/player/Hakeem_Olajuwon', | |
'https://nbastats.pro/player/Mike_Price', | |
'https://nbastats.pro/player/Doug_Collins', |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from subprocess import run | |
from functools import partial | |
run = partial(run, text=True, capture_output=True) | |
def running_crawls(): | |
"""Get details of currently running spiders. | |
Get a DataFrame showing the following details: | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import advertools as adv | |
url_xpath_selectors = { | |
'https://www.ft.com': ('main_story_headline', '//span[contains(@class, "text text--color-black text-display--scale-7 text--weight-500")]/text()'), | |
'https://www.nytimes.com': ('main_story_headline', '//h3[@class="indicate-hover css-si8ren"]/text()'), | |
'https://www.economist.com': ('main_story_headline', '//a[@data-analytics="top_stories:headline_1"]/text()'), | |
'https://edition.cnn.com': ('main_story_headline', '//h2[@class="container__title_url-text container_lead-package__title_url-text"]/text()'), | |
'https://www.nbcnews.com': ('main_story_headline', '//h2[@class="storyline__headline founders-cond fw6 important large headlineOnly"]/text()'), | |
'https://www.bbc.com': ('main_story_headline', '//a[@rev="hero1|headline"]/text()'), | |
'https://www.foxnews.com': ('main_story_headline', '(//header[@class="info-header"])[1]//a/text()'), |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import advertools as adv | |
adv.crawl( | |
# start crawling from this URL(s): | |
url_list='https://en.wikipedia.org/wiki/Main_Page', | |
# save the crawl output to this file: | |
output_file='/home/user_name/wikipedia_en_crawl.jl', | |
# Should it follow links? | |
follow_links=True, | |
# But don't follow all links, only links that match this regex: |
Newer Older