Elias Dabbas (eliasdabbas)
https://adver.tools/
@eliasdabbas
eliasdabbas / direct_labeling_plotly_express.py
Last active November 6, 2024 12:25
Direct labeling with plotly.express
import plotly.express as px

# Assumes `stocks` (a wide DataFrame: a 'date' column plus one column per
# series) and `cat_scale` (a list of colors) are defined earlier in the gist.
fig = px.line(
    stocks,
    x='date',
    template='plotly_white',
    y=stocks.columns[1:],
    color_discrete_sequence=cat_scale,
    labels={'y': 'stock price'},
    height=600,
)
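The preview shows only the figure creation. A minimal sketch of the direct-labeling step itself, assuming `stocks` is a wide DataFrame like px.data.stocks() (a 'date' column plus one column per ticker); placing annotations at the last points is one way to do it, not necessarily the gist's exact approach:

import plotly.express as px

stocks = px.data.stocks()  # stand-in dataset (assumption)
fig = px.line(stocks, x='date', y=stocks.columns[1:], template='plotly_white')

# Label each line at its last point and drop the legend:
last_row = stocks.iloc[-1]
for col in stocks.columns[1:]:
    fig.add_annotation(x=last_row['date'], y=last_row[col], text=col,
                       showarrow=False, xanchor='left')
fig.update_layout(showlegend=False)
fig.show()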
@eliasdabbas
eliasdabbas / cartogram.py
Last active November 6, 2024 12:07
Create a cartogram by assigning size to flags (or two-letter symbols) without displaying map lines
import plotly.express as px
import adviz
import numpy as np
import pandas as pd

# Pull the country population tables from Wikipedia:
population = pd.read_html('https://en.wikipedia.org/wiki/List_of_countries_and_dependencies_by_population')
pop = population[0][['Location', 'Population']].copy()

# Build a flag (emoji) for each country:
flags = []
for country in pop['Location']:
    try:
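The preview cuts off inside the try block. A hedged sketch of what the loop likely does: build flag emojis from two-letter country codes via the Unicode regional-indicator trick. The name-to-ISO-2 lookup here is a stand-in dict, not the gist's actual mapping:

iso2 = {'China': 'CN', 'India': 'IN', 'United States': 'US'}  # stand-in lookup

def flag_emoji(alpha2):
    # 'US' -> 🇺🇸 : shift each ASCII letter into the regional-indicator block
    return ''.join(chr(ord(c) + 127397) for c in alpha2.upper())

flags = []
for country in pop['Location']:
    try:
        flags.append(flag_emoji(iso2[country]))
    except KeyError:
        flags.append('')  # no code found; leave the label empty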
@eliasdabbas
eliasdabbas / filter_non_200_status_codes.py
Created June 2, 2024 19:17
Filter non-200 status codes on a daily basis
import os
import datetime
import pandas as pd

# Today's date, used in the status-code file names (datetime.UTC needs
# Python 3.11+):
today = datetime.datetime.now(datetime.UTC).strftime('%Y_%m_%d')

url_status_time = pd.concat(
    pd.read_json(f'/path/to/status_codes/{file}',
                 lines=True)
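A hedged completion of the pd.concat call: read every day's JSON-lines file and keep the non-200 rows. The folder is the gist's placeholder path, and the `status` column assumes the adv.crawl_headers output format:

folder = '/path/to/status_codes/'  # placeholder path
url_status_time = pd.concat(
    pd.read_json(folder + file, lines=True) for file in os.listdir(folder)
)
# Keep only the problematic responses:
non_200 = url_status_time[url_status_time['status'].ne(200)]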
@eliasdabbas
eliasdabbas / daily_status_code.py
Created June 1, 2024 20:00
Setting up a daily status code checker
import datetime
import advertools as adv

today = datetime.datetime.now(datetime.UTC).strftime("%Y_%m_%d")

# Get all URLs listed in the sitemap:
sitemap = adv.sitemap_to_df("https://example/sitemap.xml")

# Fetch response headers for every URL (HEAD requests only):
adv.crawl_headers(
    sitemap["loc"],
@eliasdabbas
eliasdabbas / create_list_crawler_for_sheets.py
Created May 22, 2024 11:19
Create an SEO crawler in list mode working on Google Sheets
import pandas as pd

# Scrape the table of suggested XPath expressions from the advertools docs:
xpath = pd.read_html('https://advertools.readthedocs.io/en/master/advertools.code_recipes.spider_strategies.html')
df = xpath[2].iloc[:, [1, 2]]
# Wrap each expression in a Google Sheets IMPORTXML formula ("@@" joins multiple matches):
df = df.assign(expression=[f'=textjoin("@@",100,IMPORTXML(A2,"{expression}"))' for expression in df['XPath Expression']])
# `display` assumes a Jupyter session; also copy for pasting into a sheet:
display(df[['Suggested Name', 'expression']].T)
df[['Suggested Name', 'expression']].T.to_clipboard(index=False)
@eliasdabbas
eliasdabbas / url_to_html_sitemap.py
Last active April 16, 2024 10:03
Create an HTML sitemap for a list of URLs and their anchor text
# Create anchors for all letters:
import string
print('<h3>' + '&nbsp;&nbsp;&nbsp;&nbsp;'.join([f'<a href="#{letter}">{letter}</a>' for letter in string.ascii_uppercase]) + '</h3>')

# Assuming you have a DataFrame with the columns "full_name" and "loc":
for letter in string.ascii_uppercase:
    df = players_df[players_df['full_name'].str[0].eq(letter)]
    print()
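A hedged completion of the per-letter loop (the preview ends here), keeping the assumption that players_df has "full_name" and "loc" columns; the exact markup is a sketch:

for letter in string.ascii_uppercase:
    subset = players_df[players_df['full_name'].str[0].eq(letter)]
    # Letter heading that the anchor links at the top point to:
    print(f'<h2 id="{letter}">{letter}</h2>')
    for name, loc in zip(subset['full_name'], subset['loc']):
        print(f'<p><a href="{loc}">{name}</a></p>')
    print()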
@eliasdabbas
eliasdabbas / url_to_xml_sitemap.py
Created April 15, 2024 12:15
Convert a list of URLs to an XML sitemap
import datetime
import pandas as pd

lastmod = datetime.datetime.now(datetime.UTC).strftime('%Y-%m-%d')
url_list = [
    'https://nbastats.pro/player/Eric_Moreland',
    'https://nbastats.pro/player/Marc_Iavaroni',
    'https://nbastats.pro/player/Keith_Tower',
    'https://nbastats.pro/player/Hakeem_Olajuwon',
    'https://nbastats.pro/player/Mike_Price',
    'https://nbastats.pro/player/Doug_Collins',
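The preview cuts off inside url_list. A hedged sketch of the conversion step itself, using `url_list` and `lastmod` from above and the standard sitemap urlset schema:

url_entries = '\n'.join(
    f'<url><loc>{url}</loc><lastmod>{lastmod}</lastmod></url>'
    for url in url_list
)
xml_sitemap = (
    '<?xml version="1.0" encoding="UTF-8"?>\n'
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
    f'{url_entries}\n'
    '</urlset>'
)
print(xml_sitemap)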
@eliasdabbas
eliasdabbas / running_crawls.py
Last active November 2, 2023 13:33
Get a summary of the currently running crawl jobs (using the advertools crawler)
from subprocess import run
from functools import partial

# Capture output as text for all calls:
run = partial(run, text=True, capture_output=True)

def running_crawls():
    """Get details of currently running spiders.

    Get a DataFrame showing the following details:
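The docstring is cut off here. A hedged sketch of one possible body, assuming the crawl jobs show up as Scrapy processes and that `ps` accepts the pid,etime,command output format (Linux/macOS):

import pandas as pd

def running_crawls():
    """Get details of currently running spiders as a DataFrame."""
    proc = run(['ps', 'xo', 'pid,etime,command'])
    rows = [line.split(maxsplit=2) for line in proc.stdout.splitlines()[1:]]
    df = pd.DataFrame(rows, columns=['pid', 'elapsed', 'command'])
    # advertools runs its crawls through Scrapy:
    return df[df['command'].str.contains('scrapy')]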
@eliasdabbas
eliasdabbas / news_headlines_automated.py
Created October 21, 2023 14:14
Get the main headline story from the homepages of news websites.
import advertools as adv

url_xpath_selectors = {
    'https://www.ft.com': ('main_story_headline', '//span[contains(@class, "text text--color-black text-display--scale-7 text--weight-500")]/text()'),
    'https://www.nytimes.com': ('main_story_headline', '//h3[@class="indicate-hover css-si8ren"]/text()'),
    'https://www.economist.com': ('main_story_headline', '//a[@data-analytics="top_stories:headline_1"]/text()'),
    'https://edition.cnn.com': ('main_story_headline', '//h2[@class="container__title_url-text container_lead-package__title_url-text"]/text()'),
    'https://www.nbcnews.com': ('main_story_headline', '//h2[@class="storyline__headline founders-cond fw6 important large headlineOnly"]/text()'),
    'https://www.bbc.com': ('main_story_headline', '//a[@rev="hero1|headline"]/text()'),
    'https://www.foxnews.com': ('main_story_headline', '(//header[@class="info-header"])[1]//a/text()'),
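A hedged completion: close the dict and crawl each homepage with its own selector. adv.crawl takes an xpath_selectors dict mapping an output column name to an XPath expression; the output file name is a placeholder:

for url, (name, xpath) in url_xpath_selectors.items():
    adv.crawl(
        url_list=url,
        output_file='news_headlines.jl',  # placeholder; rows are appended
        xpath_selectors={name: xpath},
    )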
@eliasdabbas
eliasdabbas / incremental_crawling.py
Last active October 20, 2023 12:42
Incremental crawling with advertools. Crawl a set number of pages every time without re-crawling the same pages
import advertools as adv

adv.crawl(
    # Start crawling from this URL (or list of URLs):
    url_list='https://en.wikipedia.org/wiki/Main_Page',
    # Save the crawl output to this file:
    output_file='/home/user_name/wikipedia_en_crawl.jl',
    # Should it follow links?
    follow_links=True,
    # But don't follow all links, only links that match this regex:
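The preview ends at the regex comment. A hedged, self-contained completion modeled on the incremental-crawling recipe in the advertools docs: CLOSESPIDER_PAGECOUNT caps the pages per run, and JOBDIR persists crawl state so the next run skips already-crawled pages. The regex and paths are placeholders:

import advertools as adv

adv.crawl(
    url_list='https://en.wikipedia.org/wiki/Main_Page',
    output_file='/home/user_name/wikipedia_en_crawl.jl',
    follow_links=True,
    include_url_regex='/wiki/',  # placeholder pattern
    custom_settings={
        # Stop after this many pages on each run:
        'CLOSESPIDER_PAGECOUNT': 1000,
        # Persist the crawl queue/state between runs:
        'JOBDIR': '/home/user_name/wikipedia_en_crawl_job',
    },
)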