
# plotly imports
import plotly.plotly as py  # note: in plotly>=4 this module moved to the separate chart_studio package
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, plot, iplot
import cufflinks as cf
init_notebook_mode(connected=True)
cf.go_offline()
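With cufflinks in offline mode, plotly charts can be drawn directly from a pandas DataFrame. A minimal usage sketch (data and column names are illustrative):

import pandas as pd
import numpy as np
df = pd.DataFrame(np.random.randn(100, 2), columns=['a', 'b'])
df.iplot(kind='scatter', mode='markers')  # renders an interactive chart inline in the notebook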
@xoelop
xoelop / xoel.tpl
Last active December 20, 2019 19:07
Jupyter nbconvert template to create reports where cells marked as skip (for slideshows) are hidden in the report.
{%- extends 'full.tpl' -%}
{%- block header -%}
{{ super() }}
<meta name='robots' content='noindex,nofollow' />
{%- endblock header -%}
{% block any_cell %}
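The preview above is cut off at the any_cell block. A hedged sketch of exporting a notebook with this template via the nbconvert 5.x Python API (file names are illustrative; the template must be on the exporter's template path, and nbconvert 6+ uses a different template system):

import nbformat
from nbconvert import HTMLExporter

nb = nbformat.read('notebook.ipynb', as_version=4)
exporter = HTMLExporter(template_file='xoel.tpl')  # the template defined in this gist
body, _ = exporter.from_notebook_node(nb)
with open('report.html', 'w') as f:
    f.write(body)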
import pandas as pd
data = pd.Series([1, 4, 4, 6, 2, 4, 7, 4, 7])
# data can be a series or a dataframe
# this calculates the inverse quantile for each value of data
inverse_quantiles = data.apply(lambda x: ((data <= x).sum()) / len(data))
# read more: https://stackoverflow.com/a/58186830/5031446
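The same result should also be obtainable without apply, using pandas' rank: with method='max', each value's rank is the count of observations less than or equal to it, and pct=True divides that count by the total number of observations.

inverse_quantiles = data.rank(method='max', pct=True)  # equivalent to the apply above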
@xoelop
xoelop / cleaning_outliers.py
Created February 11, 2020 15:01
Code to visualize and clean outliers in Python
import pandas as pd
import numpy as np
# dataviz imports
import plotly as py
import plotly.express as px
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
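The preview shows only the imports, not the gist's actual cleaning steps. As a rough illustration of the idea (not the gist's code), outliers can be clipped to quantile bounds and the result visualized with plotly express:

df = pd.DataFrame({'x': np.random.standard_cauchy(1000)})  # heavy-tailed sample data, illustrative only
low, high = df['x'].quantile([0.01, 0.99])                 # 1st and 99th percentiles
df['x_clipped'] = df['x'].clip(lower=low, upper=high)      # cap extreme values at those bounds
fig = px.histogram(df, x='x_clipped')                      # inspect the cleaned distribution
fig.show()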
@xoelop
xoelop / simple_parallel_map.py
Created March 18, 2020 09:37
Simple multithreading map function
import concurrent.futures

def parallel_map(function, iterable, max_workers: int = 50):
    """Apply `function` to each item of `iterable` in a thread pool and return the results as a list."""
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool:
        result = pool.map(function, iterable)
        return list(result)
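Example usage, fetching several URLs concurrently (the URLs are illustrative):

import requests
urls = ['https://example.com'] * 10
responses = parallel_map(requests.get, urls)  # the 10 GET requests run in parallel threads
print([r.status_code for r in responses])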
@xoelop
xoelop / click_all.js
Last active April 9, 2020 11:38
JS code to click every element in a page with the same class name
var className = 'your-class-name'; // Replace your-class-name with the class name of the elements you want to click
var elements = Array.from(document.getElementsByClassName(className));
elements.map(x => x.click())
@xoelop
xoelop / get_tweet_urls.js
Last active April 30, 2020 08:40
Code to get tweet URLs of all the tweets that appear on the browser after scrolling.
// seen on http://makble.com/how-to-copy-text-from-chrome-console-to-file-using-javascript
console.save = function (data, filename) {
    // code to save console output as a json file
    if (!data) {
        console.error('Console.save: No data')
        return;
    }
    if (!filename) filename = 'console.json'
SELECT * FROM all_likes
WHERE match(lower(text), replaceRegexpAll({{String(search_term, ' ', description="Return tweets whose text matches this word (can include a regex pattern)", required=True)}}, ' ', '.?'))
AND (
multiSearchAnyCaseInsensitive(screenname, splitByString(',', {{String(usernames, ',', description="User names or handles, comma-separated")}}))
OR multiSearchAnyCaseInsensitive(username, splitByString(',', {{String(usernames, ',', description="User names or handles, comma-separated")}}))
)
AND date >= toStartOfDay(toDate({{Date(since, '2000-09-09', description="Start date, YYYY-MM-DD")}}))
AND date <= addDays(toStartOfDay(toDate({{Date(until, '2100-09-09', description="End date, YYYY-MM-DD")}})), 1)
ORDER BY date desc
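The {{String(...)}} / {{Date(...)}} placeholders look like Tinybird's query templating over ClickHouse. Assuming this query is published as a Tinybird pipe, it could be queried over HTTP roughly like this (pipe name, token and parameter values are placeholders):

import requests
params = {
    'token': 'YOUR_TOKEN',        # placeholder auth token
    'search_term': 'clickhouse',
    'usernames': 'xoelop',
    'since': '2020-01-01',
    'until': '2020-12-31',
}
r = requests.get('https://api.tinybird.co/v0/pipes/search_likes.json', params=params)
print(r.json())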
SELECT
date,
link,
tweet_text,
ifNull(url, '') url,
ifNull(title, '') title,
ifNull(description, '') description,
media,
format('https://twitter.com/{}', screenname) user_link,
format('{} (@{})', username, screenname) user_name_handle,
import requests
import urllib.parse
import json
def jprint(data: str):
    """Prints JSON-like string (data) nicely"""
    print(json.dumps(data, indent=4, ensure_ascii=False))

def ingest_data(datasource: str,