Skip to content

Instantly share code, notes, and snippets.

@cpcloud
cpcloud / ignore.py
Created December 20, 2022 18:29
ruff example of checking ignored files
# Minimal lint fixture: the bare `except:` below is presumably the construct
# the linter is expected to flag (per the gist description) — NOTE(review):
# confirm before "fixing" it, since narrowing the handler would defeat the example.
def foo():
try:
pass
except:
# The `try` body is only `pass`, so this handler is not expected to run.
print()
@cpcloud
cpcloud / flake.lock
Created December 18, 2022 18:07
torch wheel build with poetry2nix; nixos compat and hacks around broken nvidia install behavior
{
"nodes": {
"flake-utils": {
"locked": {
"lastModified": 1667395993,
"narHash": "sha256-nuEHfE/LcWyuSWnS8t12N1wc105Qtau+/OdUAjtQ0rA=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "5aed5285a952e0b949eb3ba02c12fa4fcfef535f",
"type": "github"
@cpcloud
cpcloud / flake.lock
Created December 18, 2022 17:46
torch wheel build with poetry2nix
{
"nodes": {
"flake-utils": {
"locked": {
"lastModified": 1667395993,
"narHash": "sha256-nuEHfE/LcWyuSWnS8t12N1wc105Qtau+/OdUAjtQ0rA=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "5aed5285a952e0b949eb3ba02c12fa4fcfef535f",
"type": "github"
@cpcloud
cpcloud / getschema.py
Created November 28, 2022 20:07
Extract parquet metadata from URLs
"""
Given a URL pointing to a Parquet file, how do I get the schema from that file
as cheaply as possible?
Example URL (75 MB):
https://storage.googleapis.com/ibis-tutorial-data/wowah_data/wowah_data_raw.parquet
"""
PARQUET_MAGIC = b"PAR1"
@cpcloud
cpcloud / titlebasicsdebug.tsv
Created November 16, 2022 17:06
reproducer tsv file
tconst titleType primaryTitle originalTitle isAdult startYear endYear runtimeMinutes genres
tt0030540 movie Outside of Paradise Outside of Paradise 0 1938 \N 68 Comedy,Musical,Romance
tt0030541 movie The Strange Case of Dr. Meade The Strange Case of Dr. Meade 0 1938 \N 60 Adventure,Drama,Romance
tt0030542 movie Over She Goes Over She Goes 0 1937 \N 78 Musical
tt0030543 movie Over the Wall Over the Wall 0 1938 \N 67 Drama,Romance
tt0030544 movie The Overland Express The Overland Express 0 1938 \N 55 Western
tt0030545 movie Overland Stage Raiders Overland Stage Raiders 0 1938 \N 55 Western
tt0030546 movie To the Victor Owd Bob 0 1938 \N 78 Comedy,Drama,Family
tt0030547 movie Father of More Than Four Padre de más de cuatro 0 1938 \N 110 Comedy
tt0030548 movie Painted Desert Painted Desert 0 1938 \N 58 Western
@cpcloud
cpcloud / joinmovies.py
Last active November 4, 2022 13:33
comparison of pandas and ibis + duckdb on a real use case
import contextlib
import time
from datetime import timedelta
import ibis
import pandas as pd
from ibis import _
def pandas():
@cpcloud
cpcloud / script.py
Created February 10, 2022 16:26
ibis ffill
from pprint import pprint as print
import duckdb
import ibis
garbage = """
DROP TABLE IF EXISTS demo_data;
CREATE TABLE demo_data (
event_id INT
@cpcloud
cpcloud / flake.lock
Created February 7, 2022 10:57
mkdocstrings reproducible example
{
"nodes": {
"flake-utils": {
"locked": {
"lastModified": 1644229661,
"narHash": "sha256-1YdnJAsNy69bpcjuoKdOYQX0YxZBiCYZo4Twxerqv7k=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "3cecb5b042f7f209c56ffd8371b2711a290ec797",
"type": "github"
@cpcloud
cpcloud / anaylze-bigquery.R
Created January 13, 2022 02:07
ibis ci analysis
library(ggplot2)
library(stringr)
library(tidyr)
library(ggh4x)
library(bigrquery)
library(DBI)
library(dplyr, warn.conflicts = FALSE)
library(lubridate, warn.conflicts = FALSE)
@cpcloud
cpcloud / test_3163.py
Created January 7, 2022 17:48
The world's most annoying bug
import sqlalchemy as sa
import sqlparse
def issue_sql(con):
part = con.table("part")
supplier = con.table("supplier")
partsupp = con.table("partsupp")
q = part.join(partsupp, part.P_PARTKEY == partsupp.PS_PARTKEY)