Skip to content

Instantly share code, notes, and snippets.

View do-me's full-sized avatar

Dominik Weckmüller do-me

View GitHub Profile
@do-me
do-me / dict_to_json.py
Created March 28, 2025 16:49
Save and load python dict as gzipped json
import json, gzip
def load_json(file_path):
    """Load a JSON document from a gzip-compressed or plain-text file.

    The file is first read as gzip. If its header is not a gzip header,
    it is re-read as ordinary UTF-8 text.

    Args:
        file_path: Path of the JSON file (gzipped or not).

    Returns:
        The decoded JSON value as returned by ``json.load``.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        json.JSONDecodeError: If the content is not valid JSON.
    """
    try:
        with gzip.open(file_path, 'rt', encoding='utf-8') as f:
            return json.load(f)
    except gzip.BadGzipFile:
        # Only a bad gzip header means "this is a plain file". Other
        # OSErrors (missing file, permissions) propagate directly instead
        # of being retried and reported as a chained double failure.
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)
@do-me
do-me / deck.py
Created March 4, 2025 20:32
Simple pydeck deck.gl Lineplot layer
import pydeck as pdk
import pandas as pd  # needed for pd.read_json below; was missing (NameError)

# Load the deck.gl example line data (Heathrow flights) into a DataFrame.
df = pd.read_json("https://raw.githubusercontent.com/visgl/deck.gl-data/master/examples/line/heathrow-flights.json")
# Initial camera for the map: position, zoom limits, tilt (pitch) and rotation (bearing).
INITIAL_VIEW_STATE = pdk.ViewState(latitude=47.65, longitude=7, zoom=4.5, max_zoom=16, pitch=50, bearing=0)
line_layer = pdk.Layer(
"LineLayer",
df,
get_source_position="start",
@do-me
do-me / deck.py
Last active March 24, 2025 15:39
Simple pydeck deck.gl Scatterplot layer
import pydeck as pdk
import pandas as pd
import numpy as np
### sample data
import duckdb
import geopandas
df = duckdb.sql(f"SELECT * FROM 'geonames_23_03_2025.parquet' WHERE \"1\" = 'London' \
AND \"8\" = 'GB' ").df()
gdf = geopandas.GeoDataFrame(
@do-me
do-me / extract.sh
Last active March 3, 2025 16:16
Pmtiles extract subset from weekly builds
pmtiles extract https://build.protomaps.com/20250303.pmtiles OUTPUT.pmtiles --region=your_subset.geojson
@do-me
do-me / docling.sh
Created February 11, 2025 15:24
Docling bash script for converting a directory of pdfs to a directory of textfiles and a single LLM-ingestable text file
#!/bin/bash
# Set the input directory (where your PDFs are)
INPUT_DIR="." # Current directory, change if needed
# Set the output file name
OUTPUT_FILE="llm_ready.txt"
# Set the temporary directory
TEMP_DIR="temp_pdf_text"
@do-me
do-me / upload.sh
Created February 9, 2025 18:28
Upload many files with git to Huggingface repo
huggingface-cli login # logs in with a suitable HF token
huggingface-cli upload do-me/Eurovoc_English . files # all files must be in cwd
@do-me
do-me / rename.sh
Created January 30, 2025 17:27
Rename all webp files in folder with sequential numbers (in sorted filename order)
# Rename every *.webp file in the current directory to 0001.webp, 0002.webp, ...
# in sorted filename order.
# NOTE: a target name that already exists (e.g. a file already called
# 0002.webp) is overwritten by mv; run this on a folder without such names.
find . -maxdepth 1 -type f -name "*.webp" 2>/dev/null | sort | {
  num=0
  # IFS= and -r keep leading/trailing whitespace and backslashes in names intact.
  while IFS= read -r old_name; do
    num=$((num + 1))
    new_name=$(printf "%04d.webp" "$num")
    mv -- "$old_name" "$new_name"
  done
}
@do-me
do-me / trim_embedding_decimals.py
Created January 30, 2025 08:09
Trim embedding decimals from pandas
import pandas as pd
import json
import gzip
# Load data
df = pd.read_json("your_file.json.gz")
# Round embeddings to 4 decimal places
df["embeddings"] = df["embeddings"].apply(lambda emb: [round(e, 4) for e in emb])
@do-me
do-me / Suedtirol_WFS_to_geoparquet.py
Created January 16, 2025 08:44
Südtirol WFS to geoparquet extraction
import requests
import xml.etree.ElementTree as ET
import geopandas as gpd
from io import StringIO
import pandas as pd
from shapely import wkt
import os
def get_feature_types(wfs_url):
"""Extracts feature type names from a WFS GetCapabilities response."""
@do-me
do-me / query.py
Created January 14, 2025 20:09
Test query by lat lon range in duckdb over httpfs
import time
import duckdb
def run_duckdb_query():
# Connect to DuckDB (using an in-memory database as an example)
conn = duckdb.connect(database=':memory:')
# Load the spatial extension
conn.execute("LOAD spatial")