sizhky’s gists

sizhky / sample_gt.json

Last active November 4, 2024 16:20

sample tables for TEDS evaluation

	{
	"full-table": {
	"html": "<html><body><table> <thead> <tr> <th>S.No</th> <th>Description</th> <th>Qty</th> <th>Unit Price ($)</th> <th>Total ($)</th> </tr> </thead> <tbody> <tr> <td>1</td> <td>Monitor 4k</td> <td>1</td> <td>320</td> <td>320</td> </tr> <tr> <td>2</td> <td>Keyboard</td> <td>1</td> <td>50</td> <td>50</td> </tr> <tr> <td>3</td> <td>LEDs</td> <td>100</td> <td>1</td> <td>100</td> </tr> <tr> <td>4</td> <td>MiniLEDs</td> <td>100</td> <td>1</td> <td>100</td> </tr> </tbody> </table> </body> </html>"
	},
	"missing-rows": {
	"html": "<html><body><table> <thead> <tr> <th>S.No</th> <th>Description</th> <th>Qty</th> <th>Unit Price ($)</th> <th>Total ($)</th> </tr> </thead> <tbody> <tr> <td>1</td> <td>Monitor 4k</td> <td>1</td> <td>320</td> <td>320</td> </tr> <tr> <td>2</td> <td>Keyboard</td> <td>1</td> <td>50</td> <td>50</td> </tr> <tr> <td>3</td> <td>LEDs</td> <td>100</td> <td>1</td> <td>100</td> </tr> <tr> <td>4</td> <td>MiniLEDs</td> <td>100</td> <td>1</td> <td>100</td> </tr> </

sizhky / parse_bruno.py

Created October 14, 2024 05:23

Parse all chunks of bruno

	from torch_snippets import json, readlines
	import re

	def parse_bruno(file):
	def process_chunks(chunk):
	a, *b = chunk.split('\n')
	a, _b = a.split()
	b = '\n'.join([_b, *b])
	if a == 'body:json':
	b = json.loads(b[1:-1])

sizhky / ticktock.py

Created December 9, 2023 19:15

ticktock timer for time profiling


	from time import perf_counter
	class Ticker:
	def __init__(self):
	self.tik = perf_counter()

	def __call__(self, arg, info=None, newl=False):
	tok = perf_counter()
	t = round(tok-self.tik, 3)
	if newl: print()

sizhky / clean_mem.py

Created December 8, 2023 13:00

clean memory

	import traceback, gc

	def clean_ipython_hist():
	# Code in this function mainly copied from IPython source
	if not 'get_ipython' in globals(): return
	ip = get_ipython()
	user_ns = ip.user_ns
	ip.displayhook.flush()
	pc = ip.displayhook.prompt_count + 1
	for n in range(1, pc): user_ns.pop('_i'+repr(n),None)

sizhky / conda_utils.sh

Last active January 8, 2024 08:48

utilities for conda environments

	create_conda_env() {
	# Check if the environment name and Python version are provided as arguments
	if [ $# -lt 2 ]; then
	echo "Please provide the conda environment name and Python version as arguments."
	return 1
	fi

	# Create conda environment with the specified Python version
	conda create -n "$1" "$2"

sizhky / pandas_query.py

Created April 20, 2023 05:17

	Sure, here is a summary of the 20 examples of using `pd.DataFrame.query()` to filter data:

	1. Filter on exact string match: `df.query('name == "Alice"')`
	2. Filter on multiple conditions: `df.query('age > 30 and city == "New York"')`
	3. Filter on a list of values: `df.query('age in [25, 30, 35]')`
	4. Filter on a range of values: `df.query('age.between(25, 35)')`
	5. Filter on string contains: `df.query('name.str.contains("an")')`
	6. Filter on string starts with: `df.query('name.str.startswith("A")')`
	7. Filter on string ends with: `df.query('name.str.endswith("a")')`
	8. Filter on regular expression: `df.query('name.str.match("^[Aa].*a$")')`

sizhky / .gitignore

Created November 10, 2022 09:47

Python's gitignore

	# Data
	*.csv
	*.png
	*.jpg
	*.jpeg
	*.pdf
	*.json

	# Byte-compiled / optimized / DLL files
	__pycache__/

sizhky / matches.csv

Created September 13, 2022 15:21

We can make this file beautiful and searchable if this error is corrected: It looks like row 6 should actually have 17 columns, instead of 11 in line 5.

	id,city,date,player_of_match,venue,neutral_venue,team1,team2,toss_winner,toss_decision,winner,result,result_margin,eliminator,method,umpire1,umpire2
	335982,Bangalore,2008-04-18,BB McCullum,M Chinnaswamy Stadium,0,Royal Challengers Bangalore,Kolkata Knight Riders,Royal Challengers Bangalore,field,Kolkata Knight Riders,runs,140,N,NA,Asad Rauf,RE Koertzen
	335983,Chandigarh,2008-04-19,MEK Hussey,"Punjab Cricket Association Stadium, Mohali",0,Kings XI Punjab,Chennai Super Kings,Chennai Super Kings,bat,Chennai Super Kings,runs,33,N,NA,MR Benson,SL Shastri
	335984,Delhi,2008-04-19,MF Maharoof,Feroz Shah Kotla,0,Delhi Daredevils,Rajasthan Royals,Rajasthan Royals,bat,Delhi Daredevils,wickets,9,N,NA,Aleem Dar,GA Pratapkumar
	335985,Mumbai,2008-04-20,MV Boucher,Wankhede Stadium,0,Mumbai Indians,Royal Challengers Bangalore,Mumbai Indians,bat,Royal Challengers Bangalore,wickets,5,N,NA,SJ Davis,DJ Harper
	335986,Kolkata,2008-04-20,DJ Hussey,Eden Gardens,0,Kolkata Knight Riders,Deccan Chargers,Deccan Chargers,bat,Kolkata Knigh

sizhky / io.py

Created November 11, 2021 15:16

io

	from torch_snippets import *
	def io(func):
	    """Decorator that logs a function's inputs and output via ``Info``.

	    The wrapper logs the positional and keyword arguments it receives,
	    calls ``func`` with them unchanged, logs the returned value, and
	    returns that value to the caller.

	    Args:
	        func: The callable to wrap.

	    Returns:
	        A wrapper that forwards ``*args`` / ``**kwargs`` to ``func`` and
	        carries ``func``'s metadata (name, docstring).
	    """
	    # Imported locally: the only visible file-level import is a star
	    # import, which cannot be relied on to provide ``functools``.
	    from functools import wraps

	    @wraps(func)
	    def inner(*args, **kwargs):
	        # Accept and forward arbitrary positional AND keyword arguments;
	        # the previous ``(args, *kwargs)`` signature required at least one
	        # positional argument and rejected every keyword argument.
	        # NOTE(review): ``depth=1`` is passed to ``Info`` unchanged from
	        # the original; its exact meaning is defined by torch_snippets.
	        Info(f'\nargs: {args}\nkwargs: {kwargs}', depth=1)
	        o = func(*args, **kwargs)
	        Info(f'\noutput: {o}', depth=1)
	        return o

	    return inner

sizhky / add_date_parts.py

Created September 8, 2021 16:38

	def add_datepart(df, fldname, drop=True, time=False):
	"Helper function that adds columns relevant to a date."
	fld = df[fldname]
	fld_dtype = fld.dtype
	if isinstance(fld_dtype, pd.core.dtypes.dtypes.DatetimeTZDtype):
	fld_dtype = np.datetime64

	if not np.issubdtype(fld_dtype, np.datetime64):
	df[fldname] = fld = pd.to_datetime(fld, infer_datetime_format=True)
	targ_pre = re.sub('[Dd]ate$', '', fldname)

sizhky sizhky