Skip to content

Instantly share code, notes, and snippets.

View thomasjpfan's full-sized avatar

Thomas J. Fan thomasjpfan

View GitHub Profile
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
diff --git a/sklearn/utils/_weight_vector.pxd.tp b/sklearn/utils/_weight_vector.pxd.tp
index 9d1779373c..f5e3e4af5a 100644
--- a/sklearn/utils/_weight_vector.pxd.tp
+++ b/sklearn/utils/_weight_vector.pxd.tp
@@ -27,11 +27,12 @@ cdef class WeightVector{{name_suffix}}(object):
cdef readonly {{c_type}}[::1] aw
cdef {{c_type}} *w_data_ptr
cdef {{c_type}} *aw_data_ptr
- cdef {{c_type}} wscale
- cdef {{c_type}} average_a
@thomasjpfan
thomasjpfan / patch.diff
Last active January 3, 2023 23:13
patch_for_debugging scikit-learn#25172
diff --git a/sklearn/decomposition/_dict_learning.py b/sklearn/decomposition/_dict_learning.py
index b0c252fc31..e99465ada3 100644
--- a/sklearn/decomposition/_dict_learning.py
+++ b/sklearn/decomposition/_dict_learning.py
@@ -174,6 +174,8 @@ def _sparse_encode(
)
if init is not None:
+ if not init.flags["WRITEABLE"]:
+ init = np.array(init)
@thomasjpfan
thomasjpfan / gdb.txt
Created October 12, 2022 21:16
Debugging scikit-learn on Python 3.11 Ubuntu
#0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
#1 0x00007ffff7c66859 in __GI_abort () at abort.c:79
#2 0x00007ffff7cd126e in __libc_message (action=action@entry=do_abort, fmt=fmt@entry=0x7ffff7dfb298 "%s\n")
at ../sysdeps/posix/libc_fatal.c:155
#3 0x00007ffff7cd92fc in malloc_printerr (str=str@entry=0x7ffff7dfd600 "free(): invalid next size (fast)")
at malloc.c:5347
#4 0x00007ffff7cdabac in _int_free (av=0x7ffff7e30b80 <main_arena>, p=0x555559ff7e50, have_lock=0)
at malloc.c:4249
#5 0x00007ffff5e1d2e1 in PyDataMem_UserFREE ()
from /home/thomasfan/Desktop/cpython/.venv/lib/python3.11/site-packages/numpy/core/_multiarray_umath.cpython-311-x86_64-linux-gnu.so
@thomasjpfan
thomasjpfan / implementation_snippet.py
Created August 3, 2022 18:21
Computation backend discussion
nanmin = self.get_computation_routine("nanmin")
nanmax = self.get_computation_routine("nanmax")
# self.get_computation_routine("nanmin_max")
# cupy.array_api.min
data_min = nanmin(X, axis=0)
data_max = nanmax(X, axis=0)
if first_pass:
@thomasjpfan
thomasjpfan / bench_missing.py
Last active March 24, 2023 20:35
Benchmark missing values for trees
"""Instructions
1. Build this PR and run:
```bash
python bench_missing.py bench ~/bench_results pr
```
2. On main run:
```bash
@thomasjpfan
thomasjpfan / benchmark.py
Last active May 4, 2022 03:14
tree splitter memory view benchmark
from functools import partial
import argparse
from time import perf_counter
from statistics import mean, stdev
from itertools import product
import csv
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.datasets import make_classification, make_regression, make_low_rank_matrix
import numpy as np
@thomasjpfan
thomasjpfan / scikit-learn-pandas-API-proposal-1.ipynb
Created March 31, 2022 03:14
scikit-learn-pandas-api-transform-kwarg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@thomasjpfan
thomasjpfan / bench_trees.ipynb
Last active March 26, 2022 19:32
Tree criterion benchmark
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.