Gilles Louppe glouppe

{"Researcher", "Teacher", "Coder"} #ai #ai4science #deeplearning #bayesian

Recently created

Least recently created

Recently updated

Least recently updated

glouppe / sklearn_vs_wiserf.py

Last active December 19, 2015 11:39

	import numpy
	import random
	from sklearn.datasets import fetch_mldata
	# Load the dataset registered under the name 'MNIST original'.
	mnist = fetch_mldata('MNIST original')
	# Define training and testing sets
	# One integer index per row of mnist.data.
	inds = numpy.arange(len(mnist.data))
	# Draw 10% of the row positions at random, without replacement, for the
	# test set. NOTE(review): no seed is set in the visible code, so the split
	# presumably differs between runs -- confirm.
	test_i = random.sample(xrange(len(inds)), int(0.1*len(inds)))
	# The training indices are what remains once the test positions are removed
	# (inds is 0..n-1, so positions and index values coincide).
	train_i = numpy.delete(inds, test_i)
	# Select the training rows and cast features and labels to double precision.
	X_train = mnist.data[train_i].astype(numpy.double)
	y_train = mnist.target[train_i].astype(numpy.double)

glouppe / nearest_developers.py

Last active December 23, 2015 21:39

Generate a sparse matrix whose rows are users and whose columns are filenames, where data[i, j] is the number of commits by user i to file j, and then find the 3 nearest neighbors of each scikit-learn contributor.

	import numpy as np
	import os

	from collections import defaultdict
	from git import Repo
	from scipy.sparse import csc_matrix

	# Hard-coded absolute path into a local scikit-learn checkout; it is
	# machine-specific and must be adapted before running elsewhere.
	path = "/home/gilles/Sources/scikit-learn/sklearn/"
	# File extensions, written without the leading dot.
	# NOTE(review): presumably used to select which files are considered;
	# the usage is not visible here -- confirm.
	extensions = ["py", "pyx", "pxd"]

glouppe / beard_disambiguation.py

Created January 7, 2015 15:49

Disambiguation prototype

	import numpy as np
	import argparse
	import cPickle
	import scipy.cluster.hierarchy as hac

	from itertools import groupby
	from itertools import product
	from scipy.sparse import lil_matrix
	from scipy.sparse import issparse
	from scipy.spatial.distance import squareform

glouppe / beard_export_pairs.py

Last active August 29, 2015 14:13

	import sys
	# Add the system-wide Python 2.7 dist-packages directories to the module
	# search path. These appends run before the imports that follow.
	# NOTE(review): presumably needed so the invenio modules resolve -- confirm.
	sys.path.append("/usr/lib/python2.7/dist-packages/")
	sys.path.append("/usr/local/lib/python2.7/dist-packages/")

	import string
	import re

	from joblib import Parallel, delayed
	from invenio.dbquery import run_sql
	from invenio.bibauthorid_dbinterface import get_title_of_paper

glouppe / sklearn_vs_tmva.py

Last active September 3, 2015 12:39

	import numpy as np

	from sklearn.cross_validation import train_test_split
	from sklearn.datasets import fetch_mldata
	from sklearn.ensemble import RandomForestClassifier
	from sklearn.ensemble import ExtraTreesClassifier
	from sklearn.ensemble import GradientBoostingClassifier
	from rep.estimators import TMVAClassifier

	from functools import partial

glouppe / buffering.py

Created February 6, 2019 12:22

	import multiprocessing as mp
	import queue
	import threading

	def buffered_gen_mp(source_gen, buffer_size=2):
	"""
	Generator that runs a slow source generator in a separate process.
	buffer_size: the maximal number of items to pre-generate (length of the buffer)
	"""
	if buffer_size < 2:

glouppe / test_set.csv

Created November 27, 2019 16:39

We can't make this file beautiful and searchable because it's too large.

	SMILES,CHEM_ID
	COC1:C:C(C2C3=C(CCCC3=O)N(C3:C:C:C(C):C:C:3)C3=C2C(=O)CCC3):C([N+](=O)[O-]):C:C:1OC,Chem_1
	O=C1NC(N2CCCCC2)=NC1=CC1:C:C:C:S:1,Chem_2
	COC1:C:C(C=C2C(=O)N(C(=O)C3:C:C:C(Cl):C:C:3)N=C2C):C:C(OC):C:1OC,Chem_3
	CC#CC(O)(C(=O)OC1CCN(C)CC1)C1CCCCC1,Chem_4
	COC1:C:C:C(N=C(C)C(C)=NC2:C:C:C(OC):C:C:2):C:C:1,Chem_5
	CSC1:N:C(O):C(C#N):C(C2:C:C:C(C):C:C:2):N:1,Chem_6
	CSC1:N:C(C2:C:C:C:C:C:2):N:C(N2CCOCC2):[S+]:1.[IH2+],Chem_7
	CC1:C:C:C(C=C2N=C(NN=CC(O)C(O)C(O)CO)NC2=O):C:C:1,Chem_8
	CN(C)C(C1=C(O)C(C2:C:C:C:C:C:2)N(C2:C:C:C:C:C:2)C1=O)N1CCOCC1,Chem_9