This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
xmllint --xpath '//*[@PostTypeId="1"]/@Tags' Posts.xml | sed 's/" Tags="/\n/g' | grep 'machine-learning' | sed 's/<\|>/;/g' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def ngrams(sequence, depth):
    """Return every n-gram of the wrapped ``sequence`` for n = depth .. 1.

    The input string is first wrapped as ``'^' + sequence + '*'``
    (presumably start/end boundary markers). For each window size from
    ``depth`` down to 1, every contiguous slice of that size is collected,
    left to right.

    Args:
        sequence: The string to slice into n-grams.
        depth: Largest window size. Values <= 0 yield an empty list; window
            sizes larger than the wrapped string contribute nothing.

    Returns:
        A list of substrings, longest windows first, each size ordered by
        starting position.
    """
    seq = '^' + sequence + '*'
    res = []
    for size in range(depth, 0, -1):
        # range() is empty when the window is longer than the string, which
        # matches a sliding window that never fits.
        for start in range(len(seq) - size + 1):
            res.append(seq[start:start + size])
    # NOTE(review): the visible definition stopped before any return, which
    # would discard ``res``; returning it is assumed to be the intent.
    return res
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This script implements the methodology described in Chap. 7 of
# Mateos, Pablo. "Names, Ethnicity and Populations". Springer, 2014.
# It builds a bipartite (forenames, surnames) graph projection for
# unsupervised learning of the ethnicity of names.
# It uses Louvain instead of FastCommunity, however.
# Works fine with pypy.
#
# by Antoine Mazières (http://mazier.es ; {github|twitter}@mazieres)
# Cortext Lab -- http://www.cortext.net/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import unittest | |
import networkx as nx | |
class TestExtract(unittest.TestCase): | |
def test_graph_from_adj_mat(self): | |
X = np.array([ | |
[(0,), (1,), (2,)], | |
[(1,), (0,), (0,)], | |
[(2,), (0,), (0,)]], dtype=[('weight', '<i8')]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import unittest | |
class TestExtract(unittest.TestCase):
    """Unit test for the ``mk_sparse`` helper."""

    def test_mk_sparse(self):
        """Each (outer key, set member) pair of ``raw`` must map to 1."""
        # Expected layout is a dictionary of keys, see
        # https://en.wikipedia.org/wiki/Sparse_matrix#Dictionary_of_keys_.28DOK.29
        raw = {'A': {'x', 'y', 'z'}, 'B': {'w', 'y'}}
        expected = {
            ('A', 'x'): 1,
            ('B', 'y'): 1,
            ('A', 'z'): 1,
            ('A', 'y'): 1,
            ('B', 'w'): 1,
        }
        tested = mk_sparse(raw)
        # Show both dictionaries in full when the comparison fails.
        msg = '\nExpected:\n{}\nGot:\n{}'.format(expected, tested)
        self.assertEqual(expected, tested, msg=msg)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def symmetrize(a):
    """Return a symmetric matrix built from the square array ``a``.

    Computes ``a + a.T - diag(a)``. Adding the transpose mirrors each
    off-diagonal entry across the diagonal; subtracting the diagonal matrix
    keeps the diagonal from being counted twice. For a triangular input
    this fills in the empty triangle and leaves the diagonal unchanged.

    Args:
        a: A square 2-D array exposing ``.T`` and ``.diagonal()``.

    Returns:
        The array ``a + a.T`` with the original diagonal of ``a``.
    """
    # http://stackoverflow.com/a/2573982/1565438
    return a + a.T - numpy.diag(a.diagonal())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def point_inside_polygon(coord, poly): | |
''' | |
http://www.ariel.com.au/a/python-point-int-poly.html | |
''' | |
x, y = coord | |
n = len(poly) | |
inside =False | |
p1x,p1y = poly[0] | |
for i in range(n+1): | |
p2x,p2y = poly[i % n] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import unittest | |
class TestExtract(unittest.TestCase): | |
def test_adjacency_matrix(self): | |
X = np.array([ | |
[1, 8, 3], | |
[5, 0, 0], | |
[0, 4, 2]]) | |
tested = adjacency_matrix(X) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
def wannabe_projection(df): | |
''' | |
https://stats.stackexchange.com/questions/142132/is-this-a-valid-method-for-unipartite-projection-of-a-bipartite-graph | |
''' | |
n_samples = df.shape[0] | |
res = np.zeros((n_samples, n_samples)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# by @mazieres for cortext.fr | |
import sqlite3 | |
import sys | |
import os | |
from collections import defaultdict | |
# PATH to the DB downloaded from cortext |