Skip to content

Instantly share code, notes, and snippets.

View pcbje's full-sized avatar

Petter Christian Bjelland pcbje

View GitHub Profile
@pcbje
pcbje / to_array.js
Created December 13, 2015 12:48
Convert tabular text (CSV, TSV, etc.) to a JavaScript array
var to_array = function(text, delimeter, has_header, header_size) {
var lines = text.split('\n');
var columns = has_header ? lines[header_size - 1].split(delimeter) : null;
var rows = [];
for (var i=header_size; i<lines.length; i++) {
var array = lines[i].split(delimeter);
var row = {};
for (var j in array) {
# Compose services (legacy v1 layout: service names are the top-level keys).

# Tika server, built from the logicalspark/docker-tikaserver image.
tika:
  image: logicalspark/docker-tikaserver
  cpu_shares: 4
  ports:
    # host port 9998 -> container port 9998
    - "9998:9998"

# Elasticsearch, started with CORS-related flags on the command line.
elasticsearch:
  image: elasticsearch
  cpu_shares: 4
  # NOTE(review): the second flag has no "es." prefix, unlike the first — confirm it is picked up.
  command: elasticsearch -Des.http.cors.enabled=true -Dhttp.cors.allow-origin=*
# License: GPLv3
# https://github.com/aboSamoor/polyglot/blob/master/LICENSE
import polyglot
from polyglot.text import Text, Word
from flask import Flask, request
app = Flask(__name__)
# sudo yum install libicu libicu-devel.x86_64 gcc-c++ python3-devel
# pip3 install morfessor pycld2 pyicu numpy six polyglot Flask
#!/usr/bin/python
# -*- coding: utf-8 -*-
import base64
import hashlib
from datetime import datetime
import struct
import logging
import json
# Build a Makecluster instance and register three pairs with it.
# NOTE(review): addPair is not visible in this excerpt — presumably it links the
# two ids of a pair into the same cluster; confirm against the class definition.
makecluster = Makecluster()
makecluster.addPair(1, 2)
makecluster.addPair(2, 3)  # shares id 2 with the previous pair
makecluster.addPair(4, 5)  # shares no id with the earlier pairs
# Print whatever getClusters() returns for the pairs added above.
print (makecluster.getClusters())
class Makecluster(object):
    """Container for cluster bookkeeping state.

    Attributes (all set in ``__init__``):
        clusters: dict, initially empty; returned by ``getClusters``.
        edges: dict, initially empty.
        reverse_index: dict, initially empty.
        clusterIndex: int counter, initially 0.

    NOTE(review): nothing visible here populates these containers — presumably
    other methods of this class do; confirm their key/value layout there.
    """

    def __init__(self):
        """Initialise every container empty and the cluster counter at zero."""
        self.clusters = {}
        self.edges = {}
        self.reverse_index = {}
        self.clusterIndex = 0

    def getClusters(self):
        """Return the ``clusters`` dict itself (the live object, not a copy)."""
        return self.clusters
version: '2'
volumes:
postgres_data:
driver: local
app_data:
driver: local
services:
postgres:
package filter
import (
"fmt"
"log"
)
var bytehash = []uint32{
0x12bd9527, 0xf4140cea, 0x987bd6e1, 0x79079850, 0xafbfd539, 0xd350ce0a,
0x82973931, 0x9fc32b9c, 0x28003b88, 0xc30c13aa, 0x6b678c34, 0x5844ef1d,
import os
import re
def extract_count(line):
lower_line = line.lower()
numbers = re.findall(' (cero|uno|dos|tres|cuatro|cinco|seis|siete|ocho|nueve)(?:$|,| )', lower_line)
if len(numbers) != 4:
return None
text = ''
040701002.02.1.0001
130201012.03.1.0001
190208003.01.1.0001
130101006.02.1.0001
171501003.01.1.0001
020201015.01.1.0001
122003002.01.1.0001
190210001.02.1.0001
130501031.01.1.0001
130701075.01.1.0001