Skip to content

Instantly share code, notes, and snippets.

View malev's full-sized avatar

Marcos Vanetta malev

View GitHub Profile
@malev
malev / convert.sh
Last active August 29, 2015 13:56
Convert encoding
# Convert every ISO-8859-1 encoded *.txt file in the current directory to
# UTF-8, writing the result alongside the original as <name>.utf8.txt.
for file in *.txt; do
  # When nothing matches, the shell leaves the literal pattern "*.txt" in
  # place; skip anything that is not an existing regular file.
  [ -f "$file" ] || continue
  # Outputs of a previous run also match *.txt; skip them so they are not
  # re-converted (which would double-encode them into *.utf8.utf8.txt).
  case "$file" in
    *.utf8.txt) continue ;;
  esac
  iconv -f ISO-8859-1 -t utf-8 "$file" > "${file%.txt}.utf8.txt"
done
@malev
malev / generalize.rb
Created March 31, 2014 21:08
Generalize
require 'csv'
require 'json'
require 'uri'
require 'ostruct'
require 'httparty'
require 'progressbar'
require 'people'
class OutputCSV
attr_reader :filename
@malev
malev / addresses_matcher.py
Created April 15, 2014 22:02
Addresses matcher
import sys
import time
import csv
class AddressesFinder:
def __init__(self, addresses):
self.addresses = addresses
def call(self):
output = []
@malev
malev / answer.py
Last active August 29, 2015 14:00
Coreferencer
class Answer(object):
"""docstring for answer
>>> answer = Answer(1, [1,3])
>>> answer.included()
True
>>> answer.excluded()
False
>>> answer.includes()
[1, 3]
>>> answer = Answer(2, [1, 2, 3])
@malev
malev / statify.py
Created April 22, 2014 19:19
Filter Medicare dataset by state
#!/usr/bin/env python
import os
import csv
import glob
import optparse
class CSVHandler:
def __init__(self, filename):
@malev
malev / gender_detection.rb
Created May 5, 2014 16:36
Gender detection testing app
require 'csv'
require 'net/http'
require 'json'
require 'beauvoir'
require 'sexmachine'
names_with_gender = []
CSV.foreach('input.csv') do |row|
names_with_gender << row
@malev
malev / gender_detection.py
Created May 5, 2014 16:37
Gender detection testing app (genderPredictor)
import csv
from genderPredictor import genderPredictor
gp = genderPredictor()
gp.trainAndTest()
def gender(name):
output = 'unknown'
tmp = gp.classify(name)
@malev
malev / convert.rb
Created May 13, 2014 16:33
Clean Uruguay names & gender dataset
# encoding: UTF-8
require 'csv'
filename = 'nombre_nacim_por_anio_y_sexo.csv'
class Name
attr_reader :name, :gender, :male_count, :female_count, :year
def self.valid?(name)
@malev
malev / srap_ba.rb
Created May 13, 2014 16:45
Scraper for list of names in BA
# encoding: UTF-8
require 'open-uri'
require 'nokogiri'
require 'csv'
def gen_url(offset=0)
if offset == 0
"http://www.buenosaires.gob.ar/areas/registrocivil/nombres/busqueda/buscador_nombres.php?&menu_id=16082"
else
@malev
malev / scraper.rb
Created July 18, 2014 14:11
Turbot scraper example
require 'open-uri'
require 'json'
require 'mechanize'
require 'pdf-reader'
require 'turbotlib'
SOURCE_URL = "http://www.cityofchicago.org/city/en/depts/doit/supp_info/list_of_contractors.html"