A personal diary of DataFrame munging over the years.
Convert a Series' datatype to numeric (raises an error if the column has non-numeric values)
(h/t @makmanalp)
| import pandas as pd | |
| import scipy.sparse as sps | |
| df = pd.DataFrame({'tag1': ['sean', 'udi', 'bogdan'], 'tag2': ['sean', 'udi', 'udi'], 'freq': [1,2,3]}) | |
| # tag1 -> rows, tag2 -> columns | |
| df.set_index(['tag1', 'tag2'], inplace=True) | |
| mat = sps.coo_matrix((df.freq, (df.index.labels[0], df.index.labels[1]))) | |
| print(mat.todense()) |
| #!/usr/bin/python | |
| import urllib2 | |
| from bs4 import BeautifulSoup | |
| # Abre a pagina principal do IBGE onde contem os links para os estados. | |
| html = urllib2.urlopen('http://cidades.ibge.gov.br/xtras/home.php').read() | |
| # Pega o conteudo da pagina em HTML e joga para o BeautifulSoup mapear as tags | |
| soup = BeautifulSoup(html) |
A personal diary of DataFrame munging over the years.
Convert a Series' datatype to numeric (raises an error if the column has non-numeric values)
(h/t @makmanalp)
| """ Unsupervised evaluation metrics. """ | |
| # License: BSD Style. | |
| from itertools import combinations | |
| import numpy as np | |
| from sklearn.utils import check_random_state | |
| from sklearn.metrics.pairwise import distance_metrics | |
| from sklearn.metrics.pairwise import pairwise_distances |
| This is free and unencumbered software released into the public domain. | |
| Anyone is free to copy, modify, publish, use, compile, sell, or | |
| distribute this software, either in source code form or as a compiled | |
| binary, for any purpose, commercial or non-commercial, and by any | |
| means. | |
| In jurisdictions that recognize copyright laws, the author or authors | |
| of this software dedicate any and all copyright interest in the | |
| software to the public domain. We make this dedication for the benefit |
| /*jshint strict:false*/ | |
| /*global CasperError console phantom require*/ | |
| /** | |
| * grab links and push them into xml | |
| */ | |
| var casper = require("casper").create({ | |
| }); |
Roll your own IPython Notebook server on Amazon Web Services (EC2) using the AWS Free Tier.
| <?php | |
| //returns a big old hunk of JSON from a non-private IG account page. | |
| function scrape_insta($username) { | |
| $insta_source = file_get_contents('http://instagram.com/'.$username); | |
| $shards = explode('window._sharedData = ', $insta_source); | |
| $insta_json = explode(';</script>', $shards[1]); | |
| $insta_array = json_decode($insta_json[0], TRUE); | |
| return $insta_array; | |
| } |
Base URL: https://www.google.com/speech-api/v1/recognize
It accepts POST requests containing a voice file encoded in FLAC format, with query parameters that control the request.
client
The name of the client you're connecting from. For spoofing purposes, let's use `chromium`.
lang
The speech language, for example `ar-QA` for Qatari Arabic or `en-US` for U.S. English.