from bs4 import BeautifulSoup
import re

try:
    from urllib2 import urlopen         # Python 2
except ImportError:
    from urllib.request import urlopen  # Python 3


def scrapeNOAA(url):
    # Fetch the page and hand it to BeautifulSoup for parsing.
    soup = BeautifulSoup(urlopen(url))
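
The preview cuts scrapeNOAA off right after the soup is built. A minimal sketch of how the scrape might continue, reusing the imports above and assuming the goal is to collect data-file links from the fetched NOAA page (the anchor selector and the .csv filter are illustrative assumptions, not the author's code):

def scrapeNOAA_sketch(url):
    # Hypothetical continuation: gather links that look like data files.
    soup = BeautifulSoup(urlopen(url))
    links = []
    for a in soup.find_all('a', href=True):
        if re.search(r'\.csv$', a['href']):  # assumed file type
            links.append(a['href'])
    return links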

# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html

from scrapy.item import Item, Field


class SepPdfItem(Item):
    # define the fields for your item here like:
    # name = Field()
    pass  # placeholder: the preview ends before any fields are declared
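
The template stops before any fields are declared. As a hedged illustration (the field names are guesses, not taken from the project), a populated Scrapy Item looks like this:

from scrapy.item import Item, Field

class SepPdfItemExample(Item):
    # Hypothetical fields -- the real SepPdfItem may declare different ones.
    title = Field()
    url = Field()
    pdf_file = Field()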

<?php
/*
################################################################################################
# Author: mekler
#
# Installation instructions
# sudo apt-get install php5-cli
#
# Command line
# python wef-hack_2008.py 5626281-Financial-Development-Report-2008/ $(ls -1 5626281-Financial-Development-Report-2008/ | grep .pdf$)
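
The $(...) substitution above expands to every .pdf filename inside the report directory. A small Python sketch of building the same argument list (the directory and script names come from the comment; everything else is assumed):

import os
import subprocess

report_dir = '5626281-Financial-Development-Report-2008/'
# Equivalent of: ls -1 <dir> | grep .pdf$
pdfs = [name for name in sorted(os.listdir(report_dir)) if name.endswith('.pdf')]
subprocess.call(['python', 'wef-hack_2008.py', report_dir] + pdfs)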

# coding=utf-8
import pycurl
import StringIO  # Python 2 module; this script targets Python 2
import sys
import ast
import pymongo
from pymongo import MongoClient


def construyeSQL(cadena):
    # construyeSQL ("build SQL") starts by splitting the incoming string on spaces.
    aux = cadena.split(' ')
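
The preview stops right after the split. A hedged guess at how such a helper might finish -- the INSERT template, column naming, and quoting below are assumptions, not the original logic:

def construyeSQL_sketch(cadena, tabla='datos'):
    # Hypothetical completion: turn a space-separated record into an INSERT statement.
    aux = cadena.split(' ')
    columnas = ', '.join('col%d' % i for i in range(len(aux)))
    valores = ', '.join("'%s'" % campo.replace("'", "''") for campo in aux)
    return 'INSERT INTO %s (%s) VALUES (%s);' % (tabla, columnas, valores)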

# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html

from scrapy.item import Item, Field


class NewDataDownloaderItem(Item):
    # define the fields for your item here like:
    # name = Field()
    pass  # placeholder: the preview ends before any fields are declared

<?php
// Basic cURL helper
function curl($url, $postArray=NULL, $srcPage=NULL) {
    $postFields = "";
    $ch = curl_init($url);                              // Initialise cURL with the target URL
    curl_setopt($ch, CURLOPT_URL, $url);                // Set the URL option explicitly as well
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);     // Return the response body instead of printing it
    curl_setopt($ch, CURLOPT_ENCODING, 'identity');     // Ask for an uncompressed response
    curl_setopt($ch, CURLOPT_COOKIEJAR, 'cookie.txt');  // Persist cookies to cookie.txt
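
The PHP helper is cut off before it sends the request. Since the rest of the collection leans on Python and already imports pycurl, here is a hedged pycurl sketch of the same idea (the function name, defaults, and POST handling are assumptions, not the author's code):

import pycurl
from io import BytesIO

def curl_fetch(url, post_fields=None, cookie_jar='cookie.txt'):
    buf = BytesIO()
    c = pycurl.Curl()
    c.setopt(pycurl.URL, url)
    c.setopt(pycurl.WRITEFUNCTION, buf.write)     # return the body instead of printing it
    c.setopt(pycurl.ENCODING, 'identity')         # ask for an uncompressed response
    c.setopt(pycurl.COOKIEJAR, cookie_jar)        # persist cookies across requests
    if post_fields:
        c.setopt(pycurl.POSTFIELDS, post_fields)  # switch to a POST when data is given
    c.perform()
    c.close()
    return buf.getvalue()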

# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html

from scrapy.item import Item, Field


class SnieSepItem(Item):
    # define the fields for your item here like:
    # name = Field()
    pass  # placeholder: the preview ends before any fields are declared

from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy.selector import HtmlXPathSelector
from diputados.items import DiputadosItem
from time import time
import re


class DiputadosSpider(CrawlSpider):
    name = 'diputados'
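
The spider preview stops at its name. A hedged sketch of the pieces a CrawlSpider of this shape usually adds next, reusing the imports above; the domain, start URL, link pattern, XPaths, and field names are placeholders, not the real project values:

class DiputadosSpiderSketch(CrawlSpider):
    name = 'diputados_sketch'
    allowed_domains = ['example.org']              # assumed domain
    start_urls = ['http://example.org/diputados']  # assumed entry point

    rules = (
        # Follow pages that look like individual profiles and parse each one.
        Rule(SgmlLinkExtractor(allow=(r'/diputado/\d+',)), callback='parse_item'),
    )

    def parse_item(self, response):
        hxs = HtmlXPathSelector(response)
        item = DiputadosItem()  # assumes the item declares the fields used below
        item['nombre'] = hxs.select('//h1/text()').extract()
        item['scraped_at'] = time()
        return item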