This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# for more info check out http://webmining.olariu.org/is-winter-really-coming | |
import re | |
from math import log, sqrt | |
import matplotlib.pyplot as pyplot | |
DEPTH = 3  # minimum depth for tree construction = minimum phrase length
OCCURRENCES = 10  # minimum number of phrase occurrences

# Read the whole input text and fold it to lower case. The context manager
# closes the file handle as soon as the contents have been read, instead of
# leaving it open until the interpreter gets around to collecting it.
with open('game.txt') as source:
    text = source.read().lower()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# for more info check out http://webmining.olariu.org/interview-with-a-lady-gaga-fan | |
# made to be run in the ipython console | |
import urllib, urllib2, time, random | |
import simplejson as json | |
def fetch_url(url, get=None, post=None): | |
user_agent = 'Andrei Olariu\'s Web Mining for Dummies' | |
headers = {'User-Agent': user_agent} | |
if get: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# more info: http://webmining.olariu.org/ubervu-hackaton-relationship-tagcloud | |
from nltk import pos_tag, word_tokenize | |
import en # Nodebox English Linguistics library | |
import urllib, urllib2, re | |
import json | |
from time import time | |
def fetch_url(url, get=None, post=None): | |
user_agent = 'Andrei Olariu\'s Web Mining for Dummies' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# more info at http://webmining.olariu.org/el-clasico-on-twitter | |
# this code is designed to be run in ipython | |
import urllib, urllib2, time, threading, Queue, re | |
from datetime import datetime | |
import simplejson as json | |
import matplotlib.pyplot as plt | |
import numpy as np |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# more info here: http://webmining.olariu.org/the-story-of-the-oscar-predictions | |
import urllib, urllib2, re | |
import json | |
from time import time | |
# using this POS tagger: | |
# http://jasonwiener.com/2006/01/20/simple-nlp-part-of-speech-tagger-in-python/ | |
import NLPlib |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math


def is_filled_square(rows):
    """Return True when the '#' cells of *rows* form one solid square.

    rows is a sequence of strings, one per grid line. Only the character
    '#' counts as a filled cell; every other character (including a
    trailing newline left by readline) counts as empty.

    The check counts the filled cells, takes the integer square root as the
    candidate side length, and then counts the filled cells inside the
    side x side window anchored at the first filled cell of the first
    non-empty row. The shape is a solid square exactly when that window
    holds every filled cell of the grid.
    """
    grid = [[1 if cell == '#' else 0 for cell in row] for row in rows]
    total = sum(sum(row) for row in grid)
    if total == 0:
        # No filled cell at all: there is no square to find. (Without this
        # guard the search for the first non-empty row runs off the grid.)
        return False

    side = int(math.sqrt(total))
    # any() is used instead of filter() truthiness, which only works on
    # Python 2 where filter() returns a list.
    top = next(index for index, row in enumerate(grid) if any(row))
    left = grid[top].index(1)
    inside = sum(sum(row[left:left + side]) for row in grid[top:top + side])
    # If total is not a perfect square, the window has fewer than total
    # cells, so the comparison is False without a separate check.
    return inside == total


def main(path='i'):
    """Read the test cases from *path* and print one verdict per case.

    The input starts with the number of cases; each case is a line holding
    the number of grid rows followed by that many grid lines. Output lines
    have the form "Case #<n>: YES" or "Case #<n>: NO".
    """
    with open(path) as handle:  # closed even if parsing fails midway
        read_line = handle.readline
        for case in range(1, int(read_line()) + 1):
            height = int(read_line())
            rows = [read_line() for _ in range(height)]
            verdict = 'YES' if is_filled_square(rows) else 'NO'
            # Single parenthesised argument: prints identically on
            # Python 2 and Python 3.
            print("Case #%s: %s" % (case, verdict))


# Running the file as a script behaves as before (reads 'i', prints results).
if __name__ == '__main__':
    main()