To clone a specific branch "b" (sticking to RELEASE-1.0 is highly recommended), run the following:
git clone -b RELEASE-1.0 https://github.com/moses-smt/mosesdecoder.git
import os | |
import Utils | |
question_details = {} | |
top_dir = '/directory/with/your/site/data' | |
with open(os.path.join(top_dir, 'posts.xml')) as posts: | |
for event, elem in etree.iterparse(posts): | |
if Utils.getPostTypeId(elem) != "1": | |
continue |
# Give the top-level XML directory and this script will return: | |
''' | |
num users | |
Num epic users | |
num famous questions | |
num questions | |
num answers | |
''' | |
from lxml import etree |
import time | |
import os | |
import redis | |
path = '/path/to/unigrams' | |
client = redis.Redis(host = 'host-ip-here', port = 6385, db = 0) | |
pipeline = client.pipeline(transaction = False) | |
for f in os.listdir(path): | |
print ' starting with file ', f | |
start = time.time() |
wget http://storage.googleapis.com/books/ngrams/books/googlebooks-eng-all-1gram-20120701-a.gz | |
wget http://storage.googleapis.com/books/ngrams/books/googlebooks-eng-all-1gram-20120701-b.gz | |
wget http://storage.googleapis.com/books/ngrams/books/googlebooks-eng-all-1gram-20120701-c.gz | |
wget http://storage.googleapis.com/books/ngrams/books/googlebooks-eng-all-1gram-20120701-d.gz | |
wget http://storage.googleapis.com/books/ngrams/books/googlebooks-eng-all-1gram-20120701-e.gz | |
wget http://storage.googleapis.com/books/ngrams/books/googlebooks-eng-all-1gram-20120701-f.gz | |
wget http://storage.googleapis.com/books/ngrams/books/googlebooks-eng-all-1gram-20120701-g.gz | |
wget http://storage.googleapis.com/books/ngrams/books/googlebooks-eng-all-1gram-20120701-h.gz | |
wget http://storage.googleapis.com/books/ngrams/books/googlebooks-eng-all-1gram-20120701-i.gz | |
wget http://storage.googleapis.com/books/ngrams/books/googlebooks-eng-all-1gram-20120701-j.gz |
import sys | |
from lxml import etree | |
import Utils | |
with open(sys.argv[1]) as userXml: | |
context = etree.iterparse(userXml) | |
countEpic = 0 | |
for event, elem in context: | |
name = Utils.getBadgeName(elem) |
def hashing(l, buckets=3):
    """Map a string to a bucket index by summing its character code points.

    Args:
        l: An iterable of single-character strings (typically a ``str``).
            Each element is passed to ``ord``.
        buckets: Modulus applied to the sum; the result is this sum modulo
            ``buckets``. Defaults to 3, the value the function originally
            hard-coded.

    Returns:
        The sum of ``ord(ch)`` over every element of ``l``, modulo
        ``buckets``. An empty input sums to 0 and therefore returns 0.

    Raises:
        TypeError: If an element of ``l`` is not a single character
            (raised by ``ord``).
        ZeroDivisionError: If ``buckets`` is 0.
    """
    # The sum is order-insensitive, so anagrams always share a bucket.
    return sum(ord(letter) for letter in l) % buckets
[[ -1.26705603e-04 1.22045489e-01 -1.11196205e-05 0.00000000e+00 | |
-3.09116956e-07 -1.04906557e-05 -3.00129134e-03 -8.41874652e-04 | |
-2.14845603e-01 -8.51142269e-05 1.18718039e-01 2.82917922e-02]] | |
Error is 0.00478333333333 |