Assuming you want to clone a specific branch "b" (sticking to RELEASE-1.0 is highly recommended), do the following:
git clone -b RELEASE-1.0 https://github.com/moses-smt/mosesdecoder.git
| import os | |
| import Utils | |
| question_details = {} | |
| top_dir = '/directory/with/your/site/data' | |
| with open(os.path.join(top_dir, 'posts.xml')) as posts: | |
| for event, elem in etree.iterparse(posts): | |
| if Utils.getPostTypeId(elem) != "1": | |
| continue |
| # Give the top-level XML directory and this script will return: | |
| ''' | |
| num users | |
| Num epic users | |
| num famous questions | |
| num questions | |
| num answers | |
| ''' | |
| from lxml import etree |
| import time | |
| import os | |
| import redis | |
| path = '/path/to/unigrams' | |
| client = redis.Redis(host = 'host-ip-here', port = 6385, db = 0) | |
| pipeline = client.pipeline(transaction = False) | |
| for f in os.listdir(path): | |
| print ' starting with file ', f | |
| start = time.time() |
| wget http://storage.googleapis.com/books/ngrams/books/googlebooks-eng-all-1gram-20120701-a.gz | |
| wget http://storage.googleapis.com/books/ngrams/books/googlebooks-eng-all-1gram-20120701-b.gz | |
| wget http://storage.googleapis.com/books/ngrams/books/googlebooks-eng-all-1gram-20120701-c.gz | |
| wget http://storage.googleapis.com/books/ngrams/books/googlebooks-eng-all-1gram-20120701-d.gz | |
| wget http://storage.googleapis.com/books/ngrams/books/googlebooks-eng-all-1gram-20120701-e.gz | |
| wget http://storage.googleapis.com/books/ngrams/books/googlebooks-eng-all-1gram-20120701-f.gz | |
| wget http://storage.googleapis.com/books/ngrams/books/googlebooks-eng-all-1gram-20120701-g.gz | |
| wget http://storage.googleapis.com/books/ngrams/books/googlebooks-eng-all-1gram-20120701-h.gz | |
| wget http://storage.googleapis.com/books/ngrams/books/googlebooks-eng-all-1gram-20120701-i.gz | |
| wget http://storage.googleapis.com/books/ngrams/books/googlebooks-eng-all-1gram-20120701-j.gz |
| import sys | |
| from lxml import etree | |
| import Utils | |
| with open(sys.argv[1]) as userXml: | |
| context = etree.iterparse(userXml) | |
| countEpic = 0 | |
| for event, elem in context: | |
| name = Utils.getBadgeName(elem) |
def hashing(l):
    """Map a sequence of characters to one of three buckets (0, 1 or 2).

    The bucket is the sum of the code points of every character in ``l``,
    reduced modulo 3.

    Args:
        l: An iterable of single-character strings (typically a ``str``).

    Returns:
        An ``int`` in ``range(3)``; ``0`` for an empty input.

    Raises:
        TypeError: If an element is not a single character (raised by ``ord``).
    """
    # sum() over the code points replaces the manual accumulator loop.
    return sum(ord(letter) for letter in l) % 3
| [[ -1.26705603e-04 1.22045489e-01 -1.11196205e-05 0.00000000e+00 | |
| -3.09116956e-07 -1.04906557e-05 -3.00129134e-03 -8.41874652e-04 | |
| -2.14845603e-01 -8.51142269e-05 1.18718039e-01 2.82917922e-02]] | |
| Error is 0.00478333333333 |