Rohit Dholakia rohitdholakia

Get Moses

To clone a specific branch "b" (sticking to RELEASE-1.0 is highly recommended), pass the branch name to `-b` as follows:

git clone -b RELEASE-1.0 https://github.com/moses-smt/mosesdecoder.git

	from __future__ import division
	from collections import defaultdict

	accepted_answers_for_tags = defaultdict(int)
	count = 0
	for key in question_details.iterkeys():
	if question_details[key]['acceptedId']:
	for t in question_details[key]['tags']:
	accepted_answers_for_tags[t] += 1

	from collections import defaultdict

	# Maps each tag to the number of times it occurs across all questions.
	tags_dict = defaultdict(int)

	# Iterating the dict directly yields its keys on both Python 2 and
	# Python 3 (dict.iterkeys() does not exist on Python 3).
	for key in question_details:
	    for tag in question_details[key]['tags']:
	        tags_dict[tag] += 1

	# The 20 (tag, count) pairs with the highest counts, largest first.
	# items() and a single-argument print(...) produce the same output on
	# Python 2 and Python 3; iteritems() and the print statement are
	# Python 2 only.
	top_tags = sorted(tags_dict.items(), key=lambda pair: pair[1], reverse=True)[:20]
	print(top_tags)


	import os

	# etree is used by the parsing loop below but was not imported in this
	# script, which would raise NameError on the first iterparse call.
	from lxml import etree

	import Utils

	question_details = {}
	top_dir = '/directory/with/your/site/data'

	# Walk posts.xml incrementally; iterparse yields (event, element) pairs.
	with open(os.path.join(top_dir, 'posts.xml')) as posts:
	    for event, elem in etree.iterparse(posts):
	        # Only elements whose post type id is "1" are processed further
	        # (presumably questions -- confirm against Utils.getPostTypeId).
	        if Utils.getPostTypeId(elem) != "1":
	            continue

	# Given the top-level XML directory, this script returns:
	'''
	num users
	num epic users
	num famous questions
	num questions
	num answers

	'''
	from lxml import etree

	import time
	import os
	import redis
	path = '/path/to/unigrams'
	client = redis.Redis(host = 'host-ip-here', port = 6385, db = 0)
	pipeline = client.pipeline(transaction = False)
	for f in os.listdir(path):

	print ' starting with file ', f
	start = time.time()

	# Fetch the googlebooks-eng-all-1gram-20120701 shards for letters a through j, in order.
	for letter in a b c d e f g h i j; do
	    wget "http://storage.googleapis.com/books/ngrams/books/googlebooks-eng-all-1gram-20120701-${letter}.gz"
	done

	import sys
	from lxml import etree
	import Utils

	with open(sys.argv[1]) as userXml:
	context = etree.iterparse(userXml)
	countEpic = 0

	for event, elem in context:
	name = Utils.getBadgeName(elem)