Skip to content

Instantly share code, notes, and snippets.

View arcolife's full-sized avatar

Archit Sharma arcolife

View GitHub Profile
@arcolife
arcolife / file1.py
Last active August 29, 2015 13:57 — forked from anonymous/file1.py
mongo upload script
import os

# Generate a bash script ('mongo_upload') containing one mongoimport command
# per file found in ./_User_sharded/.
filenames = os.listdir('./_User_sharded/')

# The context manager guarantees the script is flushed and closed. The
# original handle was never closed before `f` was rebound to another file.
# NOTE(review): 'wb' combined with str writes relies on Python 2 string
# semantics, matching the rest of this history (print statement, raw_input).
with open('mongo_upload', 'wb') as f:
    f.write('#!/bin/bash\n')
    # The generated script changes into the shard directory first, so the
    # bare file names passed to --file resolve correctly.
    f.write('cd ./_User_sharded/\n')
    for filename in filenames:
        # NOTE(review): file names are written unquoted; a name containing
        # spaces or shell metacharacters would break the generated command.
        cmd = ['mongoimport', '--db', 'takezero_raw',
               '--collection', 'users', '--file', filename]
        f.write(' '.join(cmd) + "\n")
# Load the _User.json file and explore its structure (interactive session).
f = open('_User.json','rb')
import json
# Parse the entire file contents as a single JSON document.
data = json.loads(f.read())
f.close()
# The bare expressions below only echo a value when typed at an interactive prompt.
data
# NOTE(review): the .keys() and ['results'] lookups that follow suggest `data`
# is a dict, in which case this integer lookup would raise KeyError -- confirm.
data[0]
data.keys()
# First entry under the top-level 'results' key, then that entry's keys.
data['results'][0]
data['results'][0].keys()
from pyes import *
# Reload _User.json and walk through its structure again (interactive session;
# these statements repeat ones entered earlier in this history).
f = open('_User.json','rb')
import json
# Parse the entire file contents as a single JSON document.
data = json.loads(f.read())
f.close()
# The bare expressions below only echo a value when typed at an interactive prompt.
data
# NOTE(review): the .keys() and ['results'] lookups that follow suggest `data`
# is a dict, in which case this integer lookup would raise KeyError -- confirm.
data[0]
data.keys()
# First entry under the top-level 'results' key, then that entry's keys.
data['results'][0]
data['results'][0].keys()
from pyes import *
from pyes import *
# Query an Elasticsearch node at localhost:9200 (interactive session).
# ES and TermQuery are presumably supplied by the wildcard pyes import -- confirm.
conn = ES('localhost:9200')
# Term query pairing "id" with "0712.1111v1"
# (assumes a TermQuery(field, value) argument order -- TODO confirm).
q = TermQuery("id","0712.1111v1")
results = conn.search( query = q)
# IPython introspection: `?` shows help for the object and `??` adds the source
# where available. These two lines are not valid plain-Python syntax.
results?
results??
# Echo the result object at the prompt.
results
# Print every item yielded by iterating over the search results.
for r in results:
    # Single-argument print(...) behaves the same as the Python 2 print
    # statement and also parses on Python 3.
    print(r)
import untangle
# Parse dblp.xml with untangle. This rebinds `data`, which earlier in this
# history held the parsed _User.json contents.
data = untangle.parse('dblp.xml')
import urllib

# Endpoint of the arXiv export API and the fixed search expression.
base_url = 'http://export.arxiv.org/api/query?'
search_query = 'all:electron'

# Values for the start / max_results query parameters; the latter is read
# from the user and must parse as an integer.
start = 0
max_results = int(raw_input("Enter max result count: "))

# Assemble the query string and download the raw response body.
query = 'search_query=%s&start=%i&max_results=%i' % (
    search_query, start, max_results)
response = urllib.urlopen(base_url + query).read()
import feedparser
# IPython automagic: show the current working directory (not plain Python).
pwd
# Read each stopword file and split its contents on whitespace into a list.
# NOTE(review): the file objects returned by open() are never closed explicitly.
s1 = open('stopwords.txt','r').read().split()
s1
s2 = open('../scholarec/corpus/stopwords.txt','r').read().split()
s2
set(s1)
set(s2)
# Entries present in the first list but absent from the second.
set(s1)-set(s2)
# NOTE(review): `l` looks like a mistyped `ls`; as plain Python it is an
# undefined name -- confirm.
l
# IPython automagic/alias: list the current directory (not plain Python).
ls
from __future__ import with_statement # we'll use this later, has to be here
from argparse import ArgumentParser
import requests
from BeautifulSoup import BeautifulStoneSoup as Soup
def parse_sitemap(url):
resp = requests.get(url)
# we didn't get a valid response, bail
@arcolife
arcolife / file1.py
Created January 22, 2014 18:57 — forked from anonymous/file1.py
import pandas as pd
import numpy as np
import sys
# Check interpreter and pandas versions, then build a small Panel (interactive session).
sys.version_info
# Python 2 print statement.
print sys.version_info
# NOTE(review): presumably a first attempt at the version lookup; the
# `pd.__version__` attribute on the next line is the version string.
pd.version
pd.__version__
# 27 evenly spaced values from 1 to 27, reshaped into a 3x3x3 array.
t = np.linspace(1,27,27).reshape(3,3,3)
# NOTE(review): pd.Panel was deprecated and later removed from pandas, so this
# only runs on old pandas releases -- confirm the target version.
pan = pd.Panel(t)
print pan
@arcolife
arcolife / file1.py
Created November 29, 2013 21:20 — forked from anonymous/file1.py
from pyhashxx import hashxx
import pyparsing
# IPython help lookup for the pyparsing module (not plain-Python syntax).
pyparsing?
from pyparsing import Word, alphas
# Grammar: an alphabetic word, a comma, another alphabetic word, then "!".
greet = Word( alphas ) + "," + Word( alphas ) + "!"
hello = "Hello, World!"
# Show the input string next to the result of parsing it with the grammar.
print (hello, "->", greet.parseString( hello ))
# The same message handed to hashxx three ways: one bytes value, separate
# bytes arguments, and a tuple of bytes (presumably to compare the resulting
# hashes -- confirm).
hashxx(b'Hello World!')
hashxx(b'Hello', b' ', b'World!')
hashxx((b'Hello', b' ', b'World!'))