Skip to content

Instantly share code, notes, and snippets.

@taniki
Created April 3, 2014 12:29
Show Gist options
  • Select an option

  • Save taniki/9953402 to your computer and use it in GitHub Desktop.

Select an option

Save taniki/9953402 to your computer and use it in GitHub Desktop.
Import a Wikipedia visits dataset into Neo4j
import json
import requests
# Path of the input file; main() parses it one JSON document per line.
dataset = "data/wkp-steps.20140402.json"

# URL every Cypher request in this script is POSTed to.
neo4j_endpoint = "http://localhost:7474/db/data/cypher"

# Module-level accumulators filled while reading the dataset:
#   pages  -- distinct page URLs
#   users  -- distinct user ids
#   visits -- one dict per dataset line (user_id, page_url, time)
pages, users, visits = set(), set(), []
def main():
    """Read the visits dataset and import it into Neo4j.

    Every non-blank line of ``dataset`` is parsed as a JSON object that must
    provide the ``url``, ``userId`` and ``time`` keys. Distinct URLs and user
    ids are collected in the module-level ``pages`` and ``users`` sets, and
    each line is recorded in ``visits``. Once the file is consumed, the pages,
    the users and finally the visits are sent to Neo4j, in that order.

    Raises:
        ValueError: if a non-blank line is not valid JSON.
        KeyError: if a record lacks one of the expected keys.
    """
    # The context manager closes the file even when a line fails to parse.
    with open(dataset) as steps:
        for step_json in steps:
            # Skip blank lines (e.g. an empty line at the end of the file),
            # which json.loads would otherwise reject.
            if not step_json.strip():
                continue

            data = json.loads(step_json)
            print(data)

            pages.add(data["url"])
            users.add(data["userId"])
            visits.append({
                "user_id": data["userId"],
                "page_url": data["url"],
                "time": data["time"]
            })

    print("pages: %i" % len(pages))
    print("users: %i" % len(users))

    # Nodes first: the visit relationships are matched against existing
    # User and Page nodes.
    import_pages()
    import_users()
    import_visits()
def import_pages():
    """Create the ``Page`` nodes for every URL collected in ``pages``.

    A single Cypher ``CREATE`` request is POSTed to ``neo4j_endpoint``. Its
    ``properties`` parameter is a list holding one property map per URL, with
    the keys ``url`` and ``title`` (the title is currently the URL itself).
    Nothing is sent when ``pages`` is empty.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    properties = [{"url": url, "title": url} for url in pages]
    if not properties:
        # Nothing to create; do not send a CREATE with an empty list.
        return

    payload = {
        "query": "CREATE (p:Page { properties }) RETURN p",
        "params": {
            "properties": properties
        }
    }
    response = requests.post(neo4j_endpoint, data=json.dumps(payload))
    # Surface a rejected request instead of silently continuing with a
    # database that holds no pages.
    response.raise_for_status()
def import_users():
    """Create the ``User`` nodes for every id collected in ``users``.

    A single Cypher ``CREATE`` request is POSTed to ``neo4j_endpoint``. Its
    ``properties`` parameter is a list holding one property map per user,
    with the single key ``google_id``. Nothing is sent when ``users`` is
    empty.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    # ``user_id`` rather than ``id`` so the builtin is not shadowed.
    properties = [{"google_id": user_id} for user_id in users]
    if not properties:
        # Nothing to create; do not send a CREATE with an empty list.
        return

    payload = {
        "query": "CREATE (p:User { properties }) RETURN p",
        "params": {
            "properties": properties
        }
    }
    response = requests.post(neo4j_endpoint, data=json.dumps(payload))
    # Surface a rejected request instead of silently continuing with a
    # database that holds no users.
    response.raise_for_status()
def import_visits():
    """Create one ``visited`` relationship in Neo4j per entry of ``visits``.

    For each visit, a Cypher query is POSTed to ``neo4j_endpoint``. It matches
    the ``User`` node by ``google_id`` and the ``Page`` node by ``url``, then
    creates a ``visited`` relationship carrying the visit's ``time``.

    Raises:
        requests.HTTPError: if the server answers any request with a 4xx/5xx
            status; visits sent before the failure are not rolled back here.
    """
    # The query text is constant, so build it once outside the loop; only the
    # parameters change per visit.
    query = """
MATCH (u:User),(p:Page)
WHERE u.google_id = {user} AND p.url = {page}
CREATE (u)-[r:visited { time: {time} }]->(p)
RETURN r
"""
    # A session reuses the underlying HTTP connection across the many
    # per-visit requests instead of reconnecting each time.
    with requests.Session() as session:
        for visit in visits:
            payload = {
                "query": query,
                "params": {
                    "user": visit["user_id"],
                    "page": visit["page_url"],
                    "time": visit["time"]
                }
            }
            response = session.post(neo4j_endpoint, data=json.dumps(payload))
            # Stop at the first rejected request rather than ignoring it.
            response.raise_for_status()
def url2title(url):
    """Return the title for ``url``; currently the URL itself, unchanged."""
    title = url
    return title
# Script entry point: runs the whole import when this file is executed.
# NOTE: there is no ``__main__`` guard, so importing this module runs it too.
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment