Skip to content

Instantly share code, notes, and snippets.

@agmangas
Last active October 16, 2019 10:12
Show Gist options
  • Save agmangas/ad67c484430a1a05ffec62402a533e16 to your computer and use it in GitHub Desktop.
Save agmangas/ad67c484430a1a05ffec62402a533e16 to your computer and use it in GitHub Desktop.
Eleccia Dataset Builder
import json
import math
import pprint
from pymongo import MongoClient
# Connection URI handed to MongoClient in main().
MONGO_URL = "mongodb://devit.fundacionctic.org:27117"
# When True, a node's "value" is the base-2 logarithm of its stored size;
# when False, the raw size is exported unchanged.
LOG_SCALE = False
# Inclusive lower bound on a node document's "size" for it to be exported.
SIZE_THRESHOLD = 200
def find_node_party(client, name):
    """Return the party recorded for the account *name*, if any.

    Looks up a single document in ``politica.seguidores_cuentas_info`` whose
    ``cuenta`` field equals *name*.

    Args:
        client: Connected ``MongoClient`` (or any object exposing the same
            ``politica.seguidores_cuentas_info.find_one`` interface).
        name: Account identifier matched against the ``cuenta`` field.

    Returns:
        The document's ``partido`` value, or ``None`` when no document
        matches or the matching document has no ``partido`` field.
    """
    doc = client.politica.seguidores_cuentas_info.find_one({"cuenta": name})
    # .get() keeps a partially filled account document from raising KeyError
    # and aborting the whole export.
    return doc.get("partido") if doc else None
def find_node_image(client, name):
    """Return the profile image URL recorded for the account *name*, if any.

    Looks up a single document in ``politica.seguidores_cuentas_info`` whose
    ``cuenta`` field equals *name*.

    Args:
        client: Connected ``MongoClient`` (or any object exposing the same
            ``politica.seguidores_cuentas_info.find_one`` interface).
        name: Account identifier matched against the ``cuenta`` field.

    Returns:
        The document's ``profile_image_url`` value, or ``None`` when no
        document matches or the matching document has no such field.
    """
    doc = client.politica.seguidores_cuentas_info.find_one({"cuenta": name})
    # .get() keeps a partially filled account document from raising KeyError
    # and aborting the whole export.
    return doc.get("profile_image_url") if doc else None
def find_node_human_name(client, name):
    """Return the display name recorded for the account *name*, if any.

    Looks up a single document in ``politica.seguidores_cuentas_info`` whose
    ``cuenta`` field equals *name*.

    Args:
        client: Connected ``MongoClient`` (or any object exposing the same
            ``politica.seguidores_cuentas_info.find_one`` interface).
        name: Account identifier matched against the ``cuenta`` field.

    Returns:
        The document's ``nombre`` value, or ``None`` when no document
        matches or the matching document has no ``nombre`` field.
    """
    doc = client.politica.seguidores_cuentas_info.find_one({"cuenta": name})
    # .get() keeps a partially filled account document from raising KeyError
    # and aborting the whole export.
    return doc.get("nombre") if doc else None
def main():
    """Export the follower graph stored in MongoDB to ``./output.json``.

    Reads every node document whose ``size`` is at least ``SIZE_THRESHOLD``,
    enriches each one with party, profile image and display name, then keeps
    only the edges whose two endpoints are among those nodes. The result is
    written as a JSON object with ``nodes`` and ``edges`` arrays.

    Raises:
        ValueError: If two selected node documents share the same
            ``id_node``; edges are resolved by name, so names must be unique.
    """
    client = MongoClient(MONGO_URL)
    try:
        print("Finding nodes")
        # Materialized on purpose: building each node issues further queries,
        # so the node cursor should not be left open while they run.
        node_docs = list(client.politica.seguidores_simplificado_nodos.find({
            "size": {"$gte": SIZE_THRESHOLD}
        }))

        print("Building nodes")
        nodes = [
            _build_node(client, idx, doc)
            for idx, doc in enumerate(node_docs)
        ]

        nodes_by_name = {item["name"]: item for item in nodes}
        # Explicit check instead of ``assert`` so it still runs under ``-O``.
        if len(nodes_by_name) != len(nodes):
            raise ValueError(
                "Duplicate id_node values among the selected nodes"
            )

        print("Building edges")
        edges = _build_edges(
            client.politica.seguidores_simplificado_ejes.find(),
            nodes_by_name,
        )
    finally:
        # Release the connection pool even when a query or lookup fails.
        client.close()

    print("Final # nodes: {}".format(len(nodes)))
    print("Final # edges: {}".format(len(edges)))

    with open("./output.json", "w") as fh:
        json.dump({"nodes": nodes, "edges": edges}, fh)


def _build_node(client, idx, doc):
    """Return the exported node dict for one node document."""
    name = doc["id_node"]
    node = {
        "id": idx,
        "name": name,
        "label": name,
        "title": name,
        "value": math.log(doc["size"], 2) if LOG_SCALE else doc["size"],
        "group": find_node_party(client, name),
    }

    image_url = find_node_image(client, name)
    if image_url is not None:
        node.update({"shape": "circularImage", "image": image_url})

    # A known display name replaces the raw identifier in label and title.
    human_name = find_node_human_name(client, name)
    if human_name is not None:
        node.update({"label": human_name, "title": human_name})

    return node


def _build_edges(edge_docs, nodes_by_name):
    """Return the edges whose two endpoints are both exported nodes."""
    edges = []
    # ``idx`` counts every edge document, including skipped ones, so the
    # exported edge ids are stable but not necessarily contiguous.
    for idx, doc in enumerate(edge_docs):
        source = nodes_by_name.get(doc["id_node"])
        target = nodes_by_name.get(doc["cuenta"])
        if source is None or target is None:
            continue
        edges.append({"id": idx, "from": source["id"], "to": target["id"]})
    return edges
# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment