Last active
January 28, 2022 14:49
-
-
Save bosborne/7fbbe8d6668b9c10f9e87fc1df3f3aca to your computer and use it in GitHub Desktop.
Make a Neo4J database for a collection of wines using py2neo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
''' | |
Use py2neo to create a Neo4J Wine database. | |
Data is stored in YAML files like this: | |
region: Cotes du Rhone | |
country: France | |
comment: Mostly Grenache. Thin, sour, unpleasant. | |
winemaker: Joseph Sabon | |
year: 2006 | |
pk: 331 | |
score: 1 | |
price: 15.00 | |
grapes: Cinsault,Mourvedre,Syrah,Grenache | |
creation_date: 2008-12-31 | |
name: Montueil La Levade | |
winetype: red | |
''' | |
import argparse | |
import glob | |
import re | |
import os | |
import yaml | |
from py2neo import Graph, Node, Relationship, NodeMatcher, RelationshipMatcher | |
def main():
    """Parse the command line and rebuild the Neo4J wine database.

    Options: -p/--port and -s/--server locate the Neo4J instance,
    -d/--directory (required) names the directory holding the YAML files,
    and -v/--verbose turns on progress output.
    """
    cli = argparse.ArgumentParser()
    option_table = (
        ("-p", "--port", {"default": '7687', "help": "Neo4J port"}),
        ("-s", "--server", {"default": 'localhost', "help": "Neo4J host"}),
        ("-d", "--directory",
         {"required": True, "help": "Directory with YAML files"}),
        ("-v", "--verbose", {"action": "store_true", "help": "Verbose mode"}),
    )
    for short_flag, long_flag, settings in option_table:
        cli.add_argument(short_flag, long_flag, **settings)
    opts = cli.parse_args()

    # Connect (which wipes the graph), read the YAML cards, then load them.
    loader = NeoLoader(opts.port, opts.server, opts.directory, opts.verbose)
    loader.setup()
    loader.readYaml()
    loader.load()
class NeoLoader:
    """Load wine records from a directory of YAML files into Neo4J.

    Typical use is ``setup()`` (connect and wipe the graph), ``readYaml()``
    (parse the files into ``self.wines``) and then ``load()`` (create the
    Country, Region, Wine and Grape nodes and the edges between them).
    """

    def __init__(self, port, server, dir, verbose) -> None:
        """Store the connection and input settings; no I/O happens here.

        Args:
            port: Neo4J bolt port (string or int).
            server: Neo4J host name.
            dir: Directory containing the ``*.yaml`` wine files.
            verbose: When true, print progress messages.
        """
        self.port = port
        self.server = server
        self.dir = dir
        self.verbose = verbose
        # Filled in by readYaml(); starting empty makes load() a no-op
        # rather than an AttributeError if it is called first.
        self.wines = []

    # Connecting to Neo4J in EC2:
    # - Add Custom TCP Security Group rules that open 7474, 7473, and 7687
    #   to 0.0.0.0
    # - Configure in neo4j.conf:
    #   dbms.connectors.default_listen_address:0.0.0.0
    # By default Neo4J is only listening to localhost.
    def setup(self):
        """Connect to Neo4J and delete every existing node and edge.

        Credentials are read from the NEO4J_USERNAME and NEO4J_PASSWORD
        environment variables.

        Raises:
            KeyError: If either environment variable is not set.
        """
        # format() rather than '+' so that an int port also works.
        url = "bolt://{0}:{1}".format(self.server, self.port)
        self.graph = Graph(url, auth=(
            os.environ["NEO4J_USERNAME"], os.environ["NEO4J_PASSWORD"]))
        if self.verbose:
            print("Connected: {0}".format(url))
        # Delete all nodes and edges, make query objects
        self.graph.run('MATCH (n) DETACH DELETE n')
        self.nodeMatcher = NodeMatcher(self.graph)
        self.relationshipMatcher = RelationshipMatcher(self.graph)

    def load(self):
        """Create graph nodes and edges for every record in ``self.wines``.

        For each wine: (Region)-[IsIn]->(Country),
        (Wine)-[IsFrom]->(Region) and one (Wine)-[MadeFrom]->(Grape) per
        grape. Country, Region and Grape nodes are looked up by name and
        reused; a Wine node is always created.
        """
        for winenum, wine in enumerate(self.wines, start=1):
            if self.verbose:
                print("Wine: {0}".format(wine['pk']))
            country = self._findOrMakeNode("Country", wine['country'])
            region = self._findOrMakeNode("Region", wine['region'])
            # Connect Country and Region with edge "IsIn" if they're not
            # connected
            linked = self.relationshipMatcher.match(
                (region, country), r_type='IsIn').first()
            if linked is None:
                self.graph.create(Relationship(region, "IsIn", country))
            # NOTE(review): 'price' and 'comment' from the YAML are not
            # copied onto the Wine node.
            w = Node("Wine", pk=int(wine['pk']),
                     year=int(wine['year']),
                     score=int(wine['score']),
                     winetype=wine['winetype'],
                     winemaker=wine['winemaker'],
                     creation_date=wine['creation_date'])
            # Wine may not have a name
            if wine.get('name'):
                w['name'] = wine['name']
            self.graph.create(w)
            if self.verbose:
                print("Made Wine, pk: {0}".format(wine['pk']))
            # Connect Wine and Region with edge "IsFrom"
            self.graph.create(Relationship(w, "IsFrom", region))
            for grapename in wine['grapes']:
                grape = self._findOrMakeNode("Grape", grapename)
                # Connect Wine and Grape with edge "MadeFrom"
                self.graph.create(Relationship(w, "MadeFrom", grape))
            if self.verbose:
                print("Wine number {}".format(winenum))

    def _findOrMakeNode(self, label, name):
        """Return the node with this label and name, creating it if absent."""
        found = self.nodeMatcher.match(label).where(name=name).first()
        if found is None:
            found = self.makeNode(label, name)
        return found

    def makeNode(self, label, name):
        """Create a node with the given label and ``name`` property.

        Returns:
            The newly created py2neo Node.
        """
        n = Node(label, name=name)
        self.graph.create(n)
        if self.verbose:
            print("Created {0}: {1}".format(label, name))
        return n

    @staticmethod
    def _normalizeWine(record):
        """Normalize one parsed wine record in place and return it.

        ``grapes`` becomes a list of stripped, non-empty, de-duplicated
        names (order preserved); a comma-separated string, a list, or a
        missing value are all accepted. A missing ``creation_date`` is set
        to December 22nd, four years after ``year``.

        Raises:
            KeyError: If ``creation_date`` and ``year`` are both missing.
        """
        grapes = record.get('grapes') or []
        if isinstance(grapes, str):
            grapes = grapes.split(',')
        stripped = (str(g).strip() for g in grapes)
        # dict.fromkeys de-duplicates while keeping first-seen order, so a
        # wine never gets two MadeFrom edges to the same grape.
        record['grapes'] = list(dict.fromkeys(g for g in stripped if g))
        # Add creation_date if it's missing
        if record.get('creation_date') is None:
            record['creation_date'] = str(int(record['year']) + 4) + '-12-22'
        return record

    def readYaml(self):
        """Read every ``*.yaml`` file in ``self.dir`` into ``self.wines``.

        Files that fail to parse, or that do not contain a YAML mapping,
        are reported and skipped. Files are processed in sorted order so
        the load order is reproducible.
        """
        self.wines = []
        for card in sorted(glob.glob(os.path.join(self.dir, '*.yaml'))):
            with open(card, encoding='utf-8') as f:
                try:
                    y = yaml.safe_load(f)
                except yaml.YAMLError as err:
                    print("Could not read card '{0}': {1}".format(card, err))
                    # Skip this file; otherwise 'y' would be unbound or
                    # still hold the previous file's record.
                    continue
            if not isinstance(y, dict):
                print("Could not read card '{0}': {1}".format(
                    card, "not a YAML mapping"))
                continue
            if self.verbose:
                print("Wine YAML is '{}'".format(y))
            self.wines.append(self._normalizeWine(y))
# Script entry point: run the loader only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment