Skip to content

Instantly share code, notes, and snippets.

@hastebrot
Forked from nicolewhite/northwind.markdown
Created April 9, 2021 19:37
Show Gist options
  • Save hastebrot/80a1787904f97396f1afaf0877cc8bf7 to your computer and use it in GitHub Desktop.
Save hastebrot/80a1787904f97396f1afaf0877cc8bf7 to your computer and use it in GitHub Desktop.
MySQL to Neo4j

From SQL to Neo4j: Northwind

SQL Model

Neo4j Model

Get the SQL Dump

The SQL dump was stolen from here and imported into MySQL.

Export from SQL

The .csv files were generated with essentially:

SELECT * FROM Table 
INTO OUTFILE '/tmp/table.csv'
FIELDS ENCLOSED BY '"' TERMINATED BY ',' ESCAPED BY ''
LINES TERMINATED BY '\r\n';

Get the exported .csv files here.

Import into Neo4j

//Constraints
CREATE CONSTRAINT ON (p:Product) ASSERT p.id IS UNIQUE;
CREATE CONSTRAINT ON (c:Category) ASSERT c.id IS UNIQUE;
CREATE CONSTRAINT ON (s:Supplier) ASSERT s.id IS UNIQUE;
CREATE CONSTRAINT ON (o:Order) ASSERT o.id IS UNIQUE;
CREATE CONSTRAINT ON (c:Customer) ASSERT c.id IS UNIQUE;
CREATE CONSTRAINT ON (s:Shipper) ASSERT s.id IS UNIQUE;
CREATE CONSTRAINT ON (e:Employee) ASSERT e.id IS UNIQUE;
CREATE CONSTRAINT ON (t:Territory) ASSERT t.id IS UNIQUE;
CREATE CONSTRAINT ON (r:Region) ASSERT r.id IS UNIQUE;

// Products
LOAD CSV WITH HEADERS FROM 'file:///products.csv' AS line
CREATE (product:Product {id: TOINT(line.ProductID) })
SET product.name = line.ProductName,
    product.unit_price = TOFLOAT(line.UnitPrice)
RETURN product;

// Categories
LOAD CSV WITH HEADERS FROM 'file:///categories.csv' AS line
CREATE (category:Category {id: TOINT(line.CategoryID) })
SET category.name = line.CategoryName,
    category.description = line.Description
RETURN category;

//Suppliers
LOAD CSV WITH HEADERS FROM 'file:///suppliers.csv' AS line
CREATE (supplier:Supplier {id: TOINT(line.SupplierID) })
SET supplier.name = line.CompanyName
RETURN supplier;

//Products and their categories
LOAD CSV WITH HEADERS FROM 'file:///products.csv' AS line
MATCH (product:Product {id: TOINT(line.ProductID) })
MATCH (category:Category {id: TOINT(line.CategoryID) })

CREATE (product)-[:IN_CATEGORY]->(category)
RETURN product, category;

//Products and their suppliers
LOAD CSV WITH HEADERS FROM 'file:///products.csv' AS line
MATCH (product:Product {id: TOINT(line.ProductID) })
MATCH (supplier:Supplier {id: TOINT(line.SupplierID) })
CREATE (supplier)-[:SUPPLIES]->(product)
RETURN product, supplier;

//Orders
LOAD CSV WITH HEADERS FROM 'file:///orders.csv' AS line
CREATE (order:Order {id: TOINT(line.OrderID) })
SET order.freight = TOFLOAT(line.Freight);

//Customers
LOAD CSV WITH HEADERS FROM 'file:///customers.csv' AS line
CREATE (customer:Customer {id: line.CustomerID })
SET customer.name = line.CompanyName
RETURN customer;

//Shippers
LOAD CSV WITH HEADERS FROM 'file:///shippers.csv' AS line
CREATE (shipper:Shipper {id: TOINT(line.ShipperID) })
SET shipper.name = line.CompanyName,
    shipper.phone = line.Phone
RETURN shipper;

//Employees
LOAD CSV WITH HEADERS FROM 'file:///employees.csv' AS line
CREATE (employee:Employee {id: TOINT(line.EmployeeID) })
SET employee.first_name = line.FirstName,
	employee.last_name = line.LastName,
    employee.title = line.Title,
    employee.salary = TOFLOAT(line.Salary)
RETURN employee;

//Employees and reports to
LOAD CSV WITH HEADERS FROM 'file:///employees.csv' AS line
MATCH (employee:Employee {id: TOINT(line.EmployeeID) })
MATCH (manager:Employee {id: TOINT(line.ReportsTo) })
CREATE (employee)-[:REPORTS_TO]->(manager)
RETURN employee, manager;

//Orders and their customers
LOAD CSV WITH HEADERS FROM 'file:///orders.csv' AS line
MATCH (order:Order {id: TOINT(line.OrderID) })
MATCH (customer:Customer {id: line.CustomerID })
CREATE (order)-[:TO]->(customer);

//Orders and their shippers
LOAD CSV WITH HEADERS FROM 'file:///orders.csv' AS line
MATCH (order:Order {id: TOINT(line.OrderID) })
MATCH (shipper:Shipper {id: TOINT(line.ShipVia) })
CREATE (shipper)-[:SHIPPED]->(order);

//Orders and their employees
LOAD CSV WITH HEADERS FROM 'file:///orders.csv' AS line
MATCH (order:Order {id: TOINT(line.OrderID) })
MATCH (employee:Employee {id: TOINT(line.EmployeeID) })
CREATE (employee)-[:SOLD]->(order);

//Orders and their details/products
LOAD CSV WITH HEADERS FROM 'file:///order_details.csv' AS line
MATCH (order:Order {id: TOINT(line.OrderID) })
MATCH (product:Product {id: TOINT(line.ProductID) })
CREATE (order)-[r:INCLUDED]->(product)
SET r.quantity = TOINT(line.Quantity),
    r.unit_price = TOFLOAT(line.UnitPrice),
    r.discount = TOFLOAT(line.Discount);

//Territories
LOAD CSV WITH HEADERS FROM 'file:///territories.csv' AS line
CREATE (t:Territory {id: TOINT(line.TerritoryID) })
SET t.name = line.TerritoryDescription
RETURN t;

//Regions
LOAD CSV WITH HEADERS FROM 'file:///regions.csv' AS line
CREATE (region:Region {id: TOINT(line.RegionID) })
SET region.name = line.RegionDescription
RETURN region;

//Territories and their regions
LOAD CSV WITH HEADERS FROM 'file:///territories.csv' AS line
MATCH (territory:Territory {id: TOINT(line.TerritoryID) })
MATCH (region:Region {id: TOINT(line.RegionID) })
CREATE (territory)-[:BELONGS_TO]->(region)
RETURN territory, region;

//Employees and their territories
LOAD CSV WITH HEADERS FROM 'file:///employee_territories.csv' AS line
MATCH (employee:Employee {id: TOINT(line.EmployeeID) })
MATCH (territory:Territory {id: TOINT(line.TerritoryID) })
CREATE (employee)-[:WORKS_IN]->(territory)
RETURN employee, territory;

Compare SQL Queries to Cypher Queries

What are Steven Buchanan’s top 5 selling products?

SQL

SELECT Products.ProductName AS Product,
       ROUND(SUM((`Order Details`.UnitPrice*Quantity*(1-Discount)/100)*100)) AS Sales
FROM Employees
JOIN Orders ON Employees.EmployeeID = Orders.EmployeeID
JOIN `Order Details` ON Orders.OrderID = `Order Details`.OrderID
JOIN Products ON `Order Details`.ProductID = Products.ProductID
WHERE Employees.FirstName = "Steven" AND Employees.LastName = "Buchanan"
GROUP BY Products.ProductName
ORDER BY Sales DESC LIMIT 5;
+------------------------+-------+
° Product                ° Sales °
+------------------------+-------+
° Cte de Blaye           °  8432 °
° Raclette Courdavault   °  7920 °
° Mozzarella di Giovanni °  5829 °
° Alice Mutton           °  5538 °
° Perth Pasties          °  5084 °
+------------------------+-------+

Cypher

MATCH (e:Employee)-[:SOLD]->(:Order)-[r:INCLUDED]->(p:Product)
WHERE e.first_name = 'Steven' AND e.last_name = 'Buchanan'
RETURN p.name AS Product, 
       ROUND(SUM(r.unit_price*r.quantity*(1-r.discount)/100)*100) AS Sales
ORDER BY Sales DESC LIMIT 5;
+-----------------------------------+
| Product                  | Sales  |
+-----------------------------------+
| "Cte de Blaye"           | 8432   |
| "Raclette Courdavault"   | 7920   |
| "Mozzarella di Giovanni" | 5829   |
| "Alice Mutton"           | 5538   |
| "Perth Pasties"          | 5084   |
+-----------------------------------+

What is the reporting structure two levels deep?

SQL

SELECT CONCAT(Employees.FirstName, ' ', Employees.LastName) AS Name,
       CONCAT(e2.FirstName, ' ', e2.LastName) AS Manager,
       CONCAT(e3.FirstName, ' ', e3.LastName) AS `Manager's Manager`
FROM Employees
JOIN Employees AS e2 ON Employees.ReportsTo = e2.EmployeeID
JOIN Employees AS e3 ON e2.ReportsTo = e3.EmployeeID;
+----------------+-----------------+-------------------+
° Name           ° Manager         ° Manager's Manager °
+----------------+-----------------+-------------------+
° Michael Suyama ° Steven Buchanan ° Andrew Fuller     °
° Robert King    ° Steven Buchanan ° Andrew Fuller     °
° Anne Dodsworth ° Steven Buchanan ° Andrew Fuller     °
+----------------+-----------------+-------------------+

Cypher

MATCH p = (:Employee)-[:REPORTS_TO*2]->(:Employee)
RETURN EXTRACT(n IN NODES(p) | n.first_name + ' ' + n.last_name) AS reporting;
+------------------------------------------------------+
| reporting                                            |
+------------------------------------------------------+
| ["Michael Suyama","Steven Buchanan","Andrew Fuller"] |
| ["Robert King","Steven Buchanan","Andrew Fuller"]    |
| ["Anne Dodsworth","Steven Buchanan","Andrew Fuller"] |
+------------------------------------------------------+

Build an App!

pip install py2neo
pip install flask

main.py

from flask import Flask, request, render_template
from py2neo import Graph, authenticate

authenticate('localhost:7474', 'neo4j', 'secret')
graph = Graph()
cypher = graph.cypher.execute

app = Flask(__name__)

query = """
MATCH (employee:Employee)-[:SOLD]->(:Order)-[r:INCLUDED]->(product:Product)
WHERE employee.first_name = {first_name} AND employee.last_name = {last_name}
RETURN product.name AS Product,
       ROUND(SUM(r.unit_price*r.quantity*(1-r.discount)/100)*100) AS Sales
ORDER BY Sales DESC LIMIT 5;
"""

@app.route('/', methods=['GET', 'POST'])
def index():
    products = []
    if request.method == 'POST':
        first_name = request.form['first_name']
        last_name = request.form['last_name']

        products = cypher(query, {'first_name': first_name, 'last_name': last_name})

    return render_template('index.html', products=products)

if __name__ == '__main__':
    app.run(debug=True)

index.html

<!DOCTYPE html>
<html>
<body>
<form action="." method="post">
    First Name:<input type="text" name="first_name"><br>
    Last Name: <input type="text" name="last_name"><br>
    <input type="submit" value="Search">
</form>

<br>

<table>
    <tr>
        <td><b>Product</b></td>
        <td><b>Sales</b></td>
    </tr>
    {% for product in products %}
    <tr>
        <td>{{ product.Product }}</td>
        <td>{{ product.Sales }}</td>
    </tr>
    {% endfor %}
</table>

</body>
<head lang="en">
    <meta charset="UTF-8">
    <title></title>
</head>
</html>
python main.py
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment