-
Prerequisite: OpenJDK 11. On Ubuntu, if you have root access, you can install it with
apt-get install default-jdk
Otherwise, consider using docker : https://hub.docker.com/_/openjdk
A third option (not recommended) is to install Java in user space; this requires adjusting your terminal configuration manually. A starting point is the section "Installing OpenJDK Manually" at: https://dzone.com/articles/installing-openjdk-11-on-ubuntu-1804-for-real
-
Get Neo4j v4.0.X Community server and install Neosemantics plugin. This will also configure the Neosemantics plugin
./get-neo4j.sh
if you want you can uncomment the line
#dbms.default_listen_address=0.0.0.0
in
neo4j-server/conf/neo4j.conf
to accept connections from clients running on hosts other than localhost
-
Download the Yago4 files and uncompress them so that they are ready to be imported
./download-yago.sh yago_files.txt
-
Load data
./import-yago.sh
-
Query the data
-
Count nodes
${NEO4J_HOME}/bin/cypher-shell -u neo4j -p 'admin' "MATCH (r:Resource) RETURN COUNT(r)"
-
Count edges
${NEO4J_HOME}/bin/cypher-shell -u neo4j -p 'admin' "MATCH (r1:Resource)-[l]->(r2:Resource) RETURN COUNT(l)"
-
Distinct relationship types
${NEO4J_HOME}/bin/cypher-shell -u neo4j -p 'admin' "CALL db.relationshipTypes() YIELD relationshipType RETURN relationshipType"
-
Get Node properties
${NEO4J_HOME}/bin/cypher-shell -u neo4j -p 'admin' "MATCH (a:Resource ) UNWIND keys(a) AS key WITH DISTINCT key ORDER by key RETURN key"
-
Example node-edges
${NEO4J_HOME}/bin/cypher-shell -u neo4j -p 'admin' "MATCH (r1:Resource)-[l]->(r2:Resource) RETURN r1, l, r2 LIMIT 20"
-
-
Convert array of labels to single label
${NEO4J_HOME}/bin/cypher-shell -u neo4j -p 'admin' 'CALL apoc.periodic.iterate( "MATCH (a:Resource ) WHERE EXISTS (a.rdfs__label)", "SET a.label = a.rdfs__label[0]", {batchSize:10000, parallel:true})'
-
Create a full text index:
${NEO4J_HOME}/bin/cypher-shell -u neo4j -p 'admin' 'CALL db.index.fulltext.createNodeIndex("names",["Resource"], ["rdfs__label", "sch__alternateName", "sch__iataCode", "sch__icaoCode"])';
and test the index
${NEO4J_HOME}/bin/cypher-shell -u neo4j -p 'admin' 'CALL db.index.fulltext.queryNodes("names", "Full Metal Jacket") YIELD node, score RETURN node, score LIMIT 5;'
${NEO4J_HOME}/bin/cypher-shell -u neo4j -p 'admin' 'CALL db.index.fulltext.queryNodes("names", "Obama") YIELD node, score RETURN node, score;'
Last active
April 13, 2024 00:17
-
-
Save kuzeko/9b5dbdf52102a59b9d3865c70a8576a0 to your computer and use it in GitHub Desktop.
Yago4 in Neo4j
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# download-yago.sh - download the YAGO 4 dump files named in a list file,
# filter/uncompress them, split them into chunks of 5,000,000 lines and move
# the chunks into the Neo4j import directory.
#
# Usage: ./download-yago.sh <list-file>
#   <list-file>  one remote file name per line; lines starting with '#' and
#                blank lines are skipped.
#
# Reads ./exclude.txt (grep patterns, one per line; matching triples are
# dropped from the labels and facts files) from the current directory.
# Works in ${PWD}/data and stages the result in ${PWD}/neo4j-server/import.
set -e

export DATA_DIR="${PWD}/data"
export NEO4J_HOME=${PWD}/neo4j-server
export NEO4J_IMPORT="${NEO4J_HOME}/import"

mkdir -p -v "${DATA_DIR}"
mkdir -p -v "${NEO4J_IMPORT}"

if [ "$#" -ne 1 ]; then
  echo "Illegal number of parameters."
  exit 1
fi

# Fail before touching the data directory if the list cannot be read.
if [ ! -r "$1" ]; then
  echo "Cannot read file list: $1" >&2
  exit 1
fi

if [ -d "${DATA_DIR}" ]; then
  echo "Downloading files..."
  # Best effort: remove leftovers of a previous run; nothing to remove is fine.
  rm -v "${DATA_DIR}"/*.* || true

  # '|| [[ -n "$line" ]]' keeps the last entry when the list file has no
  # trailing newline.
  while read -r line || [[ -n "$line" ]]; do
    # Skip comments and blank lines (a blank line would otherwise make wget
    # fetch the bare directory URL).
    [[ -z "$line" || "$line" =~ ^#.*$ ]] && continue

    wget -P "${DATA_DIR}/" "http://yago.r2.enst.fr/data/yago4/full/2020-02-24/${line}"
    archive="${DATA_DIR}/${line##*/}"

    if [[ $line == *"yago-wd-labels"* ]]; then
      echo "Keeping only english labels"
      zcat "${archive}" \
        | grep --color=never -e "@en\s." \
        | grep --color=never -v -f ./exclude.txt > "${DATA_DIR}/yago-wd-labels-en.nt"
      rm "${archive}"
      filename="yago-wd-labels-en.nt"
    elif [[ $line == *"yago-wd-facts"* ]]; then
      echo "Excluding facts that match exclude.txt"
      zcat "${archive}" \
        | grep --color=never -v -f ./exclude.txt > "${DATA_DIR}/yago-wd-facts-lite.nt"
      rm "${archive}"
      filename="yago-wd-facts-lite.nt"
    else
      # gzip -d replaces the archive with the uncompressed file, whose name is
      # the archive name minus its last extension.
      gzip -d "${archive}"
      filename=$(basename -- "${archive}")
      filename="${filename%.*}"
    fi

    # Split into part-<filename>NN chunks and drop the unsplit file.
    split -l 5000000 --numeric-suffixes "${DATA_DIR}/${filename}" "${DATA_DIR}/part-${filename}"
    rm "${DATA_DIR}/${filename}"
  done < "$1"

  mv "${DATA_DIR}"/part-*.nt* "${NEO4J_IMPORT}/"
  # NOTE(review): world-writable permissions, presumably so that the Neo4j
  # process can read the staged files regardless of its user - confirm.
  chmod -R 777 "${NEO4J_IMPORT}"
else
  echo "No destination folder ${DATA_DIR}"
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
http://schema.org/pagination | |
2000/01/rdf-schema#comment | |
schema.org/citation |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# get-neo4j.sh - install a fresh Neo4j Community server in ./neo4j-server
# together with the Neosemantics (n10s) plugin, then prepare it for RDF import.
#
# Steps, in order:
#   1. remove any existing ./neo4j-server, download and unpack Neo4j
#   2. remove the server's data directory
#   3. download the Neosemantics jar into plugins/ if it is not there
#   4. append the n10s endpoint setting to conf/neo4j.conf
#   5. start the server, set the initial password to 'admin', restart
#   6. create the uniqueness constraint on Resource.uri and initialise the
#      n10s graph configuration
#   7. print the last 12 lines of the Neo4j log

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

ulimit -n 65535

NEO4J_VERSION=4.0.4

rm -rf neo4j-server
# The URL is quoted: it contains '?', which the shell would otherwise treat as
# a glob character.
wget "https://neo4j.com/artifact.php?name=neo4j-community-${NEO4J_VERSION}-unix.tar.gz" -O neo4j.tar.gz \
  || die "Download of Neo4j ${NEO4J_VERSION} failed"
tar xf neo4j.tar.gz || die "Could not extract neo4j.tar.gz"
mv "neo4j-community-${NEO4J_VERSION}" neo4j-server \
  || die "Could not rename neo4j-community-${NEO4J_VERSION} to neo4j-server"
rm neo4j.tar.gz

export NEO4J_HOME=${PWD}/neo4j-server
export NEO4J_DATA_DIR=${NEO4J_HOME}/data

rm -rf "${NEO4J_DATA_DIR}"

if [ ! -f "${NEO4J_HOME}/plugins/neosemantics-4.0.0.1.jar" ]; then
  echo "Downloading Neo4j RDF plugin..."
  wget -P "${NEO4J_HOME}/plugins/" https://github.com/neo4j-labs/neosemantics/releases/download/4.0.0.1/neosemantics-4.0.0.1.jar \
    || die "Download of the Neosemantics plugin failed"
fi

echo "Installing Neo4j RDF plugin..."
echo 'dbms.unmanaged_extension_classes=n10s.endpoint=/rdf' >> "${NEO4J_HOME}/conf/neo4j.conf"

"${NEO4J_HOME}/bin/neo4j" start
# Fixed wait, presumably to give the server time to come up.
sleep 10
"${NEO4J_HOME}/bin/neo4j-admin" set-initial-password admin
"${NEO4J_HOME}/bin/neo4j" restart
sleep 10

echo "Creating index"
"${NEO4J_HOME}/bin/cypher-shell" -u neo4j -p 'admin' "CREATE CONSTRAINT n10s_unique_uri ON (r:Resource) ASSERT r.uri IS UNIQUE;"
"${NEO4J_HOME}/bin/cypher-shell" -u neo4j -p 'admin' 'call n10s.graphconfig.init( { handleMultival: "ARRAY", handleVocabUris: "SHORTEN", keepLangTag: false, handleRDFTypes: "NODES" })'

echo Neo4j log:
tail -n 12 "${NEO4J_HOME}/logs/neo4j.log"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# import-yago.sh - import every staged N-Triples chunk into Neo4j.
#
# Iterates over ${PWD}/neo4j-server/import/*.nt* and, for each file, calls
# n10s.rdf.import.fetch through cypher-shell (user neo4j, password admin)
# with a file:// URL pointing at that file and the "N-Triples" format.
export NEO4J_HOME=${PWD}/neo4j-server
export NEO4J_IMPORT="${NEO4J_HOME}/import"
export NEO4J_DB_DIR=$NEO4J_HOME/data/databases/graph.db

ulimit -n 65535

echo "Importing"
imported=0
for file in "${NEO4J_IMPORT}"/*.nt*; do
  # When nothing matches, the glob stays literal; do not try to import it.
  [[ -e "$file" ]] || continue

  # Extracting filename
  echo "$file"
  filename="$(basename "${file}")"
  echo "Importing $filename from ${NEO4J_HOME}"
  "${NEO4J_HOME}/bin/cypher-shell" -u neo4j -p 'admin' "CALL n10s.rdf.import.fetch(\"file://${NEO4J_HOME}/import/$filename\",\"N-Triples\");"
  imported=$((imported + 1))
done

if [ "$imported" -eq 0 ]; then
  echo "No .nt files found in ${NEO4J_IMPORT}" >&2
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#yago-wd-annotated-facts.ntx.gz | |
#yago-wd-class.nt.gz | |
yago-wd-facts.nt.gz | |
#yago-wd-full-types.nt.gz | |
yago-wd-labels.nt.gz | |
#yago-wd-sameAs.nt.gz | |
#yago-wd-schema.nt.gz | |
#yago-wd-shapes.nt.gz | |
yago-wd-simple-types.nt.gz |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment