Created
January 25, 2013 01:03
-
-
Save nlacasse/4630592 to your computer and use it in GitHub Desktop.
dbpedia inserter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns opal.dbpedia | |
(:use [clojure.tools.logging :only [log]]) | |
(:require [clojure.java.io :as io]) | |
(:import [uk.ac.manchester.cs.owl.owlapi.turtle.parser TurtleParser] | |
[org.neo4j.unsafe.batchinsert BatchInserters | |
LuceneBatchInserterIndexProvider] | |
[org.neo4j.graphdb DynamicRelationshipType])) | |
;; PARSING METHODS | |
(defn get-next-tuple
  "Reads tokens from `parser` (via .getNextToken / .toString) until a
  token equal to \".\" or \"\" is seen, and returns them as a vector with
  that terminating token included as the last element.

  Returns nil when the very first token read is the empty string, which
  is how .getNextToken signals that the input is exhausted."
  [parser]
  (loop [tokens []]
    (let [token  (.toString (.getNextToken parser))
          tokens (conj tokens token)]
      (if (or (= "." token) (= "" token))
        ;; Terminator reached: a leading "" means there was no more data.
        (when-not (empty? (first tokens))
          tokens)
        (recur tokens)))))
(defn seq-of-parser
  "Returns a sequence of the tuples produced by repeatedly calling
  get-next-tuple on `parser`, or nil when the parser has no tuple left.

  The first tuple is read immediately; the remaining tuples are read
  lazily as the sequence is consumed."
  [parser]
  (when-let [tuple (get-next-tuple parser)]
    (concat (lazy-seq [tuple])
            (lazy-seq (seq-of-parser parser)))))
(defn parse-file
  "Opens `filename` as an input stream, wraps it in a TurtleParser and
  returns the sequence of tuples read from it (see seq-of-parser).

  NOTE(review): the input stream is not closed here; the returned
  sequence is lazy, so the stream has to stay open while it is consumed."
  [filename]
  (-> filename
      io/input-stream
      (TurtleParser.)
      seq-of-parser))
;; BATCH UPSERT METHODS | |
;; Maps a resource string to the id of the node created for it.
;; This is a plain persistent map inside the atom. A transient must not be
;; stored in a global atom: transients are meant for local, single-owner
;; use, and on Clojure versions that enforce ownership they throw
;; "Transient used by non-owner thread" when touched from another thread.
;; swap! may also re-run its function, which is unsafe with the in-place
;; mutation done by assoc!.
(def id-map (atom {}))

(defn insert-resource-node!
  "Returns the node id for the resource `res`, creating the node through
  `inserter` if it has not been seen before.

  inserter - object whose .createNode is called with a property map
  res      - resource identifier, used as the key in id-map and stored
             in the node's \"resource\" property

  Side effect: records the id of a newly created node in id-map.

  NOTE(review): .createNode is called with a single map argument. If the
  Neo4j version in use declares createNode(Map, Label...), Clojure
  interop will need an explicit (empty) Label array as second argument -
  confirm against the Neo4j version on the classpath."
  [inserter res]
  (if-let [id (get @id-map res)]
    ;; If the resource has already been added, just return its id.
    id
    ;; Otherwise, create the node for the resource and remember its id.
    (let [id (.createNode inserter {"resource" res})]
      (swap! id-map assoc res id)
      id)))
(defn connect-resource-nodes!
  "Creates a relationship from `node1` to `node2` through `inserter`.
  The relationship type is obtained from DynamicRelationshipType/withName
  using `label`; nil is passed as the relationship's properties.
  Returns whatever .createRelationship returns."
  [inserter node1 node2 label]
  (.createRelationship inserter
                       node1
                       node2
                       (DynamicRelationshipType/withName label)
                       nil))
(defn insert-tuple!
  "Inserts one parsed tuple into the graph.

  The first and third elements of `tuple` are treated as resources and
  upserted as nodes (in that order); the second element is used as the
  label of the relationship created from the first node to the second.
  Any further elements of the tuple are ignored."
  [inserter tuple]
  (let [[subject predicate object] tuple
        ;; Upsert both endpoints before linking them.
        from-node (insert-resource-node! inserter subject)
        to-node   (insert-resource-node! inserter object)]
    (connect-resource-nodes! inserter from-node to-node predicate)))
(defn -main
  "Entry point: loads every file in `files` into the batch-inserter store
  at `graph-path`.

  graph-path - passed to BatchInserters/inserter
  files      - files to parse with parse-file; each tuple is inserted
               with insert-tuple!

  Progress is logged at :debug level every 10000 tuples per file.

  The inserter is shut down in a `finally` so that it is released even
  when parsing or insertion throws; the exception still propagates."
  [graph-path & files]
  (let [inserter (BatchInserters/inserter graph-path)]
    (try
      (doseq [file files]
        (log :debug (str "Loading file: " file))
        ;; idx counts the tuples already processed for this file.
        (doseq [[idx tuple] (map-indexed vector (parse-file file))]
          (when (zero? (mod idx 10000))
            (log :debug (str file ": " idx)))
          (insert-tuple! inserter tuple)))
      (log :debug "Loading complete.")
      (finally
        (log :debug "Shutting down.")
        (.shutdown inserter)
        (log :debug "Shutdown complete!")))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
imakshi Mangal
• a minute ago
Hi, I am using Clojure to import dbpedia data into neo4j. I am receiving these errors:
IllegalArgumentException No matching method found: createNode for class org.neo4j.unsafe.batchinsert.BatchInserterImpl clojure.lang.Reflector.invokeMatchingMethod (Reflector.java:53)
IllegalAccessError Transient used by non-owner thread clojure.lang.PersistentArrayMap$
TransientArrayMap.ensureEditable (PersistentArrayMap.java:449)
I am using Clojure for the first time. Can you please provide a working example or a tutorial for this?
I am following this article for now, but no luck
http://blog.opal.io/archive/lo....
Please help me.. Thanks