Skip to content

Instantly share code, notes, and snippets.

@ib84
Created March 11, 2012 15:35
Show Gist options
  • Save ib84/2016844 to your computer and use it in GitHub Desktop.
Save ib84/2016844 to your computer and use it in GitHub Desktop.
hypergraphDB Text Experiments
import test.JHGDB.TestCommons._
import org.hypergraphdb.HGQuery.hg
import scala.collection.JavaConversions._
import org.hypergraphdb.query.AtomTypeCondition
import test.Text.Word3
import java.util.ArrayList
import org.hypergraphdb.indexing.{ByTargetIndexer, ByPartIndexer}
import org.hypergraphdb.{HyperGraph, HGHandle, HGPlainLink}
/**
 * Experiment: store two variants of the "Lorem ipsum" text in a HyperGraphDB
 * instance as `Word3` atoms joined by one `HGPlainLink` per text, then query
 * for every link that contains the word "sit".
 *
 * Punctuation is stored as separate placeholder words (see
 * [[PunctuationPlaceholders]]) so that it can be linked like any other word.
 */
object HGDBRecursiveTextTest {

  /**
   * Punctuation marks and the placeholder word each one is stored as.
   * Shared by the tokenizer and the printer so both stay in sync.
   * The placeholders are plain letters, so they are safe to use as regex patterns.
   */
  private val PunctuationPlaceholders: List[(String, String)] = List(
    "," -> "KOMMA",
    "." -> "POINT",
    "?" -> "QUESTIONMARK",
    "!" -> "EXCLAMATION"
  )

  // NOTE(review): initializeGraph is not defined in this file; presumably it comes
  // from the test.JHGDB.TestCommons._ import - confirm what the two arguments mean.
  val graph: HyperGraph = initializeGraph("localhost", "6378")

  /**
   * Runs the experiment: registers the indexers, stores both texts and prints
   * the query results to standard output.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Part One: register indexers and store both texts.
    val word3word = graph.getTypeSystem().getTypeHandle(classOf[Word3])
    graph.getIndexManager().register(new ByPartIndexer(word3word, "word"))
    val plainLinkHandle = graph.getTypeSystem().getTypeHandle(classOf[HGPlainLink])
    // graph.getIndexManager().register(new ByPartIndexer(plainLinkHandle, "word"))
    graph.getIndexManager().register(new ByTargetIndexer(plainLinkHandle, 0))
    // Presumably needed so the indexers registered above get built - TODO confirm.
    graph.runMaintenance()

    val loremIpsum = "Lorem ipsum dolor sit amet, consectetur adipisici elit, sed eiusmod tempor incidunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquid ex ea commodi consequat. Quis aute iure reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint obcaecat cupiditat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."
    val loremIpsumOrig = "... qui dolorem ipsum, quia dolor sit, amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt, ut labore et dolore magnam aliquam quaerat voluptatem. ut enim ad minima veniam, quis nostrum exercitationem ullam corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? quis autem vel eum iure reprehenderit, qui in ea voluptate velit esse, quam nihil molestiae consequatur, vel illum, qui dolorem eum fugiat, quo voluptas nulla pariatur? At vero eos et accusamus et iusto odio dignissimos ducimus, qui blanditiis praesentium voluptatum deleniti atque corrupti, quos dolores et quas molestias excepturi sint, obcaecati cupiditate non provident, similique sunt in culpa, qui officia deserunt mollitia animi, id est laborum et dolorum fuga. "

    storePhrase(loremIpsum, word3word, plainLinkHandle)
    storePhrase(loremIpsumOrig, word3word, plainLinkHandle)

    // Second part: query the stored links.
    println("gimme all sentence containing the word \"sit\".\n")
    val sitHandle = hg.assertAtom(graph, new Word3("sit"), word3word)
    hg.getAll(graph, hg.link(sitHandle)).asInstanceOf[java.util.List[HGPlainLink]].foreach { link =>
      print("\n")
      link.foreach { handle => print(" " + restorePunctuation(graph.get(handle).toString)) }
    }

    println("\nCount of words by AtomTypeCondition Word3")
    println(graph.getAll(new AtomTypeCondition(classOf[Word3])).size)
    println("end of test")
  }

  /**
   * Splits `s` into words on single spaces, turning punctuation into separate
   * placeholder words.
   *
   * A mark is recognised when it is followed by a space or when it is the very
   * last character of the text, so a closing "." is not left glued to the final
   * word. Empty tokens (from empty input or doubled spaces) are dropped.
   *
   * @param s the text to tokenize
   * @return the words and placeholder words, in their original order
   */
  def splitStringToWords(s: String): Array[String] = {
    val marked = PunctuationPlaceholders.foldLeft(s) { case (text, (mark, placeholder)) =>
      val padded = " " + placeholder + " "
      val inner = text.replace(mark + " ", padded)
      // A mark at the end of the text has no trailing space to match on.
      if (inner.endsWith(mark)) inner.dropRight(mark.length) + padded else inner
    }
    marked.split(" ").filter(_.nonEmpty)
  }

  /**
   * Asserts one `Word3` atom per word of `text` and adds a `HGPlainLink` over
   * those atoms unless a link matching the same targets already exists.
   *
   * @param text     the text to store
   * @param wordType type handle used for the `Word3` atoms
   * @param linkType type handle used for the `HGPlainLink`
   * @return the handle returned by `hg.addUnique`
   */
  private def storePhrase(text: String, wordType: HGHandle, linkType: HGHandle): HGHandle = {
    val wordHandles = splitStringToWords(text)
      .map(word => hg.assertAtom(graph, new Word3(word), wordType))
      .toArray[HGHandle]
    hg.addUnique(
      graph,
      new HGPlainLink(wordHandles: _*),
      linkType,
      hg.link(java.util.Arrays.asList[HGHandle](wordHandles: _*))
    )
  }

  /**
   * Replaces the first occurrence of each placeholder word in `rendered` with
   * the punctuation mark it stands for.
   *
   * NOTE(review): this works on the atom's `toString`, whose exact format is not
   * visible here; a word that merely contains a placeholder would be altered too.
   *
   * @param rendered the string form of a stored word
   * @return `rendered` with placeholders turned back into punctuation
   */
  private def restorePunctuation(rendered: String): String =
    PunctuationPlaceholders.foldLeft(rendered) { case (text, (mark, placeholder)) =>
      text.replaceFirst(placeholder, mark)
    }
}
@ib84
Copy link
Author

ib84 commented Mar 13, 2012

Ah! I forgot to mention: so far nothing here is recursive, but the original idea is an HGDB Text type that is composed of sequences of words and/or other Text instances. To be continued.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment