Created
August 12, 2019 20:00
-
-
Save Adrodoc/2f3739301b140b84a69cff4c50bd0291 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.ArrayList; | |
import java.util.HashMap; | |
import java.util.List; | |
public class Node { | |
int start; | |
int end; | |
Node suffixlink; | |
HashMap<String,Node> children = new HashMap<>(); | |
public Node next; | |
boolean isleaf = false; | |
public Node(int start, int end) { | |
this.start = start; | |
this.end = end; | |
} | |
public String getlabel(String s ) { | |
if(this.start == -1) return "root"; | |
String label = s.substring(this.start, this.end) ; | |
return label; | |
} | |
public Node() { | |
} | |
} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.File; | |
import java.nio.file.Files; | |
import java.nio.file.Paths; | |
import java.text.SimpleDateFormat; | |
import java.util.Date; | |
import java.util.HashMap; | |
import java.util.Map; | |
import java.util.Scanner; | |
public class suffixTree { | |
private Node root ; | |
private Node suffixstate; | |
int suffixlink; | |
String s; | |
private ActivePoint activepoint; | |
private int remainder ; | |
public suffixTree() { | |
root = new Node(-1,-1);//split node | |
activepoint = new ActivePoint(root, null, 0); //(active_node,active_edge,active_length) | |
} | |
public void build(String s) { | |
this.s= s; | |
int stringlength = s.length(); | |
for(int i=0 ;i<s.length();i++) { | |
// System.out.println("add: " + s.charAt(i)); | |
insert(s.charAt(i), i, stringlength); | |
} | |
System.out.println("output: "); | |
// printTree(root); | |
} | |
public void printTree(Node n ) { | |
for(String key:n.children.keySet()) { | |
Node child = n.children.get(key); | |
System.out.println(n.getlabel(s)+ "-- " + key + " ---> " + child.getlabel(s));// | |
if(child.suffixlink != null) System.out.println(" suffixlink: " + child.getlabel(s)+ " --> " + child.suffixlink.getlabel(s)); | |
printTree(child); | |
} | |
} | |
public int edgelength (Node node) { | |
if(node == root) { | |
return 0; | |
} | |
return node.end-node.start+1; | |
} | |
public void addsuffixlink(Node node) { | |
if(suffixstate!=null) { | |
suffixstate.suffixlink = node; | |
} | |
suffixstate = node; | |
} | |
//**creat a suffix tree*// | |
public void insert(Character c, int index, int stringlength) { | |
suffixstate = null; | |
remainder++; // reminder+1,activePoint.length+1 | |
suffixlink = 0; | |
String ch = c.toString(); | |
//HashMap<String, Node> child = root.children; | |
Node split ; | |
while(remainder > 0) { | |
System.out.println(" reaminder " + " "+remainder + " activepoint. edge " + activepoint.edge + " " + " character " + " " + c + " activelenth" + " " +activepoint.length ); | |
if(activepoint.length==0) { | |
activepoint.edge = ch; //insert the first character | |
} | |
if(activepoint.point.children.containsKey(activepoint.edge)) { // if a suffix is found in this tree | |
Node next = activepoint.point.children.get(activepoint.edge); | |
//Character n = s.charAt((next.start + activepoint.length)); | |
//String activel = n.toString(); | |
//System.out.println("ap: "+activepoint.edge+" active l: "+activel); | |
if(activepoint.length >=next.end-next.start ) { //if activepoint.length is longer than edge | |
activepoint.length -= next.end-next.start; | |
activepoint.edge += index-activepoint.length; // ? | |
activepoint.point = next; | |
continue; | |
} | |
if(s.charAt(next.start+ activepoint.length) ==c) { //activepoint verschieben | |
activepoint.length++; | |
addsuffixlink(activepoint.point); | |
break; | |
} | |
//if(!activel.equals(ch)) { //***case1: innersplit** // | |
split = new Node(next.start, next.start+activepoint.length); | |
activepoint.point.children.put(activepoint.edge, split); | |
System.out.println("innersplit edge at " + activepoint.edge + ":" ); | |
SimpleDateFormat formatter= new SimpleDateFormat("yyyy-MM-dd 'at' HH:mm:ss z"); | |
Date date = new Date(System.currentTimeMillis()); | |
System.out.println(formatter.format(date)); | |
split.children.put(s.substring(index), next); | |
System.out.println("------ " + s.substring(index) + " " + index ); | |
Node newleaf = new Node(index,stringlength); | |
split.children.put(ch, newleaf);// from split to new leaf | |
next.start += activepoint.length; | |
Character chara = s.charAt(next.start); | |
split.children.put(chara.toString(), next); | |
addsuffixlink(split); | |
} | |
else { // **case3 insert a new edge **// | |
Node newleaf= new Node(index,stringlength); | |
activepoint.point.children.put(activepoint.edge, newleaf); | |
addsuffixlink(activepoint.point); | |
} | |
remainder--; | |
if (activepoint.point == root && activepoint.length > 0) { | |
activepoint.length--; | |
Character nextchar = s.charAt(index-remainder+1); | |
activepoint.edge = nextchar.toString(); | |
} else { | |
if(activepoint.point.suffixlink != null) { | |
activepoint.point = activepoint.point.suffixlink; | |
}else { | |
activepoint.point = root; | |
break; | |
} | |
} | |
} | |
System.out.println( " -----------> "); | |
} | |
private class ActivePoint { | |
public Node point; | |
public String edge; | |
public int length; | |
public ActivePoint(Node point, String edge, int length) { | |
this.point = point; | |
this.edge = edge; | |
this.length = length; | |
} | |
} | |
public boolean search (String str, Node n) { | |
//Node n = null; | |
//HashMap<String, Node> child = root.children; | |
//char[] c = str.toCharArray(); | |
// int index ; | |
if(n.getlabel(s).startsWith(str)) { | |
System.out.println("finded" ); | |
return true; | |
} | |
for(String key:n.children.keySet()) { | |
Node child = n.children.get(key); | |
System.out.println(n.getlabel(s)+ "-- " + key + " ---> " + child.getlabel(s));// | |
boolean re= search(str,child); | |
if(re) { | |
return true; | |
} | |
} | |
/* | |
while (root!=null) { | |
for(String key:n.children.keySet()) { | |
Node child = n.children.get(key); | |
System.out.println(n.getlabel(s)+ "-- " + key + " ---> " + child.getlabel(s));// | |
print(child); | |
} | |
for (int i=0;i<str.length();i++) { | |
//char ch = c[i]; | |
//String r = String.valueOf(ch); | |
if (child.containsKey(str.substring(i, i+1)) ) { | |
cur = child.get(str.substring(i, i+1)); | |
child = cur.children; | |
System.out.println(child); | |
// index = cur.start; | |
for (index = cur.start; index<cur.end;index++ ) { | |
if(s.charAt(index) !=str.charAt(i)) { | |
return false ; | |
} | |
} | |
} | |
} | |
}*/ | |
return false; | |
} | |
public static String readFileAsString(String fileName)throws Exception { | |
String data = ""; | |
data = new String(Files.readAllBytes(Paths.get(fileName))); | |
return data; | |
} | |
public static void main(String[] args) throws Exception { | |
suffixTree tree = new suffixTree(); | |
//***test ***// | |
//String t = "abxcabdex$"; | |
// String t = "axabxb"; | |
//String t = "xabxa#babxba$"; | |
//String t = "axabxbxba"; | |
String t = "cocoa"; | |
tree.build(t); | |
// System.out.println(tree.search("cocoa", tree.root)); | |
//System.out.println(tree.search("axabxb")); | |
//System.out.println(tree.search("axabx")); | |
SimpleDateFormat formatter= new SimpleDateFormat("yyyy-MM-dd 'at' HH:mm:ss z"); | |
Date date = new Date(System.currentTimeMillis()); | |
System.out.println(formatter.format(date)); | |
String data = readFileAsString("/Users/liweichen/Desktop/doc.txt"); | |
byte[] bytes = data.getBytes("UTF-8"); | |
String s2 = new String(bytes, "UTF-8"); | |
//System.out.println(s2); | |
//System.out.println(data.length()); | |
//tree.build(s2); | |
// 1.zeitmessung 2.search suffix 3.graph | |
//System.out.println(formatter.format(date)); | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment