Skip to content

Instantly share code, notes, and snippets.

@yusufsyaifudin
Last active March 11, 2016 12:19
Show Gist options
  • Save yusufsyaifudin/4af421ccf269b11205ac to your computer and use it in GitHub Desktop.
Save yusufsyaifudin/4af421ccf269b11205ac to your computer and use it in GitHub Desktop.
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Coordinates of this demo project. -->
  <groupId>test.tokenizer</groupId>
  <artifactId>tokenizerId</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>tokenizerId</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <!-- Compile for Java 8 source and target levels. -->
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
  </properties>

  <!-- Extra repository served from the raw files of a GitHub repository;
       presumably this is where the tokenizerid dependency below is resolved from. -->
  <repositories>
    <repository>
      <id>yusufsyaifudin</id>
      <name>tokenizer-id</name>
      <url>https://github.com/yusufsyaifudin/tokenizer-id/raw/master/</url>
    </repository>
  </repositories>

  <dependencies>
    <!-- Test-only dependency. -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>

    <!-- Tokenizer library used by the demo class. -->
    <dependency>
      <groupId>yusuf.nlp</groupId>
      <artifactId>tokenizerid</artifactId>
      <version>1.0.0</version>
      <scope>compile</scope>
    </dependency>
  </dependencies>
</project>
package test.tokenizer.tokenizerId;
import java.util.ArrayList;
import yusufs.nlp.tokenizerid.Tokenizer;;
/**
 * Command-line demo of the {@code Tokenizer} class.
 *
 * <p>Runs {@code extractSentence} on a sample text, then runs {@code tokenize} and
 * {@code tokenizeToString} on a sample sentence, printing each result to standard output.
 */
public class TestTokenizer
{
    /**
     * Runs the demo and prints the results.
     *
     * @param args command-line arguments (unused)
     */
    public static void main( String[] args )
    {
        String text = "Kalimat satu. Kalimat dua. \"Selamat pagi!\" kata X.";
        Tokenizer tokenizer = new Tokenizer();

        // Text -> sentences.
        ArrayList<String> sentences = tokenizer.extractSentence(text);
        System.out.println("Teks menjadi kalimat: ");
        System.out.println(sentences);

        // Sentence -> tokens, both as a list and as a single string.
        String sentence = "\"Selamat pagi!\" kata X.";
        boolean withPunct = true; // whether punctuation marks are included or not
        ArrayList<String> tokens = tokenizer.tokenize(sentence, withPunct);
        String tokensToString = tokenizer.tokenizeToString(sentence, withPunct);

        System.out.println("Token: ");
        System.out.println(tokens);
        System.out.println("Token ke string: ");
        System.out.println(tokensToString);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment