Last active
August 29, 2015 14:15
-
-
Save jberkel/c9c29a04f05956705ec9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import com.almworks.sqlite4java.SQLiteConnection; | |
import com.almworks.sqlite4java.SQLiteException; | |
import com.almworks.sqlite4java.SQLiteStatement; | |
import de.tudarmstadt.ukp.jwktl.api.IWiktionaryEntry; | |
import de.tudarmstadt.ukp.jwktl.api.IWiktionarySense; | |
import de.tudarmstadt.ukp.jwktl.api.WiktionaryException; | |
import de.tudarmstadt.ukp.jwktl.api.entry.WiktionaryEntry; | |
import de.tudarmstadt.ukp.jwktl.api.entry.WiktionaryPage; | |
import de.tudarmstadt.ukp.jwktl.api.util.ILanguage; | |
import de.tudarmstadt.ukp.jwktl.parser.util.IDumpInfo; | |
import org.slf4j.Logger; | |
import org.slf4j.LoggerFactory; | |
import java.io.File; | |
class SQLiteWiktionaryEdition extends WritableEdition { | |
private final Logger logger = LoggerFactory.getLogger(getClass()); | |
private final ILanguage filterLanguage; | |
private final SQLiteConnection connection; | |
private SQLiteStatement insertStatement; | |
SQLiteWiktionaryEdition(File dbDir, ILanguage filterLanguage) { | |
this.filterLanguage = filterLanguage; | |
connection = new SQLiteConnection(dbDir); | |
try { | |
connection.open(true); | |
createTables(); | |
beginTransaction(); | |
} catch (SQLiteException e) { | |
throw new RuntimeException(e); | |
} | |
} | |
private void beginTransaction() throws SQLiteException { | |
connection.exec("BEGIN TRANSACTION"); | |
} | |
@Override | |
public void saveProperties(IDumpInfo dumpInfo) throws WiktionaryException { | |
} | |
@Override | |
public void savePage(WiktionaryPage page) { | |
for (WiktionaryEntry entry : page.entries()) { | |
if (filterLanguage == null || filterLanguage.equals(entry.getWordLanguage())) { | |
logger.debug("saving " + page); | |
try { | |
SQLiteStatement statement = prepareStatementForEntry(entry); | |
while (statement.step()) { | |
} | |
} catch (SQLiteException e) { | |
throw new RuntimeException(e); | |
} | |
} | |
} | |
} | |
private SQLiteStatement prepareStatementForEntry(IWiktionaryEntry entry) throws SQLiteException { | |
if (insertStatement == null) { | |
insertStatement = connection.prepare("INSERT INTO words (headword, pos, gender, gloss) VALUES(?,?,?,?)"); | |
} else { | |
insertStatement.reset(); | |
} | |
insertStatement.bind(1, entry.getWord()); | |
bindEnum(insertStatement, 2, entry.getPartOfSpeech()); | |
bindEnum(insertStatement, 3, entry.getGender()); | |
StringBuilder sb = new StringBuilder(); | |
for (IWiktionarySense sense : entry.getSenses()) { | |
sb.append(sense.getGloss().getText()) | |
.append(" "); | |
} | |
if (sb.length() > 0) { | |
insertStatement.bind(4, sb.toString()); | |
} | |
return insertStatement; | |
} | |
private void bindEnum(SQLiteStatement statement, int index, Enum<?> enumValue) throws SQLiteException { | |
if (enumValue != null) { | |
statement.bind(index, enumValue.ordinal()); | |
} else { | |
statement.bindNull(index); | |
} | |
} | |
@Override | |
public void commit() { | |
try { | |
doCommit(); | |
beginTransaction(); | |
} catch (SQLiteException e) { | |
throw new RuntimeException(); | |
} | |
} | |
private void doCommit() throws SQLiteException { | |
this.connection.exec("COMMIT"); | |
} | |
@Override | |
public void setLanguage(ILanguage language) { | |
} | |
@Override | |
public void setEntryIndexByTitle(boolean entryIndexByTitle) { | |
} | |
private void createTables() throws SQLiteException { | |
connection.exec("CREATE TABLE IF NOT EXISTS words (" + | |
"headword TEXT," + | |
"pos INTEGER," + | |
"gender INTEGER," + | |
"gloss TEXT" + | |
");"); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment