Skip to content

Instantly share code, notes, and snippets.

@john-h-kastner
Last active August 29, 2015 14:18
Show Gist options
  • Save john-h-kastner/27e1777d50cb8e28cd7e to your computer and use it in GitHub Desktop.
Save john-h-kastner/27e1777d50cb8e28cd7e to your computer and use it in GitHub Desktop.
package unpackingsentence;
import java.util.Comparator;
import java.util.Arrays;
import java.util.List;
import java.lang.StringBuilder;
public class MarkovChain {
private final int[][] characterFrequencies;
private int dataPoints;
public final static int WORD_DELIMITER = -1;
public MarkovChain(){
characterFrequencies = new int[26][27];
dataPoints = 0;
}
private int inRange(int c){
if(c==WORD_DELIMITER){
return 26;
}
return Character.toLowerCase(c)-'a';
}
public void train(List<String> words){
words.stream().forEach(this :: addWord);
}
public void addWord(String word){
char[] chars = word.toCharArray();
for(int i = 0; i< chars.length-1;i++){
characterFrequencies[inRange(chars[i])][inRange(chars[i+1])]++;
}
characterFrequencies[inRange(chars[chars.length-1])][inRange(WORD_DELIMITER)]++;
dataPoints+=word.length()+1;
}
public double chanceOf(int from, int to){
return (double)characterFrequencies[inRange(from)][inRange(to)]/dataPoints;
}
public Comparator<Character> comparingFrequency(char from){
return Comparator.comparingDouble(c->(double)characterFrequencies[inRange(from)][inRange(c)]/dataPoints);
}
@Override
public String toString(){
StringBuilder builder = new StringBuilder();
builder.append(dataPoints).append("\n");
for(int[] ns : characterFrequencies){
for(int n:ns){
builder.append(n).append(" ");
}
builder.append("\n");
}
return builder.toString();
}
}
package unpackingsentence;
import java.awt.Point;
public class PackedFile {
public final char[][] packedSentence;
public final Point startingPosition;
public PackedFile(char[][] sentence,int r, int c){
packedSentence = sentence;
startingPosition = new Point(r,c);
}
}
package unpackingsentence;
import java.awt.Point;
import java.util.List;
import java.util.ArrayList;
import java.util.stream.Stream;
import java.util.Comparator;
import java.lang.StringBuilder;
import java.util.Optional;
import java.util.Collections;
import java.io.IOException;
import java.io.BufferedReader;
import java.io.File;
import java.nio.file.Paths;
import java.nio.file.Files;
import java.io.BufferedReader;
import java.io.FileReader;
public class Unpacker {
private final MarkovChain characterProbabilities;
public Unpacker(MarkovChain chain){
characterProbabilities = chain;
}
public List<String> unpack(PackedFile packed){
return unpack(packed.packedSentence,packed.startingPosition);
}
public List<String> unpack(char[][] packed, Point start){
return unpack(packed,start, new ArrayList<>(packed.length*packed[0].length));
}
public List<String> unpack(char[][] packed, Point start, List<Point> usedPoints){
usedPoints.add(start);
char startChar = packed[start.x][start.y];
Point[] moves = {new Point(start.x,start.y-1),
new Point(start.x,start.y+1),
new Point(start.x-1,start.y),
new Point(start.x+1,start.y)};
Comparator<Character> frequencyComparator = characterProbabilities.comparingFrequency(startChar);
Point[] vistOrder = Stream
.of(moves)
.filter(e -> !usedPoints.contains(e))
.filter(p -> p.x >= 0 && p.x < packed.length && p.y >= 0 && p.y < packed[0].length)
.sorted((p0,p1) -> frequencyComparator.compare(packed[p1.x][p1.y],packed[p0.x][p0.y]))
.toArray(Point[] :: new);
if(vistOrder.length == 0) {
if(usedPoints.size() != packed.length*packed[0].length){
return Collections.emptyList();
} else {
List<String> unpacked = new ArrayList<>();
unpacked.add(buildString(packed,usedPoints));
return unpacked;
}
}
List<String> allUnpacked = new ArrayList<>();
for(Point toVisit : vistOrder){
List<String> unpacked = unpack(packed,toVisit,new ArrayList<Point>(usedPoints));
if(!unpacked.isEmpty()){
allUnpacked.addAll(unpacked);
}
}
return allUnpacked;
}
public String buildString(char[][] packed,List<Point> usedPoints){
return usedPoints
.stream()
.map(p -> packed[p.x][p.y])
.reduce("",((str,c) -> str + c),String :: concat);
}
public static PackedFile parsePacked(String path){
try(
BufferedReader read = new BufferedReader(new FileReader(new File(path)));
){
String[] header = read.readLine().split(" ");
int rows = Integer.parseInt(header[0]);
char[][] packedSentence = new char[rows][0];
int startRow = Integer.parseInt(header[1]);
int startColumn = Integer.parseInt(header[2]);
for(int i=0;i<rows;i++){
int[] chars = read.readLine()
.chars()
.filter(Character :: isLetter)
.toArray();
packedSentence[i] = new char[chars.length];
for(int j =0;j<chars.length;j++){
packedSentence[i][j] = (char) chars[j];
}
}
return new PackedFile(packedSentence,startRow-1,startColumn-1);
} catch (IOException ioe) {
System.err.println("Exception Occurred while reading packed sentence!");
return null;
}
}
public static void main(String[] args){
String packedFile = args[0];
String trainingFile = args[1];
PackedFile packed = parsePacked(packedFile);
MarkovChain chain = new MarkovChain();
List<String> trainingWords;
try {
trainingWords = Files.readAllLines(Paths.get(trainingFile));
} catch (IOException ioe) {
trainingWords = new ArrayList<>();
}
chain.train(trainingWords);
Unpacker unpacker = new Unpacker(chain);
unpacker.unpack(packed).stream().forEach(System.out :: println);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment