Skip to content

Instantly share code, notes, and snippets.

@neuro-sys
Last active December 11, 2015 07:08
Show Gist options
  • Select an option

  • Save neuro-sys/4563703 to your computer and use it in GitHub Desktop.

Select an option

Save neuro-sys/4563703 to your computer and use it in GitHub Desktop.
Dice's co-efficient
import java.util.*;
/**
* Source: http://www.codeproject.com/Articles/147230/Simple-Fuzzy-String-Similarity-in-Java
* By George Stragand, 17 Jan 2011
*/
public class Dice {
public static List<char[]> bigram(String input) {
ArrayList<char[]> bigram = new ArrayList<char[]>();
for (int i = 0; i < input.length() - 1; i++) {
char[] chars = new char[2];
chars[0] = input.charAt(i);
chars[1] = input.charAt(i+1);
bigram.add(chars);
}
return bigram;
}
public static double dice(List<char[]> bigram1, List<char[]> bigram2) {
List<char[]> copy = new ArrayList<char[]>(bigram2);
int matches = 0;
for (int i = bigram1.size(); --i >= 0;) {
char[] bigram = bigram1.get(i);
for (int j = copy.size(); --j >= 0;) {
char[] toMatch = copy.get(j);
if (bigram[0] == toMatch[0] && bigram[1] == toMatch[1]) {
copy.remove(j);
matches += 2;
break;
}
}
}
return (double) matches / (bigram1.size() + bigram2.size());
}
public static void main(String[] args) {
String a = "Selam merhaba nasilsiniz";
String b = "Slm nbr nslsnz ii u?";
double dc = dice(bigram(a), bigram(b));
System.out.println((int) (dc*100) + "%");
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment