Skip to content

Instantly share code, notes, and snippets.

@hideaki-t
Created July 9, 2013 05:13
Show Gist options
  • Save hideaki-t/5954882 to your computer and use it in GitHub Desktop.
Save hideaki-t/5954882 to your computer and use it in GitHub Desktop.
Regex-based CSV parser in Java (without newline support)
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Regex-assisted parser for a single line of comma-separated values.
 *
 * <p>Fields are separated by {@code ,}. A field may be wrapped in double quotes, in which
 * case it may contain commas, and a literal double quote is written as two consecutive
 * double quotes ({@code "a""b"} decodes to {@code a"b}). Line breaks inside quoted fields
 * are not supported: the input is always treated as exactly one record.
 */
public class CSVParser {

    /**
     * Matches one well-formed quoted field at the current position: an opening quote, any
     * run of non-quote characters or doubled quotes, and a closing quote that is directly
     * followed by a separator or the end of the input. Group 1 is the raw content (quotes
     * still doubled).
     */
    private static final Pattern QUOTED_FIELD =
            Pattern.compile("\"((?:[^\"]|\"\")*)\"(?=,|\\z)");

    /**
     * Splits one CSV line into at most {@code max} decoded fields.
     *
     * <p>Empty fields are preserved, so {@code "a,,b"} yields {@code [a, "", b]} and a
     * trailing comma yields a trailing empty field. An empty line yields an empty list.
     *
     * <p>If a separator follows the {@code max}-th field, everything after that separator
     * is appended unparsed as one extra, final element; the result therefore holds at most
     * {@code max + 1} elements. With {@code max == 0} the whole line is returned as the
     * single element of a fixed-size list.
     *
     * <p>A field that is not a well-formed quoted field (unterminated quote, text after the
     * closing quote, quote in the middle of the field) is returned verbatim up to the next
     * comma instead of being silently altered.
     *
     * @param line the record to split; must not be {@code null} when {@code max > 0}
     * @param max  maximum number of fields to decode; {@code 0} returns the line unsplit
     * @return the decoded fields, optionally followed by the unparsed remainder
     * @throws IllegalArgumentException if {@code max} is negative
     */
    public static List<String> parse(final String line, final int max) {
        if (max < 0) {
            throw new IllegalArgumentException("max must not be negative: " + max);
        }
        if (max == 0) {
            return Arrays.asList(line);
        }
        // Deliberately not pre-sized with max: callers pass huge limits to mean "no limit".
        final List<String> fields = new ArrayList<>();
        final int length = line.length();
        if (length == 0) {
            return fields;
        }
        final Matcher quoted = QUOTED_FIELD.matcher(line);
        int pos = 0; // always the index at which the next field starts
        while (true) {
            if (fields.size() == max) {
                // Limit reached and input remains after a separator: keep the rest as-is.
                fields.add(line.substring(pos));
                break;
            }
            final int end; // index just past the field, i.e. at the separator or at length
            if (quoted.region(pos, length).lookingAt()) {
                fields.add(quoted.group(1).replace("\"\"", "\""));
                end = quoted.end();
            } else {
                final int comma = line.indexOf(',', pos);
                end = comma < 0 ? length : comma;
                fields.add(line.substring(pos, end));
            }
            if (end == length) {
                break;
            }
            pos = end + 1; // step over the separator
        }
        return fields;
    }

    /**
     * Self-test. Parses a sample line with every limit from 0 to 9 and verifies the decoded
     * fields and the unparsed remainder, then verifies that empty fields are preserved.
     * Uses explicit checks rather than {@code assert} so it is effective without
     * {@code -ea}.
     *
     * @param args ignored
     * @throws AssertionError if any expectation does not hold
     */
    public static void main(String[] args) {
        final String line = "\",\",1,2,3,\",,,\",\"hoge\"\"hoge\",abc,\"\"";
        final String[] v = {",", "1", "2", "3", ",,,", "hoge\"hoge", "abc", ""};
        // offset[i] is where the unparsed remainder starts after i decoded fields.
        final int[] offset = {0, 4, 6, 8, 10, 16, 29, 33};
        for (int i = 0; i < 10; i++) {
            final List<String> r = CSVParser.parse(line, i);
            final int n = r.size() - 1;
            check(r.size() == (i < v.length ? i + 1 : v.length), "size for max=" + i);
            for (int j = 0; j < n; j++) {
                check(r.get(j).equals(v[j]), "field " + j + " for max=" + i);
            }
            if (i < v.length) {
                check(line.substring(offset[i]).equals(r.get(n)), "remainder for max=" + i);
            } else {
                check(v[n].equals(r.get(n)), "last field for max=" + i);
            }
        }
        check(parse("a,,b", 10).equals(Arrays.asList("a", "", "b")), "inner empty field");
        check(parse(",a,", 10).equals(Arrays.asList("", "a", "")), "outer empty fields");
    }

    /** Throws an {@link AssertionError} naming the failed expectation when {@code ok} is false. */
    private static void check(final boolean ok, final String what) {
        if (!ok) {
            throw new AssertionError(what);
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment