Skip to content

Instantly share code, notes, and snippets.

@rr-codes
Last active June 25, 2020 06:46
Show Gist options
  • Select an option

  • Save rr-codes/df538d45eed4bd3bce14e5e43d8a5adb to your computer and use it in GitHub Desktop.

Select an option

Save rr-codes/df538d45eed4bd3bce14e5e43d8a5adb to your computer and use it in GitHub Desktop.
package com.richardrobinson;
import java.io.BufferedReader;
import java.io.IOException;
import java.lang.annotation.*;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Stream;
import static java.util.Map.*;
/**
 * Parses delimited (CSV) text into a stream of objects of a specified type.
 * <p>
 * By default, CSVReader supports parsing {@code Integer, Double, String, Boolean} fields. Parsers for other types
 * may be added via {@link CSVReader#registerParser(Class, Function)}.
 * <p>
 * <b>Example:</b>
 * Given the following class {@code Foo}:
 * <pre>{@code class Foo {
 *     public Integer value;
 *     @Serializable("alpha") public String s;
 * }}</pre>
 * <p>
 * and a {@link BufferedReader} whose contents are
 * <pre>{@code alpha,value
 * hello,42
 * world,100}</pre>
 * <p>
 * a CSVReader instance may be used as follows to parse the reader:
 * <pre>{@code final var csv = CSVReader.from(reader, Foo.class, ",");
 * final var rows = csv.rows(); // a Stream of Foos
 * }</pre>
 *
 * @param <T> the type of the objects to create. {@code T} must be instantiable through a no-argument constructor,
 *            and every public field of {@code T} must match a header of the CSV text, either by the field's name
 *            or by the name given in a {@link Serializable} annotation on the field. Headers that match no field
 *            are ignored.
 */
public class CSVReader<T> {
    /**
     * This annotation may be applied to any field of {@code T} to provide an alternate name to match against the
     * CSV headers instead of the name of the field.
     */
    @Retention(RetentionPolicy.RUNTIME)
    @Target({ElementType.FIELD})
    public @interface Serializable {
        String value();
    }

    /** Parsers shared by all CSVReader instances, keyed by the declared type of the field they populate. */
    private static final Map<Class<?>, Function<String, ?>> PARSERS = new HashMap<>(ofEntries(
            entry(Integer.class, Integer::parseInt),
            entry(Double.class, Double::parseDouble),
            entry(String.class, s -> s),
            entry(Boolean.class, Boolean::parseBoolean)
    ));

    private final BufferedReader reader;
    private final Class<T> clazz;
    /** The delimiter compiled as a literal, so characters such as {@code |} or {@code .} are not treated as regex. */
    private final java.util.regex.Pattern splitter;
    private final List<String> headers;

    /**
     * Enables support for a type {@code T} for CSVReader instances in addition to the types supported by default.
     * Registering a parser for an already supported type replaces the previous parser.
     *
     * @param cls    the Class to add support for (for example, {@code Foo.class}); it is matched against the
     *               declared type of each field, so a primitive field needs its primitive class
     *               (for example, {@code int.class})
     * @param parser a Function mapping a {@link String} to a {@code T}
     * @param <T>    the type corresponding to {@code cls}
     */
    public static <T> void registerParser(Class<T> cls, Function<String, T> parser) {
        PARSERS.put(cls, parser);
    }

    private CSVReader(BufferedReader reader, Class<T> clazz, String delimiter) throws IOException {
        this.reader = reader;
        this.clazz = clazz;
        this.splitter = java.util.regex.Pattern.compile(delimiter, java.util.regex.Pattern.LITERAL);
        final String headerLine = reader.readLine();
        if (headerLine == null) {
            // readLine() returns null at end of input; fail with a clear message instead of a NullPointerException.
            throw new IOException("CSV input is empty: expected a header line");
        }
        this.headers = List.of(splitLine(headerLine));
    }

    /**
     * Creates a new CSVReader instance from the specified {@code reader}, whose lines may be parsed into instances
     * of type {@code cls}. The first line of the reader is consumed immediately and used as the header row.
     *
     * @param reader    a {@link BufferedReader} containing {@code n} lines of text, with each line containing
     *                  {@code m} fields separated by a delimiter
     * @param cls       the class of the type of object that each row is parsed into. For example, {@code Foo.class}
     * @param delimiter the delimiter to use; it is matched literally, not as a regular expression
     * @param <T>       the type corresponding to {@code cls}
     * @return a new CSVReader instance
     * @throws IOException if an I/O error occurs or the reader contains no header line
     */
    public static <T> CSVReader<T> from(BufferedReader reader, Class<T> cls, String delimiter) throws IOException {
        return new CSVReader<>(reader, cls, delimiter);
    }

    /**
     * Maps each remaining line of the reader to a parsed instance of type {@code T}. Lines are read and parsed
     * lazily, as the returned stream is consumed.
     *
     * @return a Stream of instances of type {@code T} corresponding to each line
     * @throws IllegalArgumentException during stream traversal, if a field of {@code T} has no matching header,
     *                                  a row has no cell for a required column, no parser is registered for a
     *                                  field's type, or a cell cannot be parsed
     * @throws IllegalStateException    during stream traversal, if {@code T} cannot be instantiated or one of its
     *                                  fields cannot be assigned
     */
    public Stream<T> rows() {
        return reader.lines().map(this::parseRow);
    }

    /** Splits a line on the literal delimiter, keeping trailing empty cells so column indices stay aligned. */
    private String[] splitLine(String line) {
        return splitter.split(line, -1);
    }

    /** Builds one {@code T} from a single line by assigning each public field from the cell under its header. */
    private T parseRow(String row) {
        final String[] cells = splitLine(row);

        final T inst;
        try {
            inst = clazz.getDeclaredConstructor().newInstance();
        } catch (ReflectiveOperationException e) {
            throw new IllegalStateException(
                    "Cannot instantiate " + clazz.getName() + " through a no-argument constructor", e);
        }

        for (final var field : clazz.getFields()) {
            final var annotation = field.getAnnotation(Serializable.class);
            final var name = annotation == null ? field.getName() : annotation.value();

            final int index = headers.indexOf(name);
            if (index == -1) {
                throw new IllegalArgumentException(
                        "No CSV header matches field '" + name + "' of " + clazz.getName());
            }
            if (index >= cells.length) {
                throw new IllegalArgumentException(
                        "Row has " + cells.length + " cell(s) but column '" + name + "' is at index " + index
                                + ": " + row);
            }

            final Function<String, ?> parser = PARSERS.get(field.getType());
            if (parser == null) {
                throw new IllegalArgumentException(
                        "No parser registered for type " + field.getType().getName() + " of field '" + name + "'");
            }

            try {
                field.set(inst, parser.apply(cells[index]));
            } catch (IllegalAccessException e) {
                throw new IllegalStateException(
                        "Cannot assign field '" + field.getName() + "' of " + clazz.getName(), e);
            }
        }
        return inst;
    }
}
import java.io.BufferedReader
import java.io.Reader
import kotlin.reflect.KClass
import kotlin.reflect.full.findAnnotation
import kotlin.reflect.full.primaryConstructor
import kotlin.reflect.jvm.javaType
/**
 * Parses delimited (CSV) text into a lazy sequence of objects of the specified type.
 *
 * By default, CSVParser supports parsing `Int`, `Double`, `String` and `Boolean` values. Parsers for other types
 * may be added via [CSVParser.registerParser].
 *
 * **Usage**: Suppose we have the following class `Foo`:
 * ```kotlin
 * data class Foo(val s: String, @CSVParser.Serializable("value") val i: Float = -1.0f)
 * ```
 * Given a [Reader] whose contents are
 * ```
 * s,value
 * hello,3.14
 * world
 * ```
 * each line can be parsed into a `Foo` object using
 * ```kotlin
 * CSVParser.registerParser(String::toFloat)
 * val csv = CSVParser.from<Foo>(reader)
 * csv.useRows { it.forEach(::println) } // prints each Foo
 * ```
 *
 * @param Row a type which must satisfy the following properties:
 * - It is a _data class_
 * - For each non-optional value parameter, there is a header in the CSV text matching either the name of the
 *   parameter or the value specified for the parameter by the [Serializable] annotation. An optional parameter
 *   takes its default value when it has no header or when a row has no cell in its column.
 * - The class and each value parameter are `public`
 * - The types of the value parameters of `Row` are a combination of the types which support parsing by default.
 *   Otherwise, a custom parser has been added via [CSVParser.registerParser] for each applicable type.
 *
 * @author Richard I Robinson
 */
class CSVParser<Row : Any> @PublishedApi internal constructor(
    @PublishedApi internal val reader: BufferedReader,
    private val cls: KClass<Row>,
    private val delimiter: String = ","
) {
    /**
     * This annotation may be applied to any value parameter of a data class to provide an alternate name for the
     * parameter to be matched against the headers of the CSV text.
     */
    @Retention(AnnotationRetention.RUNTIME)
    @Target(AnnotationTarget.VALUE_PARAMETER)
    annotation class Serializable(val value: String)

    // The header line is consumed at construction; readLine() returns null when the input is empty.
    private val headers: List<String> =
        requireNotNull(reader.readLine()) { "CSV input is empty: expected a header line" }.split(delimiter)

    // Resolved once, on the first parsed row, rather than once per row.
    private val rowConstructor by lazy {
        require(cls.isData) { "${cls.qualifiedName} is not a data class" }
        requireNotNull(cls.primaryConstructor) { "${cls.qualifiedName} has no primary constructor" }
    }

    /**
     * Calls the [block] callback giving it a sequence of all the parsed [Row]s in this file and closes the reader
     * once the processing is complete. Rows are parsed lazily, as the sequence is consumed.
     *
     * @return the value returned by [block].
     * @throws IllegalArgumentException if one or more of the following is true:
     * - [Row] is not a data class,
     * - The non-optional parameters of [Row] do not have names or annotated names corresponding to the names of
     *   the headers of the CSV text
     * - The types of the parameters of [Row] are not parsable by default, nor have had parsers registered for them
     */
    inline fun <T> useRows(block: (Sequence<Row>) -> T): T = reader.use {
        block(it.lineSequence().map(this@CSVParser::parseRow))
    }

    /**
     * Parses a single line into a [Row] by calling the primary constructor of [Row] with one argument per
     * parameter that has both a matching header and a cell in this line; all other parameters are left to
     * their default values.
     */
    @PublishedApi internal fun parseRow(row: String): Row {
        val cells = row.split(delimiter)
        val ctor = rowConstructor

        val arguments = ctor.parameters.mapNotNull { param ->
            val name = param.findAnnotation<Serializable>()?.value ?: param.name
            val idx = headers.indexOf(name)
            if (idx == -1) {
                // Only parameters with a default value may be absent from the header row.
                require(param.isOptional) {
                    "No CSV header matches required parameter '$name' of ${cls.qualifiedName}"
                }
                return@mapNotNull null
            }
            // A short row leaves the parameter unset; callBy then applies its default or rejects the call.
            if (idx !in cells.indices) return@mapNotNull null

            // Look parsers up by the boxed class so `Int` and `Int?` parameters resolve to the same parser.
            val parser = requireNotNull(
                (param.type.classifier as? KClass<*>)?.let { ParserMap[it.javaObjectType] }
            ) { "No parser registered for type ${param.type} of parameter '$name'" }

            param to parser(cells[idx])
        }.toMap()

        return ctor.callBy(arguments)
    }

    companion object {
        /** Parsers shared by all CSVParser instances, keyed by the boxed Java class of the parsed type. */
        @PublishedApi internal val ParserMap = mutableMapOf<Class<*>, (String) -> Any>(
            Int::class.javaObjectType to String::toInt,
            Double::class.javaObjectType to String::toDouble,
            String::class.javaObjectType to { it },
            Boolean::class.javaObjectType to String::toBoolean
        )

        /**
         * Globally registers a parser for [T], which may or may not be parsable by default. Registering a parser
         * for an already supported type replaces the previous parser.
         *
         * @param parser a function mapping a [String] to an arbitrary type [T]
         */
        inline fun <reified T : Any> registerParser(noinline parser: (String) -> T) {
            ParserMap[T::class.javaObjectType] = parser
        }

        /**
         * Creates a new CSVParser<T> instance from the specified [reader] whose lines may be parsed into
         * instances of [T]. The first line of [reader] is consumed immediately and used as the header row.
         *
         * @param reader a [Reader] containing `n` lines of text, each line containing `m` fields separated by a
         * [delimiter]
         * @param delimiter the delimiter to use
         * @throws IllegalArgumentException if [reader] contains no header line
         */
        inline fun <reified T : Any> from(reader: Reader, delimiter: String = ","): CSVParser<T> =
            CSVParser(reader.buffered(), T::class, delimiter)
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment