Last active
June 25, 2020 06:46
-
-
Save rr-codes/df538d45eed4bd3bce14e5e43d8a5adb to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package com.richardrobinson; | |
| import java.io.BufferedReader; | |
| import java.io.IOException; | |
| import java.lang.annotation.*; | |
| import java.util.HashMap; | |
| import java.util.List; | |
| import java.util.Map; | |
| import java.util.function.Function; | |
| import java.util.stream.Stream; | |
| import static java.util.Map.*; | |
/**
 * Parses delimited (CSV-style) text into a stream of objects of a specified type.
 * <p>
 * By default, CSVReader supports fields of type {@code Integer, Double, String, Boolean} and the
 * primitive types {@code int, double, boolean}. Parsers for other types may be added via
 * {@link CSVReader#registerParser(Class, Function)}.
 * <p>
 * <b>Example:</b>
 * Given the following class {@code Foo}:
 * <pre>{@code public class Foo {
 *      public Integer value;
 *      @Serializable("alpha") public String s;
 * }}</pre>
 * <p>
 * and a {@link BufferedReader} whose contents are
 * <pre>{@code alpha,value
 * hello,42
 * world,100}</pre>
 * <p>
 * a CSVReader instance may be used as follows to parse the reader:
 * <pre>{@code final var csv = CSVReader.from(reader, Foo.class, ",");
 * final var rows = csv.rows(); // a Stream of Foos
 * }</pre>
 *
 * @param <T> the type of the objects to create. {@code T} must have an accessible no-argument constructor, and
 *            every public, non-static field of {@code T} must match the name of one header of the CSV text.
 *            Optionally, a field may be annotated with the {@link Serializable} annotation to provide an
 *            alternate name for the field to use when parsing.
 */
public class CSVReader<T> {
    /**
     * This annotation may be applied to any field of {@code T} to provide an alternate name to match with
     * instead of the name of the field.
     */
    @Retention(RetentionPolicy.RUNTIME)
    @Target({ElementType.FIELD})
    public @interface Serializable {
        String value();
    }

    /**
     * Global registry mapping a field type to the function that converts a cell's text into that type.
     * It is a plain {@link HashMap} shared by every CSVReader instance and is not synchronized.
     */
    private static final Map<Class<?>, Function<String, ?>> PARSERS = new HashMap<>();

    static {
        PARSERS.put(Integer.class, Integer::parseInt);
        PARSERS.put(Double.class, Double::parseDouble);
        PARSERS.put(String.class, s -> s);
        PARSERS.put(Boolean.class, Boolean::parseBoolean);
        // Field.getType() reports int.class (not Integer.class) for primitive fields, so the
        // primitive types need their own entries; Field.set unboxes the parsed wrapper value.
        PARSERS.put(int.class, Integer::parseInt);
        PARSERS.put(double.class, Double::parseDouble);
        PARSERS.put(boolean.class, Boolean::parseBoolean);
    }

    private final BufferedReader reader;
    private final Class<T> clazz;
    private final String delimiter;
    private final List<String> headers;

    /**
     * Enables support for a type {@code T} for CSVReader instances in addition to the types supported by default.
     * Registering a parser for an already supported type replaces the existing parser.
     *
     * @param cls    the Class to add support for (for example, {@code Foo.class})
     * @param parser a Function mapping a {@link String} to a {@code T}
     * @param <T>    the type corresponding to {@code cls}
     */
    public static <T> void registerParser(Class<T> cls, Function<String, T> parser) {
        PARSERS.put(cls, parser);
    }

    /**
     * Stores the configuration and consumes the first line of {@code reader} as the header row.
     *
     * @throws IOException              if reading the header line fails
     * @throws IllegalArgumentException if the reader has no header line (empty input)
     */
    private CSVReader(BufferedReader reader, Class<T> clazz, String delimiter) throws IOException {
        this.reader = reader;
        this.clazz = clazz;
        this.delimiter = delimiter;

        final String headerLine = reader.readLine();
        if (headerLine == null) {
            throw new IllegalArgumentException("CSV input is empty: expected a header line");
        }
        this.headers = List.of(headerLine.split(delimiter, -1));
    }

    /**
     * Creates a new CSVReader instance from the specified {@code reader}, whose lines may be parsed into
     * instances of type {@code cls}. The first line of the reader is read immediately and used as the header row.
     *
     * @param reader    a {@link BufferedReader} containing {@code n} lines of text, with each line containing
     *                  {@code m} fields separated by a delimiter.
     * @param cls       the class of the type of object that each row is parsed into. For example, {@code Foo.class}
     * @param delimiter the delimiter to use. It is passed to {@link String#split(String, int)} and is therefore
     *                  interpreted as a regular expression.
     * @param <T>       the type corresponding to {@code cls}
     * @return a new CSVReader instance
     * @throws IOException              if an I/O error occurs
     * @throws IllegalArgumentException if the reader contains no header line
     */
    public static <T> CSVReader<T> from(BufferedReader reader, Class<T> cls, String delimiter) throws IOException {
        return new CSVReader<>(reader, cls, delimiter);
    }

    /**
     * Maps each remaining line of the reader to a parsed instance of type {@code T}. The stream is lazy: lines
     * are read and parsed as the stream is consumed, so parsing errors surface during the terminal operation.
     *
     * @return a Stream of instances of type {@code T} corresponding to each line
     * @throws IllegalArgumentException (while the stream is consumed) if a field has no matching header, a row
     *                                  has no cell for a field's column, no parser is registered for a field's
     *                                  type, or a cell cannot be parsed
     * @throws IllegalStateException    (while the stream is consumed) if {@code T} cannot be instantiated or
     *                                  one of its fields cannot be assigned
     */
    public Stream<T> rows() {
        return reader.lines().map(this::parseRow);
    }

    /**
     * Builds one instance of {@code T} from a single line of text.
     * Failures are reported by throwing rather than by returning {@code null}, so the resulting stream never
     * contains {@code null} elements.
     */
    private T parseRow(String row) {
        // A limit of -1 keeps trailing empty cells, so "a,," still yields three cells.
        final String[] cells = row.split(delimiter, -1);

        final T inst;
        try {
            inst = clazz.getDeclaredConstructor().newInstance();
        } catch (ReflectiveOperationException e) {
            throw new IllegalStateException(
                    "Cannot instantiate " + clazz.getName() + ": an accessible no-argument constructor is required", e);
        }

        for (final var field : clazz.getFields()) {
            // Static fields (constants, for example) are not part of a row.
            if (java.lang.reflect.Modifier.isStatic(field.getModifiers())) continue;

            final var annotation = field.getAnnotation(Serializable.class);
            final var name = annotation == null ? field.getName() : annotation.value();

            final var index = headers.indexOf(name);
            if (index == -1) {
                throw new IllegalArgumentException(
                        "No CSV header named '" + name + "' for field '" + field.getName() + "' of " + clazz.getName());
            }
            if (index >= cells.length) {
                throw new IllegalArgumentException(
                        "Row has " + cells.length + " cell(s) but column '" + name + "' is at index " + index + ": " + row);
            }

            final Function<String, ?> parser = PARSERS.get(field.getType());
            if (parser == null) {
                throw new IllegalArgumentException(
                        "No parser registered for type " + field.getType().getName() + " of field '" + field.getName() + "'");
            }

            final Object value;
            try {
                value = parser.apply(cells[index]);
            } catch (RuntimeException e) {
                throw new IllegalArgumentException(
                        "Cannot parse '" + cells[index] + "' as " + field.getType().getName() + " for column '" + name + "'", e);
            }

            try {
                field.set(inst, value);
            } catch (IllegalAccessException e) {
                throw new IllegalStateException(
                        "Cannot assign field '" + field.getName() + "' of " + clazz.getName(), e);
            }
        }
        return inst;
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import java.io.BufferedReader | |
| import java.io.Reader | |
| import kotlin.reflect.KClass | |
| import kotlin.reflect.full.findAnnotation | |
| import kotlin.reflect.full.primaryConstructor | |
| import kotlin.reflect.jvm.javaType | |
/**
 * Parses delimited (CSV-style) text into a sequence of objects of the specified type.
 *
 * By default, CSVParser supports parsing `Int, Double, String, Boolean` types. Parsers for other types may be
 * added via [CSVParser.registerParser].
 *
 * **Usage**: Suppose we have the following class `Foo`:
 * ```kotlin
 * data class Foo(val s: String, @CSVParser.Serializable("value") val i: Float = -1.0f)
 * ```
 * Given a [Reader] whose contents are
 * ```
 * s,value
 * hello, 3.14
 * world
 * ```
 * each line can be parsed into a `Foo` object using
 * ```kotlin
 * CSVParser.registerParser { it.toFloat() }
 * val csv = CSVParser.from<Foo>(reader)
 * csv.useRows { it.forEach(::println) } // prints each Foo
 * ```
 *
 * @param Row a type which must satisfy the following properties:
 * - It is a _data class_
 * - For each non-optional value parameter, there is a header in the CSV text matching either the name of the
 *   parameter or the value specified for the parameter by the [Serializable] annotation. Optional parameters
 *   without a matching header, or without a cell in a short row, take their default value.
 * - The class and each value parameter are `public`
 * - The types of the value parameters of `Row` are a combination of the types which support parsing by default.
 *   Otherwise, a custom parser has been added via [CSVParser.registerParser] for each applicable type.
 *
 * @author Richard I Robinson
 */
class CSVParser<Row : Any> @PublishedApi internal constructor(
    @PublishedApi internal val reader: BufferedReader,
    private val cls: KClass<Row>,
    private val delimiter: String = ","
) {
    /**
     * This annotation may be applied to any value parameter of a data class to provide an alternate name for the
     * parameter to be matched against the headers of the CSV text.
     */
    @Retention(AnnotationRetention.RUNTIME)
    @Target(AnnotationTarget.VALUE_PARAMETER)
    annotation class Serializable(val value: String)

    /** Column names taken from the first line of [reader], which is consumed when the parser is constructed. */
    private val headers: List<String> =
        requireNotNull(reader.readLine()) { "CSV input is empty: expected a header line" }.split(delimiter)

    /**
     * Primary constructor of [Row], resolved on first use so that an unsuitable [Row] is reported from
     * [useRows] rather than from construction, and so the reflection lookup is not repeated for every row.
     */
    private val rowConstructor by lazy {
        require(cls.isData) { "${cls.qualifiedName ?: cls} must be a data class" }
        requireNotNull(cls.primaryConstructor) { "${cls.qualifiedName ?: cls} has no primary constructor" }
    }

    /**
     * Calls the [block] callback giving it a sequence of all the parsed [Row]s in this file and closes the reader
     * once the processing is complete. The sequence is lazy and must be consumed inside [block].
     *
     * @return the value returned by [block].
     * @throws IllegalArgumentException if one or more of the following is true:
     * - [Row] is not a data class,
     * - The non-optional parameters of [Row] do not have names or annotated names corresponding to the names of
     *   the headers of the CSV text, or a row has no cell for such a parameter
     * - The types of the parameters of [Row] are not parsable by default, nor have had parsers registered for them
     */
    inline fun <T> useRows(block: (Sequence<Row>) -> T): T = reader.use {
        block(it.lineSequence().map(this@CSVParser::parseRow))
    }

    /**
     * Parses a single line of text into a [Row] by calling its primary constructor with one argument per
     * parameter that has both a matching header and a cell in [row]; all other (optional) parameters are left
     * to their default values.
     */
    @PublishedApi internal fun parseRow(row: String): Row {
        val cells = row.split(delimiter)
        val arguments = rowConstructor.parameters.mapNotNull { parameter ->
            val name = parameter.findAnnotation<Serializable>()?.value ?: parameter.name

            val index = headers.indexOf(name)
            if (index == -1) {
                // Only required parameters need a header; optional ones fall back to their default.
                require(parameter.isOptional) {
                    "No CSV header matches required parameter '$name' of ${cls.simpleName}"
                }
                return@mapNotNull null
            }

            val cell = cells.getOrNull(index)
            if (cell == null) {
                // The row is shorter than the header line: same rule as a missing header.
                require(parameter.isOptional) {
                    "Row has no value for required parameter '$name' of ${cls.simpleName}: $row"
                }
                return@mapNotNull null
            }

            // Look up by the exact Java type first, then by the parameter's classifier class so that
            // nullable (boxed) and generic parameter types resolve to the parser registered for their class.
            val parser = ParserMap[parameter.type.javaType]
                ?: (parameter.type.classifier as? KClass<*>)?.let { ParserMap[it.java] }
            requireNotNull(parser) {
                "No parser registered for parameter '$name' of type ${parameter.type}"
            }

            parameter to parser(cell)
        }.toMap()

        return rowConstructor.callBy(arguments)
    }

    companion object {
        /** Global registry mapping a parameter's Java class to the function that parses a cell into that type. */
        @PublishedApi internal val ParserMap = mutableMapOf<Class<*>, (String) -> Any>(
            Int::class.java to { it.toInt() },
            Double::class.java to { it.toDouble() },
            String::class.java to { it },
            Boolean::class.java to { it.toBoolean() }
        )

        /**
         * Globally registers a parser for [T], which may or may not be parsable by default. Registering a parser
         * for an already supported type replaces the existing parser.
         *
         * @param parser a function mapping a [String] to an arbitrary type [T]
         */
        inline fun <reified T : Any> registerParser(noinline parser: (String) -> T) {
            ParserMap[T::class.java] = parser
        }

        /**
         * Creates a new `CSVParser<T>` instance from the specified [reader] whose lines may be parsed into
         * instances of [T]. The first line of [reader] is read immediately and used as the header row.
         *
         * @param reader a [Reader] containing `n` lines of text, each line containing `m` fields separated by a
         * [delimiter]. It is wrapped in a [BufferedReader] unless it already is one.
         * @param delimiter the delimiter to use
         * @throws IllegalArgumentException if [reader] contains no header line
         */
        inline fun <reified T : Any> from(reader: Reader, delimiter: String = ","): CSVParser<T> {
            val br = if (reader is BufferedReader) reader else BufferedReader(reader)
            return CSVParser(br, T::class, delimiter)
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment