Skip to content

Instantly share code, notes, and snippets.

@fmbenhassine
Last active May 24, 2018 20:54
Show Gist options
  • Save fmbenhassine/eede1eb6ab35b2a9ab4b to your computer and use it in GitHub Desktop.
Save fmbenhassine/eede1eb6ab35b2a9ab4b to your computer and use it in GitHub Desktop.
Use Easy Batch to do some data mining on Paris tree data #EasyBatch
package org.easybatch.flatfile;
/**
 * Mutable bean describing a single tree by its address and its height.
 *
 * <p>The property names are kept in French ({@code adresse}, {@code hauteur});
 * {@code TreeParisTest} hands these exact names to its record mapper.
 */
public class Tree {

    /** Height of the tree; {@code 0} until a value is set. */
    private float hauteur;

    /** Address of the tree; {@code null} until a value is set. */
    private String adresse;

    /**
     * Creates a tree with both properties left at their Java defaults.
     */
    public Tree() {
    }

    /**
     * @return the height of the tree
     */
    public float getHauteur() {
        return this.hauteur;
    }

    /**
     * @param hauteur the height to store
     */
    public void setHauteur(float hauteur) {
        this.hauteur = hauteur;
    }

    /**
     * @return the address of the tree, or {@code null} if none was set
     */
    public String getAdresse() {
        return this.adresse;
    }

    /**
     * @param adresse the address to store
     */
    public void setAdresse(String adresse) {
        this.adresse = adresse;
    }
}
package org.easybatch.flatfile;
import org.easybatch.core.api.ComputationalRecordProcessor;
import org.easybatch.core.api.Report;
import org.easybatch.core.filter.HeaderRecordFilter;
import org.easybatch.core.impl.Engine;
import org.easybatch.core.record.StringRecord;
import org.easybatch.flatfile.dsv.DelimitedRecordMapper;
import org.junit.Test;
import java.io.File;
import static org.easybatch.core.impl.EngineBuilder.aNewEngine;
/**
 * Counts the trees with a strictly positive height in the Paris trees CSV file,
 * once by parsing each raw line by hand and once by mapping each line to a
 * {@link Tree} first. Each test prints the total and the batch duration.
 */
public class TreeParisTest {

    //data source : http://opendata.paris.fr/explore/dataset/arbresalignementparis2010/download/?format=csv
    private static final String DATA_SOURCE = "/arbresalignementparis2010.csv";

    /** Field separator used by the data file. */
    private static final String DELIMITER = ";";

    /** Zero-based position of the height ("hauteur") field in a line. */
    private static final int HEIGHT_FIELD_INDEX = 3;

    /**
     * Counts the trees by splitting every raw line and parsing the height field directly.
     *
     * @throws Exception if the data file cannot be located or the engine fails
     */
    @Test
    public void testWithRawData() throws Exception {
        File dataSource = locateDataSource();
        Engine engine = aNewEngine()
                .reader(new FlatFileRecordReader(dataSource))
                .filter(new HeaderRecordFilter())
                .processor(new ComputationalRecordProcessor<StringRecord, StringRecord, Integer>() {

                    // primitive counter: avoids unboxing and reboxing on every record
                    private int count;

                    @Override
                    public Integer getComputationResult() {
                        return count;
                    }

                    @Override
                    public StringRecord processRecord(StringRecord record) throws Exception {
                        // A line with too few fields or a non-numeric height makes this throw;
                        // how the engine reacts to that is not visible from this file.
                        String[] fields = record.getPayload().split(DELIMITER);
                        if (Float.parseFloat(fields[HEIGHT_FIELD_INDEX]) > 0) {
                            count++;
                        }
                        return record;
                    }
                })
                .build();
        printTotal(engine.call());
    }

    /**
     * Counts the trees after mapping fields 2 and 3 of every line to the
     * {@code adresse} and {@code hauteur} properties of a {@link Tree}.
     *
     * @throws Exception if the data file cannot be located or the engine fails
     */
    @Test
    public void testWithMappingDataToDomainObject() throws Exception {
        File dataSource = locateDataSource();
        DelimitedRecordMapper<Tree> recordMapper = new DelimitedRecordMapper<Tree>(
                Tree.class,
                new Integer[]{2, HEIGHT_FIELD_INDEX},
                new String[]{"adresse", "hauteur"});
        recordMapper.setDelimiter(DELIMITER);
        Engine engine = aNewEngine()
                .reader(new FlatFileRecordReader(dataSource))
                .filter(new HeaderRecordFilter())
                .mapper(recordMapper)
                .processor(new ComputationalRecordProcessor<Tree, Tree, Integer>() {

                    // primitive counter: avoids unboxing and reboxing on every record
                    private int count;

                    @Override
                    public Integer getComputationResult() {
                        return count;
                    }

                    @Override
                    public Tree processRecord(Tree tree) throws Exception {
                        if (tree.getHauteur() > 0) {
                            count++;
                        }
                        return tree;
                    }
                })
                .build();
        printTotal(engine.call());
    }

    /**
     * Resolves the data file from the classpath.
     *
     * @return the data file as a {@link File}
     * @throws IllegalStateException if the resource is not on the classpath
     * @throws java.net.URISyntaxException if the resource URL cannot be converted to a URI
     */
    private File locateDataSource() throws java.net.URISyntaxException {
        java.net.URL resource = getClass().getResource(DATA_SOURCE);
        if (resource == null) {
            // getResource signals "not found" with null; fail with a readable message
            // instead of a bare NullPointerException on toURI().
            throw new IllegalStateException("Data file not found on classpath: " + DATA_SOURCE);
        }
        return new File(resource.toURI());
    }

    /**
     * Prints the batch result of the report together with the formatted batch duration.
     *
     * @param report the report returned by the engine
     */
    private static void printTotal(Report report) {
        Integer total = (Integer) report.getBatchResult();
        System.out.println("total = " + total + " calculated in " + report.getFormattedBatchDuration());
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment