Last active
May 24, 2018 20:54
-
-
Save fmbenhassine/eede1eb6ab35b2a9ab4b to your computer and use it in GitHub Desktop.
Use Easy Batch to do some data mining on the Paris trees dataset #EasyBatch
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package org.easybatch.flatfile; | |
/**
 * Simple mutable bean describing one tree of the Paris trees data set.
 *
 * <p>Instances are populated through the setters by field name: the mapping test in this
 * sample binds CSV columns 2 and 3 to the {@code adresse} and {@code hauteur} fields, so
 * the French property names must stay in sync with that configuration.
 */
public class Tree {

    /** Address of the tree ("adresse" in the data set). */
    private String adresse;

    /** Height of the tree ("hauteur" in the data set); unit presumably metres - TODO confirm. */
    private float hauteur;

    /** Creates an empty tree; a public no-arg constructor keeps the class usable as a bean. */
    public Tree() {
    }

    /**
     * @return the address of the tree, or {@code null} if it has not been set
     */
    public String getAdresse() {
        return adresse;
    }

    /**
     * @param adresse the address of the tree
     */
    public void setAdresse(String adresse) {
        this.adresse = adresse;
    }

    /**
     * @return the height of the tree, {@code 0} if it has not been set
     */
    public float getHauteur() {
        return hauteur;
    }

    /**
     * @param hauteur the height of the tree
     */
    public void setHauteur(float hauteur) {
        this.hauteur = hauteur;
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package org.easybatch.flatfile; | |
import org.easybatch.core.api.ComputationalRecordProcessor; | |
import org.easybatch.core.api.Report; | |
import org.easybatch.core.filter.HeaderRecordFilter; | |
import org.easybatch.core.impl.Engine; | |
import org.easybatch.core.record.StringRecord; | |
import org.easybatch.flatfile.dsv.DelimitedRecordMapper; | |
import org.junit.Test; | |
import java.io.File; | |
import static org.easybatch.core.impl.EngineBuilder.aNewEngine; | |
public class TreeParisTest { | |
//data source : http://opendata.paris.fr/explore/dataset/arbresalignementparis2010/download/?format=csv | |
@Test | |
public void testWithRawData() throws Exception { | |
File dataSource = new File(this.getClass().getResource("/arbresalignementparis2010.csv").toURI()); | |
Engine engine = aNewEngine() | |
.reader(new FlatFileRecordReader(dataSource)) | |
.filter(new HeaderRecordFilter()) | |
.processor(new ComputationalRecordProcessor<StringRecord, StringRecord, Integer>() { | |
Integer count = 0; | |
@Override | |
public Integer getComputationResult() { | |
return count; | |
} | |
@Override | |
public StringRecord processRecord(StringRecord record) throws Exception { | |
if (Float.parseFloat(record.getPayload().split(";")[3]) > 0) { | |
count++; | |
} | |
return record; | |
} | |
}) | |
.build(); | |
Report report = engine.call(); | |
Integer total = (Integer) report.getBatchResult(); | |
System.out.println("total = " + total + " calculated in " + report.getFormattedBatchDuration()); | |
} | |
@Test | |
public void testWithMappingDataToDomainObject() throws Exception { | |
File dataSource = new File(this.getClass().getResource("/arbresalignementparis2010.csv").toURI()); | |
DelimitedRecordMapper<Tree> recordMapper = new DelimitedRecordMapper<Tree>(Tree.class, new Integer[]{2, 3}, new String[]{"adresse", "hauteur"}); | |
recordMapper.setDelimiter(";"); | |
Engine engine = aNewEngine() | |
.reader(new FlatFileRecordReader(dataSource)) | |
.filter(new HeaderRecordFilter()) | |
.mapper(recordMapper) | |
.processor(new ComputationalRecordProcessor<Tree, Tree, Integer>() { | |
Integer count = 0; | |
@Override | |
public Integer getComputationResult() { | |
return count; | |
} | |
@Override | |
public Tree processRecord(Tree tree) throws Exception { | |
if (tree.getHauteur() > 0) { | |
count++; | |
} | |
return tree; | |
} | |
}) | |
.build(); | |
Report report = engine.call(); | |
Integer total = (Integer) report.getBatchResult(); | |
System.out.println("total = " + total + " calculated in " + report.getFormattedBatchDuration()); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment