Last active
August 31, 2015 19:05
-
-
Save bradkarels/9c522b71d228cac9357f to your computer and use it in GitHub Desktop.
Fiddling about with Avro and the Scala REPL...
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
bkarels@ubuntu:~/dev/avro/thing$ cat thing.avsc | |
{ | |
"namespace": "com.rbh.avro.pops.thing", | |
"type": "record", | |
"name": "Thing", | |
"fields": [ | |
{ | |
"name": "name", | |
"type": "string" | |
}, | |
{ | |
"name": "isgood", | |
"type": "boolean" | |
}, | |
{ | |
"name": "description", | |
"type": "string" | |
} | |
] | |
} | |
One way to precompile your schema: | |
mkdir -p src/main | |
mkdir lib | |
mkdir bin | |
cp avro-tools-1.7.7.jar ./lib | |
cp avro-1.7.7.jar ./lib | |
# Use avro-tools to generate Java code from the schema into src/main: | |
java -jar ./lib/avro-tools-1.7.7.jar compile schema thing.avsc ./src/main | |
# Compile the source: | |
javac -d bin -cp lib/avro-tools-1.7.7.jar:lib/avro-1.7.7.jar:. src/main/com/rbh/avro/pops/thing/Thing.java | |
# Package the compiled classes into a jar: | |
cd bin | |
jar cf thing.jar com/ | |
# Examine the jar: | |
jar tf thing.jar | |
Picked up _JAVA_OPTIONS: -Xms128m -Xmx512m -XX:PermSize=128m -XX:MaxPermSize=512m | |
META-INF/ | |
META-INF/MANIFEST.MF | |
com/ | |
com/rbh/ | |
com/rbh/avro/ | |
com/rbh/avro/pops/ | |
com/rbh/avro/pops/thing/ | |
com/rbh/avro/pops/thing/Thing$Builder.class | |
com/rbh/avro/pops/thing/Thing.class | |
com/rbh/avro/pops/thing/Thing$1.class | |
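Do things with Spark (launch spark-shell with thing.jar on its classpath, e.g. `spark-shell --jars thing.jar`, so the generated Thing class can be imported).... | |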
### Create some "things" | |
Spark context available as sc. | |
scala> import com.rbh.avro.pops.thing._ | |
import com.rbh.avro.pops.thing._ | |
scala> val t = new Thing() | |
t: com.rbh.avro.pops.thing.Thing = {"name": null, "isgood": false, "description": null} | |
scala> val name:CharSequence = "foo" | |
name: CharSequence = foo | |
scala> val desc:CharSequence = "poo" | |
desc: CharSequence = poo | |
scala> val t0 = new Thing(name,false,desc) | |
t0: com.rbh.avro.pops.thing.Thing = {"name": "foo", "isgood": false, "description": "poo"} | |
scala> t0.description | |
res0: CharSequence = poo | |
scala> t0.description.toString | |
res1: String = poo | |
scala> val t2 = new Thing | |
t2: com.rbh.avro.pops.thing.Thing = {"name": null, "isgood": false, "description": null} | |
scala> t2.setName(name) | |
scala> t2.name | |
res3: CharSequence = foo | |
scala> t2.setDescription(desc) | |
scala> t2.setIsgood(false) | |
scala> t2.toString | |
res6: String = {"name": "foo", "isgood": false, "description": "poo"} | |
### Create a thing with a builder ### | |
scala> val t3:Thing = Thing.newBuilder().setName(name).setIsgood(true).setDescription(desc).build() | |
t3: com.rbh.avro.pops.thing.Thing = {"name": "foo", "isgood": true, "description": "poo"} | |
### Serialize them ### | |
scala> import org.apache.avro.io._ | |
import org.apache.avro.io._ | |
scala> import org.apache.avro.specific._ | |
import org.apache.avro.specific._ | |
scala> val thingDatumWriter:DatumWriter[Thing] = new SpecificDatumWriter[Thing](classOf[Thing]) | |
thingDatumWriter: org.apache.avro.io.DatumWriter[com.rbh.avro.pops.thing.Thing] = org.apache.avro.specific.SpecificDatumWriter@7e1ca523 | |
scala> import org.apache.avro.file._ | |
import org.apache.avro.file._ | |
scala> val dataFileWriter:DataFileWriter[Thing] = new DataFileWriter[Thing](thingDatumWriter) | |
dataFileWriter: org.apache.avro.file.DataFileWriter[com.rbh.avro.pops.thing.Thing] = org.apache.avro.file.DataFileWriter@35f5e371 | |
scala> import java.io.File | |
import java.io.File | |
scala> dataFileWriter.create(t2.getSchema(), new File("things.avro")) | |
res8: org.apache.avro.file.DataFileWriter[com.rbh.avro.pops.thing.Thing] = org.apache.avro.file.DataFileWriter@35f5e371 | |
scala> dataFileWriter.append(t2) | |
scala> dataFileWriter.append(t0) | |
scala> dataFileWriter.close() | |
# The file things.avro can now be found on the local filesystem (in the shell's working directory) | |
### Do some deserialization on the file ### | |
scala> val thingDatumReader:DatumReader[Thing] = new SpecificDatumReader[Thing](classOf[Thing]) | |
thingDatumReader: org.apache.avro.io.DatumReader[com.rbh.avro.pops.thing.Thing] = org.apache.avro.specific.SpecificDatumReader@40aa66fd | |
scala> val dataFileReader:DataFileReader[Thing] = new DataFileReader[Thing](new File("things.avro"), thingDatumReader) | |
dataFileReader: org.apache.avro.file.DataFileReader[com.rbh.avro.pops.thing.Thing] = org.apache.avro.file.DataFileReader@5f514be1 | |
scala> var thing:Thing = null | |
thing: com.rbh.avro.pops.thing.Thing = null | |
scala> while (dataFileReader.hasNext()) { | |
| thing = dataFileReader.next(thing) | |
| println(thing) | |
| } | |
{"name": "foo", "isgood": false, "description": "poo"} | |
{"name": "foo", "isgood": false, "description": "poo"} | |
scala> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment