Skip to content

Instantly share code, notes, and snippets.

@bradkarels
Last active August 31, 2015 19:05
Show Gist options
  • Save bradkarels/9c522b71d228cac9357f to your computer and use it in GitHub Desktop.
Save bradkarels/9c522b71d228cac9357f to your computer and use it in GitHub Desktop.
Fuddling about with avro and the scala repl...
bkarels@ubuntu:~/dev/avro/thing$ cat thing.avsc
{
"namespace": "com.rbh.avro.pops.thing",
"type": "record",
"name": "Thing",
"fields": [
{
"name": "name",
"type": "string"
},
{
"name": "isgood",
"type": "boolean"
},
{
"name": "description",
"type": "string"
}
]
}
One way to precompile your schema:
mkdir -p src/main
mkdir lib
mkdir bin
cp avro-tools-1.7.7.jar ./lib
cp avro-1.7.7.jar ./lib
# Use avro-tools to generate Java code from the schema into src/main:
java -jar ./lib/avro-tools-1.7.7.jar compile schema thing.avsc ./src/main
# Compile the source:
javac -d bin -cp lib/avro-tools-1.7.7.jar:lib/avro-1.7.7.jar:. src/main/com/rbh/avro/pops/thing/Thing.java
# Package the compiled classes into a jar:
cd bin
jar cf thing.jar com/
# Examine the jar:
jar tf thing.jar
Picked up _JAVA_OPTIONS: -Xms128m -Xmx512m -XX:PermSize=128m -XX:MaxPermSize=512m
META-INF/
META-INF/MANIFEST.MF
com/
com/rbh/
com/rbh/avro/
com/rbh/avro/pops/
com/rbh/avro/pops/thing/
com/rbh/avro/pops/thing/Thing$Builder.class
com/rbh/avro/pops/thing/Thing.class
com/rbh/avro/pops/thing/Thing$1.class
Do things with Spark (in the Spark shell, with thing.jar on the classpath)...
### Create some "things"
Spark context available as sc.
scala> import com.rbh.avro.pops.thing._
import com.rbh.avro.pops.thing._
scala> val t = new Thing()
t: com.rbh.avro.pops.thing.Thing = {"name": null, "isgood": false, "description": null}
scala> val name:CharSequence = "foo"
name: CharSequence = foo
scala> val desc:CharSequence = "poo"
desc: CharSequence = poo
scala> val t0 = new Thing(name,false,desc)
t0: com.rbh.avro.pops.thing.Thing = {"name": "foo", "isgood": false, "description": "poo"}
scala> t0.description
res0: CharSequence = poo
scala> t0.description.toString
res1: String = poo
scala> val t2 = new Thing
t2: com.rbh.avro.pops.thing.Thing = {"name": null, "isgood": false, "description": null}
scala> t2.setName(name)
scala> t2.name
res3: CharSequence = foo
scala> t2.setDescription(desc)
scala> t2.setIsgood(false)
scala> t2.toString
res6: String = {"name": "foo", "isgood": false, "description": "poo"}
### Create a thing with a builder ###
scala> val t3:Thing = Thing.newBuilder().setName(name).setIsgood(true).setDescription(desc).build()
t3: com.rbh.avro.pops.thing.Thing = {"name": "foo", "isgood": true, "description": "poo"}
### Serialize them ###
scala> import org.apache.avro.io._
import org.apache.avro.io._
scala> import org.apache.avro.specific._
import org.apache.avro.specific._
scala> val thingDatumWriter:DatumWriter[Thing] = new SpecificDatumWriter[Thing](classOf[Thing])
thingDatumWriter: org.apache.avro.io.DatumWriter[com.rbh.avro.pops.thing.Thing] = org.apache.avro.specific.SpecificDatumWriter@7e1ca523
scala> import org.apache.avro.file._
import org.apache.avro.file._
scala> val dataFileWriter:DataFileWriter[Thing] = new DataFileWriter[Thing](thingDatumWriter)
dataFileWriter: org.apache.avro.file.DataFileWriter[com.rbh.avro.pops.thing.Thing] = org.apache.avro.file.DataFileWriter@35f5e371
scala> import java.io.File
import java.io.File
scala> dataFileWriter.create(t2.getSchema(), new File("things.avro"))
res8: org.apache.avro.file.DataFileWriter[com.rbh.avro.pops.thing.Thing] = org.apache.avro.file.DataFileWriter@35f5e371
scala> dataFileWriter.append(t2)
scala> dataFileWriter.append(t0)
scala> dataFileWriter.close()
# The file things.avro can now be found on the file system, in the shell's working directory.
### Do some deserialization on the file ###
scala> val thingDatumReader:DatumReader[Thing] = new SpecificDatumReader[Thing](classOf[Thing])
thingDatumReader: org.apache.avro.io.DatumReader[com.rbh.avro.pops.thing.Thing] = org.apache.avro.specific.SpecificDatumReader@40aa66fd
scala> val dataFileReader:DataFileReader[Thing] = new DataFileReader[Thing](new File("things.avro"), thingDatumReader)
dataFileReader: org.apache.avro.file.DataFileReader[com.rbh.avro.pops.thing.Thing] = org.apache.avro.file.DataFileReader@5f514be1
scala> var thing:Thing = null
thing: com.rbh.avro.pops.thing.Thing = null
scala> while (dataFileReader.hasNext()) {
| thing = dataFileReader.next(thing)
| println(thing)
| }
{"name": "foo", "isgood": false, "description": "poo"}
{"name": "foo", "isgood": false, "description": "poo"}
scala>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment