Created
October 12, 2023 18:48
-
-
Save kjsingh/0a6973c4909602ba0248f31b3153858a to your computer and use it in GitHub Desktop.
Avro All Data Types
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericData.Array;
import org.apache.avro.generic.GenericData.EnumSymbol;
import org.apache.avro.generic.GenericData.Fixed;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.io.JsonEncoder;
public class AvroDataTypesGenerator { | |
public static void main(String[] args) { | |
// Define an Avro schema with all complex and primitive types | |
String avroSchemaJSON = "{\n" + | |
" \"type\": \"record\",\n" + | |
" \"name\": \"ComplexDataTypesExample\",\n" + | |
" \"fields\": [\n" + | |
" {\"name\": \"intField\", \"type\": \"int\"},\n" + | |
" {\"name\": \"longField\", \"type\": \"long\"},\n" + | |
" {\"name\": \"floatField\", \"type\": \"float\"},\n" + | |
" {\"name\": \"doubleField\", \"type\": \"double\"},\n" + | |
" {\"name\": \"stringField\", \"type\": \"string\"},\n" + | |
" {\"name\": \"booleanField\", \"type\": \"boolean\"},\n" + | |
" {\"name\": \"bytesField\", \"type\": \"bytes\"},\n" + | |
" {\"name\": \"nullField\", \"type\": \"null\"},\n" + | |
" {\"name\": \"recordField\", \"type\": {\n" + | |
" \"type\": \"record\",\n" + | |
" \"name\": \"InnerRecord\",\n" + | |
" \"fields\": [\n" + | |
" {\"name\": \"field1\", \"type\": \"int\"},\n" + | |
" {\"name\": \"field2\", \"type\": \"string\"}\n" + | |
" ]\n" + | |
" }\n" + | |
" },\n" + | |
" {\"name\": \"enumField\", \"type\": {\n" + | |
" \"type\": \"enum\",\n" + | |
" \"name\": \"Color\",\n" + | |
" \"symbols\": [\"RED\", \"GREEN\", \"BLUE\"]\n" + | |
" }\n" + | |
" },\n" + | |
" {\"name\": \"arrayField\", \"type\": {\n" + | |
" \"type\": \"array\",\n" + | |
" \"items\": \"string\"\n" + | |
" }\n" + | |
" },\n" + | |
" {\"name\": \"mapField\", \"type\": {\n" + | |
" \"type\": \"map\",\n" + | |
" \"values\": \"int\"\n" + | |
" }\n" + | |
" },\n" + | |
" {\"name\": \"unionField\", \"type\": [\"int\", \"string\"]},\n" + | |
" {\"name\": \"fixedField\", \"type\": {\n" + | |
" \"type\": \"fixed\",\n" + | |
" \"name\": \"FixedBytes\",\n" + | |
" \"size\": 4\n" + | |
" }\n" + | |
" }\n" + | |
" ]\n" + | |
"}"; | |
Schema.Parser parser = new Schema.Parser(); | |
Schema schema = parser.parse(avroSchemaJSON); | |
// Create Avro records with sample data for each complex type | |
GenericRecord record = new GenericData.Record(schema); | |
// Populate the record with sample data | |
record.put("intField", 42); | |
record.put("longField", 1234567890L); | |
record.put("floatField", 3.14f); | |
record.put("doubleField", 2.71828); | |
record.put("stringField", "Hello, Avro!"); | |
record.put("booleanField", true); | |
record.put("bytesField", "Avro".getBytes()); | |
record.put("nullField", null); | |
// Create a sample record | |
GenericRecord innerRecord = new GenericData.Record(schema.getField("recordField").schema()); | |
innerRecord.put("field1", 42); | |
innerRecord.put("field2", "Nested Record"); | |
record.put("recordField", innerRecord); | |
// Create a sample enum | |
GenericData.EnumSymbol enumSymbol = new EnumSymbol(schema.getField("enumField").schema(), "RED"); | |
record.put("enumField", enumSymbol); | |
// Create a sample array | |
List<CharSequence> stringArray = new ArrayList<>(); | |
stringArray.add("One"); | |
stringArray.add("Two"); | |
stringArray.add("Three"); | |
Array<CharSequence> array = new Array<>(schema.getField("arrayField").schema(), stringArray); | |
record.put("arrayField", array); | |
// Create a sample map | |
Map<String, Integer> intMap = new HashMap<>(); | |
intMap.put("key1", 1); | |
intMap.put("key2", 2); | |
Map<CharSequence, Integer> map = new HashMap<>(intMap); | |
record.put("mapField", map); | |
// Create a sample union with a string value | |
record.put("unionField", "Union Value"); | |
// Create a sample fixed | |
Fixed fixedBytes = new Fixed(schema.getField("fixedField").schema(), new byte[]{0, 1, 2, 3}); | |
record.put("fixedField", fixedBytes); | |
try { | |
// Write the Avro record to a file | |
File avroFile = new File("avroDataTypes.avro"); | |
DatumWriter<GenericRecord> datumWriter = new org.apache.avro.generic.GenericDatumWriter<>(schema); | |
DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter); | |
dataFileWriter.create(schema, avroFile); | |
dataFileWriter.append(record); | |
dataFileWriter.close(); | |
System.out.println("Avro file 'avroDataTypes.avro' has been generated."); | |
} catch (IOException e) { | |
e.printStackTrace(); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment