Build an Avro schema programmatically in Java:
private static final Schema TIMESTAMP_MICROS_LOGICAL_TYPE = LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG)); private static final Schema DATE_LOGICAL_TYPE = LogicalTypes.date().addToSchema(Schema.create(Schema.Type.INT)); private static final Schema DECIMAL_38_LOGICAL_TYPE = LogicalTypes.decimal(38, 9).addToSchema(Schema.create(Schema.Type.BYTES)); private static final Schema INTPUT_SCHEMA = SchemaBuilder.record("myRecordName") .fields() .requiredInt("myRequiredInt") .requiredString("myRequiredString") .optionalString("myOptionalString") .nullableString("myNullableString", "myNullableStringDefaultValue") .requiredBoolean("myRequiredBoolean") .requiredBytes("myRequiredBytes") .name("myBytesDecimal").type(DECIMAL_38_LOGICAL_TYPE).noDefault() .name("myRequiredTimestamp").type(TIMESTAMP_MICROS_LOGICAL_TYPE).noDefault() // needs to be timestamp_micros (not timestamp_millis) .name("myOptionalTimestamp").type().optional().type(TIMESTAMP_MICROS_LOGICAL_TYPE) .name("myRequiredDate").doc("Expiration date field").type(DATE_LOGICAL_TYPE).noDefault() .name("myRequiredArrayLongs").type().array().items().longType().noDefault() .name("myRequiredSubRecord").type(SchemaBuilder.record("myRequiredSubRecordType").fields().requiredDouble("myRequiredDouble").requiredBoolean("myRequiredBoolean").endRecord()).noDefault() .name("myOptionalSubRecord").type().optional().record("myOptionalSubRecordType").fields().requiredFloat("myRequiredFloat").requiredBoolean("myRequiredBoolean").endRecord() .name("myNullableSubRecord").type().nullable().record("myNullableSubRecordType").fields().requiredLong("myRequiredLong").requiredBoolean("myRequiredBoolean").endRecord().noDefault() .name("myOptionalArraySubRecords").type().nullable().array().items( SchemaBuilder.record("myOptionalArraySubRecord").fields().requiredBoolean("myRequiredBoolean").endRecord()).noDefault() .endRecord();
Create a GenericRecord for the schema and write it to a Snappy-compressed Avro file:
/**
 * Builds one sample record for the given schema.
 *
 * <p>The fields {@code myOptionalString}, {@code myNullableString} and
 * {@code myOptionalTimestamp} are intentionally left unset.
 *
 * @param schema record schema containing the fields populated below
 * @return a populated {@link GenericRecord}
 */
private static GenericRecord createGenericRecord(Schema schema) {
    GenericRecord record = new GenericData.Record(schema);

    record.put("myRequiredInt", 123);
    record.put("myRequiredString", "abc");
    record.put("myRequiredBoolean", true);
    // Explicit charset so the bytes do not depend on the platform default.
    record.put("myRequiredBytes",
        ByteBuffer.wrap("ABC123".getBytes(java.nio.charset.StandardCharsets.UTF_8)));
    record.put("myBytesDecimal", doubleToByteBuffer(1.23d));
    // needs to be timestamp_micros (not timestamp_millis)
    record.put("myRequiredTimestamp", System.currentTimeMillis() * 1000);
    // The date logical type is stored as the number of days since the epoch.
    record.put("myRequiredDate",
        (int) new Date(System.currentTimeMillis()).toLocalDate().toEpochDay());
    record.put("myRequiredArrayLongs", Arrays.asList(1L, 2L, 3L));

    Schema myRequiredSubRecordSchema = schema.getField("myRequiredSubRecord").schema();
    GenericRecord myRequiredSubRecord = new GenericData.Record(myRequiredSubRecordSchema);
    myRequiredSubRecord.put("myRequiredDouble", 1.0d);
    myRequiredSubRecord.put("myRequiredBoolean", false);
    record.put("myRequiredSubRecord", myRequiredSubRecord);

    // Optional/nullable fields are unions; the record branch has to be extracted first.
    Schema myOptionalSubRecordSchema =
        unwrapSchema(schema.getField("myOptionalSubRecord").schema());
    GenericRecord myOptionalSubRecord = new GenericData.Record(myOptionalSubRecordSchema);
    myOptionalSubRecord.put("myRequiredFloat", 2.0f);
    myOptionalSubRecord.put("myRequiredBoolean", true);
    record.put("myOptionalSubRecord", myOptionalSubRecord);

    Schema myNullableSubRecordSchema =
        unwrapSchema(schema.getField("myNullableSubRecord").schema());
    GenericRecord myNullableSubRecord = new GenericData.Record(myNullableSubRecordSchema);
    myNullableSubRecord.put("myRequiredLong", 12L);
    myNullableSubRecord.put("myRequiredBoolean", false);
    record.put("myNullableSubRecord", myNullableSubRecord);

    Schema myOptionalArraySubRecords = schema.getField("myOptionalArraySubRecords").schema();
    Schema arraySubRecordSchema = unwrapSchema(myOptionalArraySubRecords).getElementType();
    GenericRecord mySubRecord1 = new GenericData.Record(arraySubRecordSchema);
    mySubRecord1.put("myRequiredBoolean", true);
    GenericRecord mySubRecord2 = new GenericData.Record(arraySubRecordSchema);
    mySubRecord2.put("myRequiredBoolean", false);
    record.put("myOptionalArraySubRecords", Arrays.asList(mySubRecord1, mySubRecord2));

    return record;
}

/**
 * Writes a Snappy-compressed Avro container file holding one sample record.
 *
 * @param path destination file path
 * @throws RuntimeException wrapping any {@link IOException} raised while writing
 */
private static void createAvroFile(String path) {
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(INTPUT_SCHEMA);
    // try-with-resources closes the writer even when create() or append() fails.
    try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
        dataFileWriter.setCodec(CodecFactory.snappyCodec());
        dataFileWriter.create(INTPUT_SCHEMA, new File(path));
        // Single iteration; raise the bound to write more records.
        for (int i = 1; i <= 1; i++) {
            GenericRecord record = createGenericRecord(INTPUT_SCHEMA);
            dataFileWriter.append(record);
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

/**
 * Returns the non-null branch of a two-branch union schema.
 *
 * @param schema any schema
 * @return for a union, the second branch when the first is {@code null}-typed, otherwise
 *     the first branch; for a non-union schema, {@code schema} itself
 */
private static Schema unwrapSchema(Schema schema) {
    if (schema.getType() == Schema.Type.UNION) {
        List<Schema> types = schema.getTypes();
        return types.get(0).getType() == Schema.Type.NULL ? types.get(1) : types.get(0);
    }
    return schema;
}

/**
 * Encodes a double as the bytes of its unscaled value at scale 9, the representation
 * used here for the {@code myBytesDecimal} field.
 *
 * @param d value to encode
 * @return buffer wrapping the two's-complement big-endian unscaled value
 * @throws ArithmeticException if {@code d} cannot be represented at scale 9 without rounding
 */
private static ByteBuffer doubleToByteBuffer(double d) {
    BigDecimal bigDecimal = BigDecimal.valueOf(d).setScale(9, RoundingMode.UNNECESSARY);
    BigInteger bigInteger = bigDecimal.unscaledValue();
    return ByteBuffer.wrap(bigInteger.toByteArray());
}
{ "type": "record", "name": "myRecordName", "fields": [ { "name": "myRequiredInt", "type": "int" }, { "name": "myRequiredString", "type": "string" }, { "name": "myOptionalString", "type": [ "null", "string" ], "default": null }, { "name": "myNullableString", "type": [ "string", "null" ], "default": "myNullableStringDefaultValue" }, { "name": "myRequiredBoolean", "type": "boolean" }, { "name": "myRequiredBytes", "type": "bytes" }, { "name": "myBytesDecimal", "type": { "type": "bytes", "logicalType": "decimal", "precision": 38, "scale": 9 } }, { "name": "myRequiredTimestamp", "type": { "type": "long", "logicalType": "timestamp-micros" } }, { "name": "myOptionalTimestamp", "type": [ "null", { "type": "long", "logicalType": "timestamp-micros" } ], "default": null }, { "name": "myRequiredDate", "type": { "type": "int", "logicalType": "date" }, "doc": "Expiration date field" }, { "name": "myRequiredArrayLongs", "type": { "type": "array", "items": "long" } }, { "name": "myRequiredSubRecord", "type": { "type": "record", "name": "myRequiredSubRecordType", "fields": [ { "name": "myRequiredDouble", "type": "double" }, { "name": "myRequiredBoolean", "type": "boolean" } ] } }, { "name": "myOptionalSubRecord", "type": [ "null", { "type": "record", "name": "myOptionalSubRecordType", "fields": [ { "name": "myRequiredFloat", "type": "float" }, { "name": "myRequiredBoolean", "type": "boolean" } ] } ], "default": null }, { "name": "myNullableSubRecord", "type": [ { "type": "record", "name": "myNullableSubRecordType", "fields": [ { "name": "myRequiredLong", "type": "long" }, { "name": "myRequiredBoolean", "type": "boolean" } ] }, "null" ] }, { "name": "myOptionalArraySubRecords", "type": [ { "type": "array", "items": { "type": "record", "name": "myOptionalArraySubRecord", "fields": [ { "name": "myRequiredBoolean", "type": "boolean" } ] } }, "null" ] } ] }
# Define a shortcut for the avro-tools "tojson" command (snappy-java is on the classpath
# so Snappy-compressed files can be decoded), then dump the generated file as JSON.
alias avro-tools-tojson='java -Dlog4j.configuration=file:$HOME/Downloads/log4j.properties -cp $HOME/.m2/repository/org/apache/avro/avro-tools/1.8.2/avro-tools-1.8.2.jar:$HOME/.m2/repository/org/xerial/snappy/snappy-java/1.1.8.4/snappy-java-1.1.8.4.jar org.apache.avro.tool.Main tojson'
avro-tools-tojson myRecord.snappy.avro
{ "myRequiredInt": 123, "myRequiredString": "abc", "myOptionalString": null, "myNullableString": null, "myRequiredBoolean": true, "myRequiredBytes": "ABC123", "myBytesDecimal": "IPO", "myRequiredTimestamp": 1685292531792000, "myOptionalTimestamp": null, "myRequiredDate": 19505, "myRequiredArrayLongs": [ 1, 2, 3 ], "myRequiredSubRecord": { "myRequiredDouble": 1.0, "myRequiredBoolean": false }, "myOptionalSubRecord": { "myOptionalSubRecordType": { "myRequiredFloat": 2.0, "myRequiredBoolean": true } }, "myNullableSubRecord": { "myNullableSubRecordType": { "myRequiredLong": 12, "myRequiredBoolean": false } }, "myOptionalArraySubRecords": { "array": [ { "myRequiredBoolean": true }, { "myRequiredBoolean": false } ] } }