Tag: json

Avro Schema – java and json/avsc

Build an Avro schema programmatically in Java:

    /** Avro {@code long} schema annotated with the {@code timestamp-micros} logical type. */
    private static final Schema TIMESTAMP_MICROS_LOGICAL_TYPE =
            LogicalTypes.timestampMicros()
                    .addToSchema(Schema.create(Schema.Type.LONG));

    /** Avro {@code int} schema annotated with the {@code date} logical type. */
    private static final Schema DATE_LOGICAL_TYPE =
            LogicalTypes.date()
                    .addToSchema(Schema.create(Schema.Type.INT));

    /** Avro {@code bytes} schema annotated with a {@code decimal} logical type (precision 38, scale 9). */
    private static final Schema DECIMAL_38_LOGICAL_TYPE =
            LogicalTypes.decimal(38, 9)
                    .addToSchema(Schema.create(Schema.Type.BYTES));

    /**
     * Sample record schema covering primitive fields, logical types, arrays and nested
     * records in their required, optional and nullable variants.
     *
     * <p>In the builder DSL used below, {@code optional} yields a {@code ["null", T]} union
     * with a {@code null} default, while {@code nullable} yields a {@code [T, "null"]} union
     * whose default (if any) is of type {@code T}.
     */
    private static final Schema INTPUT_SCHEMA = SchemaBuilder
            .record("myRecordName")
            .fields()
            // --- primitive fields ---
            .requiredInt("myRequiredInt")
            .requiredString("myRequiredString")
            .optionalString("myOptionalString")
            .nullableString("myNullableString", "myNullableStringDefaultValue")
            .requiredBoolean("myRequiredBoolean")
            .requiredBytes("myRequiredBytes")
            // --- logical types ---
            .name("myBytesDecimal")
                .type(DECIMAL_38_LOGICAL_TYPE)
                .noDefault()
            // needs to be timestamp_micros (not timestamp_millis)
            .name("myRequiredTimestamp")
                .type(TIMESTAMP_MICROS_LOGICAL_TYPE)
                .noDefault()
            .name("myOptionalTimestamp")
                .type().optional().type(TIMESTAMP_MICROS_LOGICAL_TYPE)
            .name("myRequiredDate")
                .doc("Expiration date field")
                .type(DATE_LOGICAL_TYPE)
                .noDefault()
            // --- array of primitives ---
            .name("myRequiredArrayLongs")
                .type().array().items().longType()
                .noDefault()
            // --- nested records ---
            .name("myRequiredSubRecord")
                .type(
                    SchemaBuilder.record("myRequiredSubRecordType")
                            .fields()
                            .requiredDouble("myRequiredDouble")
                            .requiredBoolean("myRequiredBoolean")
                            .endRecord())
                .noDefault()
            .name("myOptionalSubRecord")
                .type().optional()
                .record("myOptionalSubRecordType")
                    .fields()
                    .requiredFloat("myRequiredFloat")
                    .requiredBoolean("myRequiredBoolean")
                .endRecord()
            .name("myNullableSubRecord")
                .type().nullable()
                .record("myNullableSubRecordType")
                    .fields()
                    .requiredLong("myRequiredLong")
                    .requiredBoolean("myRequiredBoolean")
                .endRecord()
                .noDefault()
            // --- nullable array of records ---
            .name("myOptionalArraySubRecords")
                .type().nullable().array()
                .items(
                    SchemaBuilder.record("myOptionalArraySubRecord")
                            .fields()
                            .requiredBoolean("myRequiredBoolean")
                            .endRecord())
                .noDefault()
            .endRecord();

Create a GenericRecord for the schema:

    /**
     * Builds a sample {@link GenericRecord} populated with fixed demo values.
     *
     * <p>The fields {@code myOptionalString}, {@code myNullableString} and
     * {@code myOptionalTimestamp} are intentionally left unset.
     *
     * @param schema record schema to instantiate; it must contain the fields looked up by
     *               name below (e.g. {@code myRequiredSubRecord}, {@code myOptionalArraySubRecords})
     * @return the populated record
     */
    private static GenericRecord createGenericRecord(Schema schema) {
        GenericRecord record = new GenericData.Record(schema);

        // Primitive fields.
        record.put("myRequiredInt", 123);
        record.put("myRequiredString", "abc");
        record.put("myRequiredBoolean", true);
        // Explicit charset so the encoded bytes do not depend on the platform default.
        record.put("myRequiredBytes",
                ByteBuffer.wrap("ABC123".getBytes(java.nio.charset.StandardCharsets.UTF_8)));

        // Logical types.
        record.put("myBytesDecimal", doubleToByteBuffer(1.23d));
        // needs to be timestamp_micros (not timestamp_millis)
        record.put("myRequiredTimestamp", System.currentTimeMillis() * 1000);
        // The date logical type is stored as an int number of days since the epoch;
        // today's date is taken in the system default time zone.
        record.put("myRequiredDate", (int) java.time.LocalDate.now().toEpochDay());

        record.put("myRequiredArrayLongs", Arrays.asList(1L, 2L, 3L));

        // Required sub-record: the field schema is the record schema itself.
        Schema myRequiredSubRecordSchema = schema.getField("myRequiredSubRecord").schema();
        GenericRecord myRequiredSubRecord = new GenericData.Record(myRequiredSubRecordSchema);
        myRequiredSubRecord.put("myRequiredDouble", 1.0d);
        myRequiredSubRecord.put("myRequiredBoolean", false);
        record.put("myRequiredSubRecord", myRequiredSubRecord);

        // Optional sub-record: the field schema is a union, so extract the record branch first.
        Schema myOptionalSubRecordSchema = unwrapSchema(schema.getField("myOptionalSubRecord").schema());
        GenericRecord myOptionalSubRecord = new GenericData.Record(myOptionalSubRecordSchema);
        myOptionalSubRecord.put("myRequiredFloat", 2.0f);
        myOptionalSubRecord.put("myRequiredBoolean", true);
        record.put("myOptionalSubRecord", myOptionalSubRecord);

        // Nullable sub-record: also a union, handled the same way.
        Schema myNullableSubRecordSchema = unwrapSchema(schema.getField("myNullableSubRecord").schema());
        GenericRecord myNullableSubRecord = new GenericData.Record(myNullableSubRecordSchema);
        myNullableSubRecord.put("myRequiredLong", 12L);
        myNullableSubRecord.put("myRequiredBoolean", false);
        record.put("myNullableSubRecord", myNullableSubRecord);

        // Nullable array of sub-records: unwrap the union, then take the array's element schema.
        Schema myOptionalArraySubRecords = schema.getField("myOptionalArraySubRecords").schema();
        Schema arraySubRecordSchema = unwrapSchema(myOptionalArraySubRecords).getElementType();
        GenericRecord mySubRecord1 = new GenericData.Record(arraySubRecordSchema);
        mySubRecord1.put("myRequiredBoolean", true);
        GenericRecord mySubRecord2 = new GenericData.Record(arraySubRecordSchema);
        mySubRecord2.put("myRequiredBoolean", false);
        record.put("myOptionalArraySubRecords", Arrays.asList(mySubRecord1, mySubRecord2));

        return record;
    }


    /**
     * Writes one sample record, built by {@code createGenericRecord}, to a
     * Snappy-compressed Avro container file using {@code INTPUT_SCHEMA}.
     *
     * @param path location of the file to write
     * @throws RuntimeException wrapping the underlying {@link IOException} if writing fails
     */
    private static void createAvroFile(String path) {
        DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(INTPUT_SCHEMA);
        // try-with-resources guarantees the writer is closed even when create/append throws.
        try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
            // The codec is configured before create(), as in the original ordering.
            dataFileWriter.setCodec(CodecFactory.snappyCodec());
            dataFileWriter.create(INTPUT_SCHEMA, new File(path));
            // Single-iteration loop kept so the number of sample records is easy to raise.
            for (int i = 1; i <= 1; i++) {
                GenericRecord record = createGenericRecord(INTPUT_SCHEMA);
                dataFileWriter.append(record);
            }
        } catch (IOException e) {
            throw new RuntimeException("Failed to write Avro file: " + path, e);
        }
    }

    /**
     * Returns the non-null branch of a union schema, or the schema itself when it is
     * not a union.
     *
     * <p>Only the first two union branches are inspected: if the first branch is
     * {@code null}, the second is returned; otherwise the first is returned.
     *
     * @param schema schema to unwrap
     * @return the unwrapped schema
     */
    private static Schema unwrapSchema(Schema schema) {
        if (schema.getType() != Schema.Type.UNION) {
            return schema;
        }
        List<Schema> branches = schema.getTypes();
        Schema firstBranch = branches.get(0);
        if (firstBranch.getType() == Schema.Type.NULL) {
            return branches.get(1);
        }
        return firstBranch;
    }

    /**
     * Encodes a double as the byte representation of a decimal with scale 9: the
     * big-endian two's-complement bytes of the unscaled value.
     *
     * <p>Values with more than 9 fractional digits (for example {@code 0.1 + 0.2}) are
     * rounded {@link RoundingMode#HALF_UP} to 9 digits instead of failing with an
     * {@link ArithmeticException}. Values that already fit in 9 fractional digits are
     * encoded exactly.
     *
     * @param d value to encode; must be finite
     * @return buffer wrapping the unscaled value's bytes
     * @throws NumberFormatException if {@code d} is NaN or infinite
     */
    private static ByteBuffer doubleToByteBuffer(double d) {
        // Scale 9 matches the decimal(38, 9) logical type this value is written to.
        BigDecimal scaled = BigDecimal.valueOf(d).setScale(9, RoundingMode.HALF_UP);
        BigInteger unscaled = scaled.unscaledValue();
        return ByteBuffer.wrap(unscaled.toByteArray());
    }
{
  "type": "record",
  "name": "myRecordName",
  "fields": [
    {
      "name": "myRequiredInt",
      "type": "int"
    },
    {
      "name": "myRequiredString",
      "type": "string"
    },
    {
      "name": "myOptionalString",
      "type": [
        "null",
        "string"
      ],
      "default": null
    },
    {
      "name": "myNullableString",
      "type": [
        "string",
        "null"
      ],
      "default": "myNullableStringDefaultValue"
    },
    {
      "name": "myRequiredBoolean",
      "type": "boolean"
    },
    {
      "name": "myRequiredBytes",
      "type": "bytes"
    },
    {
      "name": "myBytesDecimal",
      "type": {
        "type": "bytes",
        "logicalType": "decimal",
        "precision": 38,
        "scale": 9
      }
    },
    {
      "name": "myRequiredTimestamp",
      "type": {
        "type": "long",
        "logicalType": "timestamp-micros"
      }
    },
    {
      "name": "myOptionalTimestamp",
      "type": [
        "null",
        {
          "type": "long",
          "logicalType": "timestamp-micros"
        }
      ],
      "default": null
    },
    {
      "name": "myRequiredDate",
      "type": {
        "type": "int",
        "logicalType": "date"
      },
      "doc": "Expiration date field"
    },
    {
      "name": "myRequiredArrayLongs",
      "type": {
        "type": "array",
        "items": "long"
      }
    },
    {
      "name": "myRequiredSubRecord",
      "type": {
        "type": "record",
        "name": "myRequiredSubRecordType",
        "fields": [
          {
            "name": "myRequiredDouble",
            "type": "double"
          },
          {
            "name": "myRequiredBoolean",
            "type": "boolean"
          }
        ]
      }
    },
    {
      "name": "myOptionalSubRecord",
      "type": [
        "null",
        {
          "type": "record",
          "name": "myOptionalSubRecordType",
          "fields": [
            {
              "name": "myRequiredFloat",
              "type": "float"
            },
            {
              "name": "myRequiredBoolean",
              "type": "boolean"
            }
          ]
        }
      ],
      "default": null
    },
    {
      "name": "myNullableSubRecord",
      "type": [
        {
          "type": "record",
          "name": "myNullableSubRecordType",
          "fields": [
            {
              "name": "myRequiredLong",
              "type": "long"
            },
            {
              "name": "myRequiredBoolean",
              "type": "boolean"
            }
          ]
        },
        "null"
      ]
    },
    {
      "name": "myOptionalArraySubRecords",
      "type": [
        {
          "type": "array",
          "items": {
            "type": "record",
            "name": "myOptionalArraySubRecord",
            "fields": [
              {
                "name": "myRequiredBoolean",
                "type": "boolean"
              }
            ]
          }
        },
        "null"
      ]
    }
  ]
}

alias avro-tools-tojson='java -Dlog4j.configuration=file:$HOME/Downloads/log4j.properties -cp $HOME/.m2/repository/org/apache/avro/avro-tools/1.8.2/avro-tools-1.8.2.jar:$HOME/.m2/repository/org/xerial/snappy/snappy-java/1.1.8.4/snappy-java-1.1.8.4.jar org.apache.avro.tool.Main tojson'
avro-tools-tojson myRecord.snappy.avro
{
  "myRequiredInt": 123,
  "myRequiredString": "abc",
  "myOptionalString": null,
  "myNullableString": null,
  "myRequiredBoolean": true,
  "myRequiredBytes": "ABC123",
  "myBytesDecimal": "IPO",
  "myRequiredTimestamp": 1685292531792000,
  "myOptionalTimestamp": null,
  "myRequiredDate": 19505,
  "myRequiredArrayLongs": [
    1,
    2,
    3
  ],
  "myRequiredSubRecord": {
    "myRequiredDouble": 1.0,
    "myRequiredBoolean": false
  },
  "myOptionalSubRecord": {
    "myOptionalSubRecordType": {
      "myRequiredFloat": 2.0,
      "myRequiredBoolean": true
    }
  },
  "myNullableSubRecord": {
    "myNullableSubRecordType": {
      "myRequiredLong": 12,
      "myRequiredBoolean": false
    }
  },
  "myOptionalArraySubRecords": {
    "array": [
      {
        "myRequiredBoolean": true
      },
      {
        "myRequiredBoolean": false
      }
    ]
  }
}