How does Avro binary code encode a JSON string using Apache Avro?

I am trying to create avro binary for my JSON String. Below is my JSON String, and I created a simple method that will do the conversion, but I'm not sure if I am doing it right or not?

public static void main(String args[]) throws Exception{
try{
    Schema schema = new Parser().parse((TestExample.class.getResourceAsStream("/3233.avsc")));
    String json="{"+
        "  \"location\" : {"+
        "    \"devices\":["+
        "      {"+
        "        \"did\":\"9abd09-439bcd-629a8f\","+
        "        \"dt\":\"browser\","+
        "        \"usl\":{"+
        "          \"pos\":{"+
        "            \"source\":\"GPS\","+
        "            \"lat\":90.0,"+
        "            \"long\":101.0,"+
        "            \"acc\":100"+
        "          },"+
        "          \"addSource\":\"LL\","+
        "          \"add\":["+
        "            {"+
        "              \"val\":\"2123\","+
        "              \"type\" : \"NUM\""+
        "            },"+
        "            {"+
        "              \"val\":\"Harris ST\","+
        "              \"type\" : \"ST\""+
        "            }"+
        "          ],"+
        "          \"ei\":{"+
        "            \"ibm\":true,"+
        "            \"sr\":10,"+
        "            \"ienz\":true,"+
        "            \"enz\":100,"+
        "            \"enr\":10"+
        "          },"+
        "          \"lm\":1390598086120"+
        "        }"+
        "      }"+
        "    ],"+
        "    \"ver\" : \"1.0\""+
        "  }"+
        "}";

    byte[] avroByteArray = fromJsonToAvro(json,schema);

} catch (Exception ex) {
    // log an exception
}

Below method will convert my JSON string to Avro binary encoding -

private static byte[] fromJsonToAvro(String json, Schema schema) throws Exception {

    InputStream input = new ByteArrayInputStream(json.getBytes());
    DataInputStream din = new DataInputStream(input);   

    Decoder decoder = DecoderFactory.get().jsonDecoder(schema, din);

    DatumReader<Object> reader = new GenericDatumReader<Object>(schema);
    Object datum = reader.read(null, decoder);


    GenericDatumWriter<Object>  w = new GenericDatumWriter<Object>(schema);
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();

    Encoder e = EncoderFactory.get().binaryEncoder(outputStream, null);

    w.write(datum, e);
    e.flush();

    return outputStream.toByteArray();
}

Can someone take a look and let me know if I am trying to execute Avro binary, JSON String correctly, or not?

+3
source share
3 answers

I think the OP is correct. This will record the Avro records themselves without the scheme that will be present if it was an Avro data file.

Avro (, .
    & ; JSON Avro: DataFileWriteTool
    & ; Avro JSON: DataFileReadTool

, .

@Grapes([
    @Grab(group='org.apache.avro', module='avro', version='1.7.7')
])

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.io.JsonEncoder;

String schema = '''{
  "type":"record",
  "namespace":"foo",
  "name":"Person",
  "fields":[
    {
      "name":"name",
      "type":"string"
    },
    {
      "name":"age",
      "type":"int"
    }
  ]
}'''
String json = "{" +
  "\"name\":\"Frank\"," +
  "\"age\":47" +
"}"

assert avroToJson(jsonToAvro(json, schema), schema) == json


public static byte[] jsonToAvro(String json, String schemaStr) throws IOException {
    InputStream input = null;
    GenericDatumWriter<GenericRecord> writer = null;
    Encoder encoder = null;
    ByteArrayOutputStream output = null;
    try {
        Schema schema = new Schema.Parser().parse(schemaStr);
        DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema);
        input = new ByteArrayInputStream(json.getBytes());
        output = new ByteArrayOutputStream();
        DataInputStream din = new DataInputStream(input);
        writer = new GenericDatumWriter<GenericRecord>(schema);
        Decoder decoder = DecoderFactory.get().jsonDecoder(schema, din);
        encoder = EncoderFactory.get().binaryEncoder(output, null);
        GenericRecord datum;
        while (true) {
            try {
                datum = reader.read(null, decoder);
            } catch (EOFException eofe) {
                break;
            }
            writer.write(datum, encoder);
        }
        encoder.flush();
        return output.toByteArray();
    } finally {
        try { input.close(); } catch (Exception e) { }
    }
}

public static String avroToJson(byte[] avro, String schemaStr) throws IOException {
    boolean pretty = false;
    GenericDatumReader<GenericRecord> reader = null;
    JsonEncoder encoder = null;
    ByteArrayOutputStream output = null;
    try {
        Schema schema = new Schema.Parser().parse(schemaStr);
        reader = new GenericDatumReader<GenericRecord>(schema);
        InputStream input = new ByteArrayInputStream(avro);
        output = new ByteArrayOutputStream();
        DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema);
        encoder = EncoderFactory.get().jsonEncoder(schema, output, pretty);
        Decoder decoder = DecoderFactory.get().binaryDecoder(input, null);
        GenericRecord datum;
        while (true) {
            try {
                datum = reader.read(null, decoder);
            } catch (EOFException eofe) {
                break;
            }
            writer.write(datum, encoder);
        }
        encoder.flush();
        output.flush();
        return new String(output.toByteArray());
    } finally {
        try { if (output != null) output.close(); } catch (Exception e) { }
    }
}

, , (Avro ) . , JSON Avro, . , .

@Grapes([
    @Grab(group='org.apache.avro', module='avro', version='1.7.7')
])

// writes Avro as a http://avro.apache.org/docs/current/spec.html#Object+Container+Files rather than a sequence of records

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.io.JsonEncoder;


String schema = '''{
  "type":"record",
  "namespace":"foo",
  "name":"Person",
  "fields":[
    {
      "name":"name",
      "type":"string"
    },
    {
      "name":"age",
      "type":"int"
    }
  ]
}'''
String json = "{" +
  "\"name\":\"Frank\"," +
  "\"age\":47" +
"}"

assert avroToJson(jsonToAvro(json, schema)) == json


public static byte[] jsonToAvro(String json, String schemaStr) throws IOException {
    InputStream input = null;
    DataFileWriter<GenericRecord> writer = null;
    Encoder encoder = null;
    ByteArrayOutputStream output = null;
    try {
        Schema schema = new Schema.Parser().parse(schemaStr);
        DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema);
        input = new ByteArrayInputStream(json.getBytes());
        output = new ByteArrayOutputStream();
        DataInputStream din = new DataInputStream(input);
        writer = new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>());
        writer.create(schema, output);
        Decoder decoder = DecoderFactory.get().jsonDecoder(schema, din);
        GenericRecord datum;
        while (true) {
            try {
                datum = reader.read(null, decoder);
            } catch (EOFException eofe) {
                break;
            }
            writer.append(datum);
        }
        writer.flush();
        return output.toByteArray();
    } finally {
        try { input.close(); } catch (Exception e) { }
    }
}

public static String avroToJson(byte[] avro) throws IOException {
    boolean pretty = false;
    GenericDatumReader<GenericRecord> reader = null;
    JsonEncoder encoder = null;
    ByteArrayOutputStream output = null;
    try {
        reader = new GenericDatumReader<GenericRecord>();
        InputStream input = new ByteArrayInputStream(avro);
        DataFileStream<GenericRecord> streamReader = new DataFileStream<GenericRecord>(input, reader);
        output = new ByteArrayOutputStream();
        Schema schema = streamReader.getSchema();
        DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema);
        encoder = EncoderFactory.get().jsonEncoder(schema, output, pretty);
        for (GenericRecord datum : streamReader) {
            writer.write(datum, encoder);
        }
        encoder.flush();
        output.flush();
        return new String(output.toByteArray());
    } finally {
        try { if (output != null) output.close(); } catch (Exception e) { }
    }
}
+15

You can use avro-toolsto convert json file ( {input_file}.json) to avro file ( {output_file}.avro) when you know the {schema_file}.avscjson file scheme ( ). As below:

java -jar the/path/of/avro-tools-1.8.1.jar fromjson {input_file}.json   --schema-file {schema_file}.avsc > {output_file}.avro

By the way, the contents of the file {schema_file}.avscare:

{"type": "record",
 "name": "User",
  "fields": [
      {"name": "name", "type": "string"},
      {"name": "favorite_number",  "type": ["int", "null"]},
      {"name": "favorite_color", "type": ["string", "null"]}
  ]
 }

Download avro-tools-1.8.1

Download other avro tools

0
source

All Articles