Java - Creating a nested schema in Apache Spark SQL
I want to load a simple JSON file into a SparkSession using a schema in which each employee has an array of addresses. Sample JSON below:
{"firstname":"neil","lastname":"irani", "addresses" : [ { "city" : "brindavan", "state" : "nj" }, { "city" : "subala", "state" : "dt" }]}
I'm trying to create a schema for loading this JSON, but I believe there is something wrong with the way I am creating the schema below. Please advise. The code below is in Java; I could not find a reasonable sample.
list<structfield> employeefields = new arraylist<>(); employeefields.add(datatypes.createstructfield("firstname", datatypes.stringtype, true)); employeefields.add(datatypes.createstructfield("lastname", datatypes.stringtype, true)); employeefields.add(datatypes.createstructfield("email", datatypes.stringtype, true)); list<structfield> addressfields = new arraylist<>(); addressfields.add(datatypes.createstructfield("city", datatypes.stringtype, true)); addressfields.add(datatypes.createstructfield("state", datatypes.stringtype, true)); addressfields.add(datatypes.createstructfield("zip", datatypes.stringtype, true)); employeefields.add(datatypes.createstructfield("addresses", datatypes.createstructtype(addressfields), true)); structtype employeeschema = datatypes.createstructtype(employeefields); dataset<employee> rowdataset = sparksession.read() .option("inferschema", "false") .schema(employeeschema) .json("simple_employees.json").as(employeeencoder);
update
I was not creating an array type for the addresses field. The code below works fine:
list<structfield> employeefields = new arraylist<>(); employeefields.add(datatypes.createstructfield("firstname", datatypes.stringtype, true)); employeefields.add(datatypes.createstructfield("lastname", datatypes.stringtype, true)); employeefields.add(datatypes.createstructfield("email", datatypes.stringtype, true)); list<structfield> addressfields = new arraylist<>(); addressfields.add(datatypes.createstructfield("city", datatypes.stringtype, true)); addressfields.add(datatypes.createstructfield("state", datatypes.stringtype, true)); addressfields.add(datatypes.createstructfield("zip", datatypes.stringtype, true)); arraytype addressstruct = datatypes.createarraytype( datatypes.createstructtype(addressfields)); employeefields.add(datatypes.createstructfield("addresses", addressstruct, true)); structtype employeeschema = datatypes.createstructtype(employeefields);
Comments
Post a Comment