Thursday, 15 January 2015

Extract json data in hive

Extract json data in hive

Sample data:--

{"data1":[{"id":"10084673031_10152357071093032","from":{"category":"Computers\/technology","category_list":[{"id":"139823692748565","name":"Computers & Electronics"},{"id":"177721448951559","name":"Workplace & Office"}],"name":"Cisco","id":"10084673031"},"message":"The Network team was lucky enough to speak with First Lieutenant Andrew Marsh about his return and transition to life after Afghanistan. Andrew shares his story on how an online program targeted to veterans matched his technical military skills to those required in a civilian career.","picture":"http:\/\/external.ak.fbcdn.net\/safe_image.php?d=AQDPRCLRHhV0HEaE&w=154&h=154&url=http\u00253A\u00252F\u00252Fimg.youtube.com\u00252Fvi\u00252FKNedz6qq45A\u00252F0.jpg","link":"http:\/\/newsroom.cisco.com\/video-content?type=webcontent&articleId=1281697","name":"Online Program Matches Veterans to Civilian Careers - The Network: Cisco's Technology News Site","caption":"newsroom.cisco.com","description":"Andrew Marsh, First Lieutenant U.S. Marine Corps : I was sent to Afghanistan in January of 2012 and I was an advisor to an Afghan army cornel. so it was my job to advise him on a daily basis. And look at what he was doing communications wise.","icon":"http:\/\/static.ak.fbcdn.net\/rsrc.php\/v2\/yD\/r\/aS8ecmYRys0.gif","privacy":{"value":""},"type":"link","status_type":"shared_story","created_time":"2013-11-11T04:04:51+0000","updated_time":"2013-11-11T04:06:26+0000","likes":{"data":[{"id":"100004689103296","name":"Salvador Lara"},{"id":"1661003721","name":"Chad Allison"},{"id":"1347829998","name":"Lynn Young Dasher"},{"id":"100000750083815","name":"Shannon Vanlandingham"},{"id":"100003220428749","name":"Tom Peters"},{"id":"904540186","name":"Manuel van der Herder"},{"id":"776497928","name":"Luis Chiong"},{"id":"748655637","name":"Dave van Asch"},{"id":"100005256342167","name":"Os Meyti"},{"id":"100004025366069","name":"Denish Malam"},{"id":"1050751758","name":"Jens Mueller"},{"id":"506521037","name":"Jen Nickens"},{"id":"100000723537849","name":"Vanya Ivanova"},{"id":"100000875945917","name":"Danien Harper"},{"id":"100000546265440","name":"Robert Hnatko"},{"id":"656888964","name":"Iman Khayambashi"},{"id":"100000730517023","name":"Debayan Deb"},{"id":"1642383715","name":"Jonathan Schwartz"},{"id":"100000865663563","name":"John Mahoney"},{"id":"100003026212534","name":"Rafael Kireyev"},{"id":"1490631811","name":"Jackie Salvatierra Estrada"},{"id":"776984395","name":"Yuval Ariav"},{"id":"100000592615966","name":"Ingemar Petersson"},{"id":"1391145150","name":"Qassim Zia"},{"id":"606035502793053","name":"Cisco Community"}]}}]}

Jars need to register:--

/Users/dheerendra/work/json-serde-1.1.4-jar-with-dependencies.jar
/Users/dheerendra/work/hive-json-serde-0.2.jar
/Users/dheerendra/work/hive-serdes-1.0-SNAPSHOT.jar
/Users/dheerendra/work/json-serde-1.0.jar

Create table :--
create table json6(data1 array<struct<id:string,message:string,created_time:string,likes:struct<data:array<struct<id:string,name:string>>>>>)ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe';

Load data in table:--

load data local inpath '/root/working/json_file.txt' overwrite into table json6;

Select data from table:--

select  id1,crt,id_4 from json6 lateral view explode(data1.id)id as id1  lateral view explode(data1.created_time)crtime as crt lateral view explode(data1.likes.data)id3  as id_3 lateral view explode(id_3.name)id4 as id_4;

Extract json data in hive

Sample data:--

{"data1":[{"id":"10084673031_10152357071093032","from":{"category":"Computers\/technology","category_list":[{"id":"139823692748565","name":"Computers & Electronics"},{"id":"177721448951559","name":"Workplace & Office"}],"name":"Cisco","id":"10084673031"},"message":"The Network team was lucky enough to speak with First Lieutenant Andrew Marsh about his return and transition to life after Afghanistan. Andrew shares his story on how an online program targeted to veterans matched his technical military skills to those required in a civilian career.","picture":"http:\/\/external.ak.fbcdn.net\/safe_image.php?d=AQDPRCLRHhV0HEaE&w=154&h=154&url=http\u00253A\u00252F\u00252Fimg.youtube.com\u00252Fvi\u00252FKNedz6qq45A\u00252F0.jpg","link":"http:\/\/newsroom.cisco.com\/video-content?type=webcontent&articleId=1281697","name":"Online Program Matches Veterans to Civilian Careers - The Network: Cisco's Technology News Site","caption":"newsroom.cisco.com","description":"Andrew Marsh, First Lieutenant U.S. Marine Corps : I was sent to Afghanistan in January of 2012 and I was an advisor to an Afghan army cornel. so it was my job to advise him on a daily basis. And look at what he was doing communications wise.","icon":"http:\/\/static.ak.fbcdn.net\/rsrc.php\/v2\/yD\/r\/aS8ecmYRys0.gif","privacy":{"value":""},"type":"link","status_type":"shared_story","created_time":"2013-11-11T04:04:51+0000","updated_time":"2013-11-11T04:06:26+0000","likes":{"data":[{"id":"100004689103296","name":"Salvador Lara"},{"id":"1661003721","name":"Chad Allison"},{"id":"1347829998","name":"Lynn Young Dasher"},{"id":"100000750083815","name":"Shannon Vanlandingham"},{"id":"100003220428749","name":"Tom Peters"},{"id":"904540186","name":"Manuel van der Herder"},{"id":"776497928","name":"Luis Chiong"},{"id":"748655637","name":"Dave van Asch"},{"id":"100005256342167","name":"Os Meyti"},{"id":"100004025366069","name":"Denish Malam"},{"id":"1050751758","name":"Jens Mueller"},{"id":"506521037","name":"Jen Nickens"},{"id":"100000723537849","name":"Vanya Ivanova"},{"id":"100000875945917","name":"Danien Harper"},{"id":"100000546265440","name":"Robert Hnatko"},{"id":"656888964","name":"Iman Khayambashi"},{"id":"100000730517023","name":"Debayan Deb"},{"id":"1642383715","name":"Jonathan Schwartz"},{"id":"100000865663563","name":"John Mahoney"},{"id":"100003026212534","name":"Rafael Kireyev"},{"id":"1490631811","name":"Jackie Salvatierra Estrada"},{"id":"776984395","name":"Yuval Ariav"},{"id":"100000592615966","name":"Ingemar Petersson"},{"id":"1391145150","name":"Qassim Zia"},{"id":"606035502793053","name":"Cisco Community"}]}}]}

Jars need to register:--

/Users/dheerendra/work/json-serde-1.1.4-jar-with-dependencies.jar
/Users/dheerendra/work/hive-json-serde-0.2.jar
/Users/dheerendra/work/hive-serdes-1.0-SNAPSHOT.jar
/Users/dheerendra/work/json-serde-1.0.jar

Create table :--
create table json6(data1 array<struct<id:string,message:string,created_time:string,likes:struct<data:array<struct<id:string,name:string>>>>>)ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe';

Load data in table:--

load data local inpath '/root/working/json_file.txt' overwrite into table json6;

Select data from table:--

select  id1,crt,id_4 from json6 lateral view explode(data1.id)id as id1  lateral view explode(data1.created_time)crtime as crt lateral view explode(data1.likes.data)id3  as id_3 lateral view explode(id_3.name)id4 as id_4 lateral view explode(id_4.name)nm as nm1