0% found this document useful (0 votes)
4 views

Hive Exercises

Uploaded by

nthpro.4259
Copyright
© © All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
4 views

Hive Exercises

Uploaded by

nthpro.4259
Copyright
© © All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 7

/**

* hive exercises
*
* part of a longer hadoop video course at http://bit.ly/learn-hadoop/
*
* @author rICh <[email protected]>
*/

// our sample "page_view" data looks like this:


1391102617,1234,http://quicloud.com/,http://google.com/,198.211.110.9
1391101111,1234,http://somewhere.com/,http://google.com/,197.211.110.9
1391002637,1234,http://somwhere-else.com/,http://yahoo.com/,196.211.110.9
1391002617,1234,http://quicloud.com/,http://bing.com/,198.211.110.9

// start up hive
Hive
// create a database for the exercises
// NOTE(review): nothing in this walkthrough runs `USE page_view;`, so the tables
// created below appear to land in the `default` database -- confirm this is intended
CREATE DATABASE page_view;

// this is an external table: hive stores only the table definition and reads the
// files that already sit under LOCATION
// columns are listed in the same order as the comma-separated fields of the sample rows
// note: if using the AWS-backed 'cloudera manager' stack, change '/user/cloudera/' to '/user/ubuntu/'
CREATE EXTERNAL TABLE page_view_external (
    viewTime     INT,
    userid       BIGINT,
    page_url     STRING,
    referrer_url STRING,
    ip           STRING
)
COMMENT 'This is the EXTERNAL page view table'
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
LOCATION '/user/cloudera/input-data/page_view/';

// inspect the table: first the plain description, then the extended metadata
DESCRIBE page_view_external;
DESCRIBE EXTENDED page_view_external;

// in the extended output, look for tableType -- it reads EXTERNAL_TABLE

// this command returns quickly (probably seconds). why?
// ...if we are not doing any filtering (like where), and returning all rows, no MR is done
// (SELECT * is deliberate here: it is exactly the no-filter, all-rows case described above)
SELECT *
FROM page_view_external;

// this command, however, takes several seconds (it filters with WHERE)
SELECT *
FROM page_view_external
WHERE page_url LIKE '%quicloud%';

// DISTINCT is a keyword that applies to the whole select list, not a function call,
// so it is written without parentheses
SELECT DISTINCT page_url
FROM page_view_external
WHERE page_url LIKE '%quicloud%';
// this is an internal table
// (same columns and delimiter as page_view_external, but no EXTERNAL keyword and no LOCATION)
CREATE TABLE page_view_internal (
    viewTime     INT,
    userid       BIGINT,
    page_url     STRING,
    referrer_url STRING,
    ip           STRING
)
COMMENT 'This is the INTERNAL page view table'
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

// LOCAL: the path is read from the local filesystem rather than from HDFS
LOAD DATA LOCAL INPATH '/home/hadooptanhung/page_view.txt'
INTO TABLE page_view_internal;

// What happened to our "local" data? In another terminal do:
// (list the same file that LOAD DATA just read)
ls -lat /home/hadooptanhung/page_view.txt
// still there

// confirm that the rows were loaded into the hive table
SELECT *
FROM page_view_internal;

// in another terminal, let's look for where the data lives
hadoop fs -ls /user/hive/warehouse/page_view_internal

// where does our "external" (page_view_external) table's data live? this should work, right?
hadoop fs -ls /user/hive/warehouse/

// WRONG! This data is EXTERNAL and in the location that we connected up with the CREATE TABLE command
hadoop fs -ls /user/cloudera/input-data/page_view/

// dump the full metadata of the internal table
DESCRIBE EXTENDED page_view_internal;

// this time tableType reads MANAGED_TABLE

// you may want to query the hive data more, but when you're done, let's drop each table & notice what happens
// list the tables before the drops...
SHOW TABLES;

DROP TABLE page_view_internal;
DROP TABLE page_view_external;

// ...and again afterwards, to compare
SHOW TABLES;

// in a 2nd terminal, run these commands. Which data do we expect to persist?


hadoop fs -ls /user/hive/warehouse/page_view_internal/
hadoop fs -ls /user/cloudera/input-data/page_view

You might also like