# Hadoop and Hive Installation Guide (single-node, Ubuntu)
# Verify Java is installed (Hadoop 3.4.x requires Java 8 or 11).
java -version
javac -version

# Download and unpack Hadoop 3.4.0.
wget https://dlcdn.apache.org/hadoop/common/hadoop-3.4.0/hadoop-3.4.0.tar.gz
# The original notes said "Unzip using tar" but omitted the command itself;
# without it the following mv would fail.
tar xzf hadoop-3.4.0.tar.gz
mv hadoop-3.4.0 ~/hadoop
cd ~/hadoop

# Create the local directories that will back the HDFS NameNode/DataNode.
mkdir -p dfsdata/{datanode,namenode}
# NOTE(review): 777 is convenient on a single-user tutorial box but far too
# permissive for anything shared — prefer 750 owned by the hadoop user.
chmod -R 777 dfsdata
#Edit the Config Files in ~/hadoop/etc/hadoop/
# For core-site.xml
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000</value>
</property>
# For hdfs-site.xml
<property>
<name>dfs.replication</name>
<value>3</value>
</property><property>
<name>dfs.name.dir</name>
<value>/home/ubuntu/hadoop/dfsdata/namenode</value>
</property><property>
<name>dfs.data.dir</name>
<value>/home/ubuntu/hadoop/dfsdata/datanode</value>
</property>
# For mapred-site.xml
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
# For yarn-site.xml
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property><property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
#Update hadoop-env.sh: point Hadoop at the Java 11 JDK.
export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64

# Hadoop's start/stop scripts manage daemons over SSH, even on one node.
# ('sh localhost' in the original notes was a typo for 'ssh localhost'.)
# Answer "yes" to the host-key prompt, then leave the session with 'exit'.
ssh localhost
exit

# Initializes or resets HDFS by clearing the NameNode's metadata and
# preparing the filesystem for use — THIS ERASES ALL STORED DATA.
# (The notes described this step but the command itself was missing.)
hdfs namenode -format

# Start the HDFS and YARN daemons; the stop commands shut them down again.
start-dfs.sh
start-yarn.sh
stop-yarn.sh
stop-dfs.sh
# INSTALL LATEST HIVE (4.0.0) along with Hadoop 3.4.0
# JAVA_HOME is switched here from the Java 11 JDK used above to Java 8.
# NOTE(review): Hive 4.0.0 also supports Java 8; confirm both JDKs are
# actually installed and that this switch is intentional before proceeding.
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
# Reload the shell profile (assumes the export above was also added to ~/.bashrc).
source ~/.bashrc
# Bring up all Hadoop daemons (HDFS + YARN) before installing Hive.
start-all.sh
#Update ~/hadoop/etc/hadoop/mapred-site.xml add following props
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
#Restart hadoop so the new mapred-site.xml properties take effect
stop-all.sh
start-all.sh

#Test mapreduce with a sample job: estimate pi with 10 maps x 100 samples each.
# (In the original notes this single command was wrapped across three lines,
# which breaks copy-paste; it is one command.)
yarn jar "$HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.4.0.jar" \
  pi 10 100
#Download apache-hive-4.0.0-bin.tar.gz, then unpack and install it.
tar xzf apache-hive-4.0.0-bin.tar.gz
# Rename to ~/hive — every later step (HIVE_HOME, the guava swap, the
# metastore schema path) assumes /home/ubuntu/hive. This mv was missing.
mv apache-hive-4.0.0-bin ~/hive
# The config templates live in the conf directory; cd there so the
# cp commands below can actually find them.
cd ~/hive/conf
cp hive-env.sh.template hive-env.sh
cp hive-default.xml.template hive-site.xml
#Edit hive-site.xml and add the below 2 props at the beginning
<property>
<name>system:java.io.tmpdir</name>
<value>/tmp/hive/java</value>
</property>
<property>
<name>system:user.name</name>
<value>${user.name}</value>
</property>
#Edit hive-site.xml: the hive-default template contains an illegal control
# character between the words "for" and "transactional" in the description
# below. Search for this text and delete the stray character so the XML parses:
#   "Ensures commands with OVERWRITE (such as INSERT OVERWRITE) acquire
#    Exclusive locks for transactional tables"
# Hive environment — add these to ~/.bashrc as well so they persist.
export HIVE_HOME=/home/ubuntu/hive
export HIVE_CONF_DIR=$HIVE_HOME/conf
export PATH=$PATH:$HIVE_HOME/bin
export CLASSPATH=$CLASSPATH:$HADOOP_HOME/lib/*:.
export CLASSPATH=$CLASSPATH:$HIVE_HOME/lib/*:.
source ~/.bashrc

#Create hdfs directories and give corresponding permissions
hdfs dfs -ls /
hdfs dfs -rm -r /user          # DESTRUCTIVE: removes everything under /user
hdfs dfs -ls /
hdfs dfs -mkdir -p /user/hive/warehouse
# Hive needs group-write on the warehouse directory; this chmod was the
# "corresponding permissions" step promised above but missing from the notes.
hdfs dfs -chmod g+w /user/hive/warehouse

# Hive ships a guava jar that clashes with Hadoop's; replace it with
# Hadoop's copy. (This cp is one command — the original notes split it
# across two lines.)
# NOTE(review): confirm the guava version actually shipped with your
# Hadoop build before copying.
rm -r /home/ubuntu/hive/lib/guava-*.jar
cp /home/ubuntu/hadoop/share/hadoop/common/lib/guava-27.0-jre.jar /home/ubuntu/hive/lib
#Edit the metastore schema file below and comment out the first 2 queries of
# the "DDL Statements for functions" section by prefixing each with "--"
# (the SQL line-comment marker):
/home/ubuntu/hive/scripts/metastore/upgrade/derby/hive-schema-4.0.0.derby.sql
#Initialize Hive Metastore
# In Hive 4, the `hive` command launches beeline against an embedded
# HiveServer2 and initializes the Derby metastore schema on first start.
# NOTE(review): if schema init fails, run `schematool -dbType derby -initSchema`
# and retry — confirm against the Hive 4 admin docs.
hive
# Leave the interactive session.
!quit
# For better Visualisation use beeline
------------------------------------
# Connect beeline to the embedded HiveServer2 via its JDBC URL.
beeline -u jdbc:hive2://
# The following are HiveQL statements typed inside the beeline session:
show databases;
use college;
SHOW TABLES;
# !quit is a beeline client command that ends the session.
!quit