Hadoop and Hive Installation

#Update the package index and install Java 11 for Hadoop

sudo apt update

#Check whether Java is already installed
java -version

sudo apt install openjdk-11-jdk -y

#Verify the compiler is available
javac -version

#Command to Find Java Location

dirname $(dirname $(readlink -f $(which java)))
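
#On a default Ubuntu install of OpenJDK 11, the command above typically prints
#a path like the following (your path may differ):

/usr/lib/jvm/java-11-openjdk-amd64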

#Download the Hadoop 3.4.0 release tarball

wget https://dlcdn.apache.org/hadoop/common/hadoop-3.4.0/hadoop-3.4.0.tar.gz

#Extract the archive using tar

tar xzf hadoop-3.4.0.tar.gz

#Move the extracted folder to the home directory, renaming it to ~/hadoop

mv hadoop-3.4.0 ~/hadoop

#Create datanode and namenode directories inside hadoop
#(wide-open permissions are acceptable here only because this is a single-node test setup)

cd ~/hadoop
mkdir -p dfsdata/{datanode,namenode}
chmod -R 777 dfsdata
#Edit the Config Files in ~/hadoop/etc/hadoop/

# For core-site.xml
<property>
  <name>fs.defaultFS</name>
  <value>hdfs://localhost:9000</value>
</property>

# For hdfs-site.xml (a replication factor of 1 suits a single-node setup;
# dfs.name.dir/dfs.data.dir are deprecated in Hadoop 3, so use the names below)
<property>
  <name>dfs.replication</name>
  <value>1</value>
</property>
<property>
  <name>dfs.namenode.name.dir</name>
  <value>/home/ubuntu/hadoop/dfsdata/namenode</value>
</property>
<property>
  <name>dfs.datanode.data.dir</name>
  <value>/home/ubuntu/hadoop/dfsdata/datanode</value>
</property>
# For mapred-site.xml
<property>
  <name>mapreduce.framework.name</name>
  <value>yarn</value>
</property>

# For yarn-site.xml
<property>
  <name>yarn.nodemanager.aux-services</name>
  <value>mapreduce_shuffle</value>
</property>
<property>
  <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
  <value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
#Update hadoop-env.sh

export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64

#Update the environment variables (replace the paths to match your system)


# update ~/.bashrc add the below at the end of the file
export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64
export HADOOP_HOME=/home/ubuntu/hadoop
export HADOOP_INSTALL=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"

#Activate the Environment Variables


source ~/.bashrc
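
#Sanity check: confirm the PATH update works by printing the Hadoop version

hadoop version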
#Configure SSH Server to Create PasswordLess SSH Login
sudo apt install openssh-server openssh-client -y
ssh-keygen -t rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 640 ~/.ssh/authorized_keys
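
#Optional: the key can also be generated non-interactively (empty passphrase),
#which is equivalent for this single-node setup:

ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa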

#Test the SSH login to localhost (answer "yes" to the host-key prompt, then type exit)

ssh localhost
exit
#Initialize (or reset) HDFS by formatting the NameNode; this clears the
#NameNode's metadata, prepares the filesystem for use, and erases all stored data

hdfs namenode -format


#Start the HDFS daemons (NameNode, DataNode, and Secondary NameNode)

start-dfs.sh

#Start the YARN daemons (ResourceManager and NodeManager)

start-yarn.sh

#Check whether all services are running using the jps command

jps
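
#If everything started cleanly, jps should list daemons similar to the
#following (process IDs will differ):

NameNode
DataNode
SecondaryNameNode
ResourceManager
NodeManager
Jps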
#Test the web UIs: http://localhost:9870 (NameNode), http://localhost:9864 (DataNode), http://localhost:8088 (YARN ResourceManager)


*****************************************************************************************************

stop-yarn.sh

stop-dfs.sh

INSTALL HIVE 4.0.0 ALONGSIDE HADOOP 3.4.0

#Hive needs Java 8, so install it alongside Java 11

sudo apt install openjdk-8-jdk -y

#Modify the Java path from 11 to 8 in both of these files:

/home/ubuntu/hadoop/etc/hadoop/hadoop-env.sh
/home/ubuntu/.bashrc

export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64

#Rebuild the Environment Variables

source ~/.bashrc
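
#Confirm the switch: JAVA_HOME should now point at Java 8 (note that plain
#"java -version" may still report 11 unless update-alternatives is changed)

echo $JAVA_HOME
$JAVA_HOME/bin/java -version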

#Test that Hadoop is working (with start-all.sh)

start-all.sh
#Update ~/hadoop/etc/hadoop/mapred-site.xml and add the following properties

<property>
  <name>yarn.app.mapreduce.am.env</name>
  <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<property>
  <name>mapreduce.map.env</name>
  <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<property>
  <name>mapreduce.reduce.env</name>
  <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>

#Restart hadoop

stop-all.sh

start-all.sh
#Test mapreduce with a sample job

yarn jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.4.0.jar pi 10 100
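
#The job should finish with a line similar to the following (the exact
#estimate depends on the map and sample counts):

Estimated value of Pi is 3.14...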

#Download apache-hive-4.0.0-bin.tar.gz (e.g. from https://dlcdn.apache.org/hive/hive-4.0.0/) and extract it
tar xzf apache-hive-4.0.0-bin.tar.gz

#Rename the extracted folder to hive and keep it in the home directory

mv apache-hive-4.0.0-bin ~/hive

#Create hive-env.sh and hive-site.xml using template


cd /home/ubuntu/hive/conf/

cp hive-env.sh.template hive-env.sh

cp hive-default.xml.template hive-site.xml
#Edit hive-site.xml and add the below 2 properties at the beginning
<property>
  <name>system:java.io.tmpdir</name>
  <value>/tmp/hive/java</value>
</property>
<property>
  <name>system:user.name</name>
  <value>${user.name}</value>
</property>
#Edit hive-site.xml: search for the text below and remove the invalid control
#character (&#8;) that sits between the words "for" and "transactional"
*******************************************************************************************************
Ensures commands with OVERWRITE (such as INSERT OVERWRITE) acquire Exclusive
locks

#Update the ~/.bashrc file

export HIVE_HOME=/home/ubuntu/hive
export HIVE_CONF_DIR=$HIVE_HOME/conf
export PATH=$PATH:$HIVE_HOME/bin
export CLASSPATH=$CLASSPATH:$HADOOP_HOME/lib/*:.
export CLASSPATH=$CLASSPATH:$HIVE_HOME/lib/*:.

#Rebuild the Environment Variable

source ~/.bashrc
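
#Sanity check: confirm Hive is now on the PATH

hive --version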
#Create hdfs directories and give the corresponding permissions
hdfs dfs -ls /
hdfs dfs -rm -r /user
hdfs dfs -ls /
hdfs dfs -mkdir -p /user/hive/warehouse

hdfs dfs -chmod g+wx /user
hdfs dfs -chmod g+wx /tmp
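
#Verify the warehouse directory exists with the expected permissions

hdfs dfs -ls /user/hive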

#Edit ~/hive/conf/hive-env.sh: uncomment the HADOOP_HOME line and set it to

HADOOP_HOME=/home/ubuntu/hadoop
#Replace Hive's bundled guava jar with the newer one shipped with Hadoop

rm /home/ubuntu/hive/lib/guava-*.jar

cp /home/ubuntu/hadoop/share/hadoop/common/lib/guava-27.0-jre.jar /home/ubuntu/hive/lib

#Edit the Derby schema file below and comment out the first 2 queries of the
#DDL statements for functions with -- (search for "DDL Statements for Function")

/home/ubuntu/hive/scripts/metastore/upgrade/derby/hive-schema-4.0.0.derby.sql
#Initialize Hive Metastore

schematool -dbType derby -initSchema
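
#Optionally verify the metastore schema was created (prints the schema version)

schematool -dbType derby -info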

# Launch Hive to check

hive

!quit
# For a better interactive experience, use Beeline
------------------------------------

beeline -u jdbc:hive2://

show databases;

create database college;

use college;

CREATE TABLE employee (id INT, name STRING, salary FLOAT)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE;

SHOW TABLES;

INSERT INTO employee VALUES (1, 'John Doe', 50000.0);

SELECT * FROM employee;
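
-- Because the table is comma-delimited text, a local CSV can also be bulk
-- loaded; the file path below is hypothetical:
LOAD DATA LOCAL INPATH '/home/ubuntu/employees.csv' INTO TABLE employee;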

!quit
