Update the package index
$ sudo apt-get update
Install the prerequisite packages
$ sudo apt-get install \
apt-transport-https \
ca-certificates \
curl \
gnupg-agent \
software-properties-common
Add Docker's official GPG key
$ curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
Set up the stable repository
$ sudo add-apt-repository \
"deb [arch=amd64] https://download.docker.com/linux/ubuntu \
$(lsb_release -cs) \
stable"
Install Docker Engine (Community)
Update the package index again
$ sudo apt-get update
Install the latest Docker Engine
$ sudo apt-get install docker-ce docker-ce-cli containerd.io
Verify the installation by running the hello-world image
$ sudo docker run hello-world
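If hello-world prints its welcome message, the installation succeeded. As an optional extra check, confirm the client and daemon versions match what you expect:
$ sudo docker version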
Using Docker
Create a virtual bridge network for the Hadoop cluster
$ sudo docker network create --driver=bridge hadoop
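To confirm the bridge network exists (and to see the subnet Docker assigned to it), list and inspect it:
$ sudo docker network ls
$ sudo docker network inspect hadoop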
Pull the Ubuntu image
$ sudo docker pull ubuntu
Create a container from the image
$ sudo docker run -it --name ubuntu-hadoop ubuntu
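One caveat before changing the apt sources: the Aliyun entries below hard-code the xenial (16.04) codename, while docker pull ubuntu fetches whatever release is currently tagged latest. Check the container's actual codename first and substitute it in the sources if it differs:
$ cat /etc/os-release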
Switch the apt sources to the Aliyun mirror
Change into the apt configuration directory
$ cd /etc/apt
Back up the original sources file
$ cp sources.list sources.list.bak
Write the mirror entries into the file
$ echo "deb http://mirrors.aliyun.com/ubuntu/ xenial main restricted universe multiverse
deb http://mirrors.aliyun.com/ubuntu/ xenial-security main restricted universe multiverse
deb http://mirrors.aliyun.com/ubuntu/ xenial-updates main restricted universe multiverse
deb http://mirrors.aliyun.com/ubuntu/ xenial-backports main restricted universe multiverse
deb-src http://mirrors.aliyun.com/ubuntu/ xenial main restricted universe multiverse
deb-src http://mirrors.aliyun.com/ubuntu/ xenial-security main restricted universe multiverse
deb-src http://mirrors.aliyun.com/ubuntu/ xenial-updates main restricted universe multiverse
deb-src http://mirrors.aliyun.com/ubuntu/ xenial-backports main restricted universe multiverse" > /etc/apt/sources.list
Update the package index
$ apt-get update
Install Vim
$ apt install vim
Install net-tools
$ apt install net-tools
Install Java
$ apt install openjdk-8-jdk
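The profile edits later in this guide assume the JDK landed in /usr/lib/jvm/java-8-openjdk-amd64, the default location on amd64 images, so it is worth verifying both the version and the path now:
$ java -version
$ ls /usr/lib/jvm/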
Install the SSH server
$ apt install openssh-server
Install the SSH client
$ apt install openssh-client
Configure SSH
Change to the home directory
$ cd ~
Generate a key pair (just press Enter at the prompt)
$ ssh-keygen -t rsa -P ""
Append the public key to authorized_keys
$ cat .ssh/id_rsa.pub >> .ssh/authorized_keys
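sshd is strict about key-file permissions; if the login test below still prompts for a password, tightening them usually resolves it:
$ chmod 700 ~/.ssh
$ chmod 600 ~/.ssh/authorized_keys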
Start the SSH service
$ service ssh start
Test passwordless login to the local host
$ ssh 127.0.0.1
Edit .bashrc
$ vim ~/.bashrc
Append the following line at the end of the file
service ssh start
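The container has no init system to start sshd at boot, so this .bashrc line starts it whenever a shell opens in the container. You can confirm the daemon is running with:
$ service ssh status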
Install Hadoop
Download the release tarball
$ wget http://mirrors.hust.edu.cn/apache/hadoop/common/hadoop-3.2.1/hadoop-3.2.1.tar.gz
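Mirrors such as this one usually keep only recent releases, so the link above may have gone stale; the Apache archive keeps every release permanently and can be used as a fallback:
$ wget https://archive.apache.org/dist/hadoop/common/hadoop-3.2.1/hadoop-3.2.1.tar.gz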
Extract it into /usr/local and rename the directory
$ tar -zxvf hadoop-3.2.1.tar.gz -C /usr/local/
$ cd /usr/local/
$ mv hadoop-3.2.1 hadoop
Edit /etc/profile and append the following environment variables
$ vim /etc/profile
# Append the following:
#java
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export JRE_HOME=${JAVA_HOME}/jre
export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib
export PATH=${JAVA_HOME}/bin:$PATH
#hadoop
export HADOOP_HOME=/usr/local/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_YARN_HOME=$HADOOP_HOME
export HADOOP_INSTALL=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_LIBEXEC_DIR=$HADOOP_HOME/libexec
export JAVA_LIBRARY_PATH=$HADOOP_HOME/lib/native:$JAVA_LIBRARY_PATH
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HDFS_DATANODE_USER=root
export HDFS_DATANODE_SECURE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export HDFS_NAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
# Reload the profile so the changes take effect
$ source /etc/profile
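A quick way to confirm that JAVA_HOME and the Hadoop PATH entries took effect is to ask Hadoop for its version; the command fails loudly if either is wrong:
$ hadoop version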
Configure Hadoop
Change to the configuration directory
$ cd /usr/local/hadoop/etc/hadoop
Edit hadoop-env.sh and append the following at the end of the file
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
Edit core-site.xml
<configuration>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://master:9000</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/home/hadoop3/hadoop/tmp</value>
  </property>
</configuration>
Edit hdfs-site.xml (the data directory belongs to the DataNode, so the property is dfs.datanode.data.dir)
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/home/hadoop3/hadoop/hdfs/name</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/home/hadoop3/hadoop/hdfs/data</value>
  </property>
</configuration>
Edit mapred-site.xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapreduce.application.classpath</name>
    <value>
      /usr/local/hadoop/etc/hadoop,
      /usr/local/hadoop/share/hadoop/common/*,
      /usr/local/hadoop/share/hadoop/common/lib/*,
      /usr/local/hadoop/share/hadoop/hdfs/*,
      /usr/local/hadoop/share/hadoop/hdfs/lib/*,
      /usr/local/hadoop/share/hadoop/mapreduce/*,
      /usr/local/hadoop/share/hadoop/mapreduce/lib/*,
      /usr/local/hadoop/share/hadoop/yarn/*,
      /usr/local/hadoop/share/hadoop/yarn/lib/*
    </value>
  </property>
</configuration>
Edit yarn-site.xml
<configuration>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>master</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
</configuration>
Edit the workers file so it lists every node that should run a DataNode and NodeManager
master
slave1
slave2
Start the cluster in Docker
Commit the current container as an image (use docker ps -a to find your own container ID)
$ docker commit -a "damonchunglm" -m "my hadoop" b9336744997d myhadoop
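The ID b9336744997d and the author name are specific to the original setup; substitute your own container ID from docker ps -a. To confirm the new image exists before creating nodes from it:
$ sudo docker images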
Create the master node from the committed image, publishing the NameNode (9870) and ResourceManager (8088) web UI ports
$ sudo docker run -itd --network hadoop -h "master" --name "master" -p 9870:9870 -p 8088:8088 myhadoop /bin/bash
Create the slave1 and slave2 nodes
$ sudo docker run -itd --network hadoop -h "slave1" --name "slave1" myhadoop /bin/bash
$ sudo docker run -itd --network hadoop -h "slave2" --name "slave2" myhadoop /bin/bash
Attach to the master node and format HDFS
$ sudo docker attach master
$ cd /usr/local/hadoop/bin
$ ./hdfs namenode -format
Start all services
$ cd ../sbin
$ ./start-all.sh
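If startup succeeds, jps on the master should show NameNode, SecondaryNameNode, and ResourceManager (plus a DataNode and NodeManager, since master is also listed in workers), and the datanodes should report as live. The NameNode and ResourceManager web UIs are also reachable from the host at http://localhost:9870 and http://localhost:8088 through the port mappings given when the master container was created.
$ jps
$ hdfs dfsadmin -report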