http://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-3.0.0
http://mirror.bit.edu.cn/apache/hadoop/common/hadoop-3.0.0
http://mirrors.hust.edu.cn/apache/hadoop/common/hadoop-3.0.0
http://mirrors.shuosc.org/apache/hadoop/common/hadoop-3.0.0
http://www-eu.apache.org/dist/hadoop/common/hadoop-3.0.0/
http://www-us.apache.org/dist/hadoop/common/hadoop-3.0.0
https://pan.baidu.com/s/1nvj62Xb — my network drive also works (contains both Hadoop 3.0 and JDK 1.8)
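For example, fetching the release tarball from one of the mirrors above (each mirror directory should serve the same hadoop-3.0.0.tar.gz used later in this guide):
wget http://www-eu.apache.org/dist/hadoop/common/hadoop-3.0.0/hadoop-3.0.0.tar.gz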
bin: executable command scripts
etc: configuration files
sbin: cluster start/stop scripts
share/doc: documentation
share/hadoop: all jar packages
share/hadoop/common: common jars
share/hadoop/mapreduce/hadoop-mapreduce-examples-3.0.0.jar (runnable MapReduce demos)
Place the JDK and Hadoop archives in the /root/myhadoop directory on the server, then extract them:
tar zxvf hadoop-3.0.0.tar.gz -C /root/myhadoop/
unzip jdk1.8.0_111.zip
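A quick sanity check that both trees were extracted (names match the archives above):
ls /root/myhadoop
# expect the hadoop-3.0.0 and jdk1.8.0_111 directories alongside the archives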
# Grant permissions
chmod -R 777 /root/myhadoop/jdk1.8.0_111
vi ~/.bash_profile
# Add the following environment variables
JAVA_HOME=/root/myhadoop/jdk1.8.0_111
export JAVA_HOME
PATH=$JAVA_HOME/bin:$PATH
export PATH
HADOOP_HOME=/root/myhadoop/hadoop-3.0.0
export HADOOP_HOME
PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
export PATH
# Save and exit, then apply the settings
source ~/.bash_profile
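Both tools should now be on the PATH; a quick verification:
java -version     # should report 1.8.0_111
hadoop version    # should report Hadoop 3.0.0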
Note: local (standalone) mode has no HDFS; only MapReduce can be tested.
Edit the configuration file hadoop-env.sh:
vi /root/myhadoop/hadoop-3.0.0/etc/hadoop/hadoop-env.sh
# Run :set number in vi to show line numbers; line 37 is where the JDK path is configured
export JAVA_HOME=/root/myhadoop/jdk1.8.0_111
Run the demo
# Running the examples jar without arguments lists the example programs it provides
hadoop jar /root/myhadoop/hadoop-3.0.0/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.0.0.jar
# We will use the wordcount program from that list:
#wordcount: A map/reduce program that counts the words in the input files.
# Running wordcount without further arguments prints its usage:
hadoop jar /root/myhadoop/hadoop-3.0.0/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.0.0.jar wordcount
#Usage: wordcount <in> [<in>...] <out>
# Create the test input file /root/myhadoop/input/data1.txt
# with the following content:
我 在 北京
我 在 北京 西直门
西直门 属于 北京
# Create the output parent directory /root/myhadoop/output (the job itself creates the wordcountresult subdirectory; MapReduce fails if the final output path already exists)
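The same preparation as a minimal shell sketch (file content copied from the sample above):
mkdir -p /root/myhadoop/input /root/myhadoop/output
cat > /root/myhadoop/input/data1.txt <<'EOF'
我 在 北京
我 在 北京 西直门
西直门 属于 北京
EOF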
# Run the job
hadoop jar /root/myhadoop/hadoop-3.0.0/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.0.0.jar wordcount /root/myhadoop/input/data1.txt /root/myhadoop/output/wordcountresult
# The job log ends with:
2017-12-20 21:56:37,040 INFO mapreduce.Job: map 100% reduce 100%
2017-12-20 21:56:37,044 INFO mapreduce.Job: Job job_local1191731354_0001 completed successfully
# Two files are generated under /root/myhadoop/output/wordcountresult:
part-r-00000: the computed result
_SUCCESS: an empty marker indicating the job succeeded
# View the result; keys are in the default lexicographic sort order
[root@Hadoopc1 wordcountresult]# cat part-r-00000
北京 3
在 2
属于 1
我 2
西直门 2
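As a sanity check, the same counts can be reproduced with plain shell tools, assuming tokens are separated by single spaces:
tr ' ' '\n' < /root/myhadoop/input/data1.txt | sort | uniq -c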
Note: pseudo-distributed mode still runs on this single machine, but it provides all of Hadoop's functionality; one machine simulates a distributed environment.
It requires configuration.
YARN is also a master/slave architecture: a ResourceManager master node and NodeManager worker nodes.
The files to modify are as follows:
# HDFS configuration
# hdfs-site.xml
Replication factor: dfs.replication=1
Permission checking: dfs.permissions=false (Hadoop 3 prefers the newer key name dfs.permissions.enabled; the old name still works as a deprecated alias)
# core-site.xml
NameNode address: fs.defaultFS=hdfs://192.168.13.245:9000
Where HDFS stores its data: hadoop.tmp.dir=/root/myhadoop/hadoop-3.0.0/tmp (create this directory first if it does not exist)
# MapReduce configuration
# mapred-site.xml
Framework that MapReduce runs on: mapreduce.framework.name=yarn
# yarn-site.xml
ResourceManager address: yarn.resourcemanager.hostname=192.168.13.245
Auxiliary service NodeManagers use to run MapReduce tasks: yarn.nodemanager.aux-services=mapreduce_shuffle
In full (all of these files live under /root/myhadoop/hadoop-3.0.0/etc/hadoop):
vi hdfs-site.xml
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <property>
        <name>dfs.permissions</name>
        <value>false</value>
    </property>
</configuration>
vi core-site.xml
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://192.168.13.245:9000</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/root/myhadoop/hadoop-3.0.0/tmp</value>
    </property>
</configuration>
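As noted above, create the directory referenced by hadoop.tmp.dir before formatting the NameNode:
mkdir -p /root/myhadoop/hadoop-3.0.0/tmp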
vi mapred-site.xml
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.application.classpath</name>
        <value>
            /root/myhadoop/hadoop-3.0.0/etc/*,
            /root/myhadoop/hadoop-3.0.0/etc/hadoop/*,
            /root/myhadoop/hadoop-3.0.0/lib/*,
            /root/myhadoop/hadoop-3.0.0/share/hadoop/common/*,
            /root/myhadoop/hadoop-3.0.0/share/hadoop/common/lib/*,
            /root/myhadoop/hadoop-3.0.0/share/hadoop/mapreduce/*,
            /root/myhadoop/hadoop-3.0.0/share/hadoop/mapreduce/lib-examples/*,
            /root/myhadoop/hadoop-3.0.0/share/hadoop/hdfs/*,
            /root/myhadoop/hadoop-3.0.0/share/hadoop/hdfs/lib/*,
            /root/myhadoop/hadoop-3.0.0/share/hadoop/yarn/*,
            /root/myhadoop/hadoop-3.0.0/share/hadoop/yarn/lib/*
        </value>
    </property>
</configuration>
vi yarn-site.xml
<configuration>
    <!-- Site specific YARN configuration properties -->
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>192.168.13.245</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <!-- The classpath below duplicates mapred-site.xml; strictly it belongs
         there, but keeping the copy here is harmless. -->
    <property>
        <name>mapreduce.application.classpath</name>
        <value>
            /root/myhadoop/hadoop-3.0.0/etc/*,
            /root/myhadoop/hadoop-3.0.0/etc/hadoop/*,
            /root/myhadoop/hadoop-3.0.0/lib/*,
            /root/myhadoop/hadoop-3.0.0/share/hadoop/common/*,
            /root/myhadoop/hadoop-3.0.0/share/hadoop/common/lib/*,
            /root/myhadoop/hadoop-3.0.0/share/hadoop/mapreduce/*,
            /root/myhadoop/hadoop-3.0.0/share/hadoop/mapreduce/lib-examples/*,
            /root/myhadoop/hadoop-3.0.0/share/hadoop/hdfs/*,
            /root/myhadoop/hadoop-3.0.0/share/hadoop/hdfs/lib/*,
            /root/myhadoop/hadoop-3.0.0/share/hadoop/yarn/*,
            /root/myhadoop/hadoop-3.0.0/share/hadoop/yarn/lib/*
        </value>
    </property>
</configuration>
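Optionally confirm that the client picks up the new settings with the standard getconf utility:
hdfs getconf -confKey fs.defaultFS
# expected output: hdfs://192.168.13.245:9000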
Format the NameNode
hdfs namenode -format
# The following line indicates the format succeeded
2017-12-20 22:42:36,704 INFO common.Storage: Storage directory /root/myhadoop/hadoop-3.0.0/tmp/dfs/name has been successfully formatted.
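The freshly formatted metadata can be inspected on disk (path taken from the log line above):
ls /root/myhadoop/hadoop-3.0.0/tmp/dfs/name/current
# expect VERSION, seen_txid and an initial fsimage_* pair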
Start everything with the combined script start-all.sh,
which is equivalent to running start-dfs.sh (HDFS: data storage) and start-yarn.sh (YARN: computation) separately.
[root@Hadoopc1 hadoop]# start-all.sh
Starting namenodes on [192.168.13.245]
ERROR: Attempting to operate on hdfs namenode as root
ERROR: but there is no HDFS_NAMENODE_USER defined. Aborting operation.
Starting datanodes
ERROR: Attempting to operate on hdfs datanode as root
ERROR: but there is no HDFS_DATANODE_USER defined. Aborting operation.
Starting secondary namenodes [Hadoopc1.localdomain]
ERROR: Attempting to operate on hdfs secondarynamenode as root
ERROR: but there is no HDFS_SECONDARYNAMENODE_USER defined. Aborting operation.
Starting resourcemanager
ERROR: Attempting to operate on yarn resourcemanager as root
ERROR: but there is no YARN_RESOURCEMANAGER_USER defined. Aborting operation.
Starting nodemanagers
ERROR: Attempting to operate on yarn nodemanager as root
ERROR: but there is no YARN_NODEMANAGER_USER defined. Aborting operation.
Each error means the scripts refuse to start a daemon as root without an explicit user definition. Fix this by editing the following four files under /root/myhadoop/hadoop-3.0.0/sbin:
vi sbin/start-dfs.sh
vi sbin/stop-dfs.sh
Add at the top of both files:
HDFS_DATANODE_USER=root
HADOOP_SECURE_DN_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
vi sbin/start-yarn.sh
vi sbin/stop-yarn.sh
Add at the top of both files:
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root
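An arguably cleaner alternative (a sketch, not part of the original walkthrough): instead of patching the four scripts, define the same users once in etc/hadoop/hadoop-env.sh, which every start/stop script sources:
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root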
Run start-all.sh again; this time everything starts. Verify with jps:
[root@Hadoopc1 sbin]# jps
7584 DataNode
9042 NodeManager
8923 ResourceManager
7468 NameNode
7869 SecondaryNameNode
12206 Jps
Web UIs for verification:
HDFS NameNode: http://192.168.13.245:9870
YARN ResourceManager: http://192.168.13.245:8088
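As a final smoke test, a sketch that reruns the earlier wordcount against HDFS this time (the HDFS paths /input and /output/wc are arbitrary choices, not from the original notes):
hdfs dfs -mkdir -p /input
hdfs dfs -put /root/myhadoop/input/data1.txt /input
hadoop jar /root/myhadoop/hadoop-3.0.0/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.0.0.jar wordcount /input/data1.txt /output/wc
hdfs dfs -cat /output/wc/part-r-00000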