```bash
# Install the KDC service
yum install -y krb5-server krb5-libs krb5-workstation
# Initialize the KDC database for the EXAMPLE.COM realm
kdb5_util create -s -r EXAMPLE.COM
```
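Before Hadoop can authenticate against this KDC, each daemon needs a service principal and a keytab. A minimal sketch, assuming the EXAMPLE.COM realm above and a hypothetical NameNode host `nn1.example.com`:

```bash
# Create a service principal and export its keytab (host and keytab path are assumptions)
kadmin.local -q "addprinc -randkey nn/nn1.example.com@EXAMPLE.COM"
kadmin.local -q "ktadd -k /etc/security/keytabs/nn.service.keytab nn/nn1.example.com@EXAMPLE.COM"
```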
```xml
<!-- core-site.xml -->
<property>
  <name>hadoop.security.authentication</name>
  <value>kerberos</value>
</property>
<property>
  <name>hadoop.security.authorization</name>
  <value>true</value>
</property>
```
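Once these settings take effect, unauthenticated HDFS access is rejected. A quick verification, assuming a hypothetical principal `hdfs-user@EXAMPLE.COM`:

```bash
# Obtain a ticket and confirm authenticated access to HDFS
kinit hdfs-user@EXAMPLE.COM
klist
hadoop fs -ls /
```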
```bash
# Install the Ranger HDFS plugin
cp ranger-2.3.0-hdfs-plugin.tar.gz /usr/hdp/current/hadoop-client
cd /usr/hdp/current/hadoop-client
tar -xzf ranger-2.3.0-hdfs-plugin.tar.gz
cd ranger-2.3.0-hdfs-plugin
./enable-hdfs-plugin.sh
```
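Note that `enable-hdfs-plugin.sh` reads `install.properties` from the plugin directory, so the Ranger Admin URL and repository name must be set before running it. A hedged sketch, with both values as placeholders:

```bash
# Point the plugin at Ranger Admin (run before enable-hdfs-plugin.sh)
sed -i 's|^POLICY_MGR_URL=.*|POLICY_MGR_URL=http://ranger-admin:6080|' install.properties
sed -i 's|^REPOSITORY_NAME=.*|REPOSITORY_NAME=hdfs_repo|' install.properties
```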
"policyName": "finance-data-access",
"resources": {
"path": {
"values": ["/data/finance/*"],
"isRecursive": true
}
},
"policyItems": [
{
"accesses": [
{"type": "read"},
{"type": "write"}
],
"users": ["finance_team"],
"conditions": [
{"type": "access-time", "values": {"days": "mon-fri"}}
]
}
]
}<!-- core-site.xml -->
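A policy like this can be pushed to Ranger Admin through its public REST API instead of the UI. A sketch, assuming Ranger Admin at `ranger-admin:6080`, default credentials, and the policy saved as `finance-data-access.json` (the v2 API also expects a `service` field naming the HDFS repository):

```bash
# Create the policy via the Ranger Admin REST API
curl -u admin:admin -H "Content-Type: application/json" \
  -X POST http://ranger-admin:6080/service/public/v2/api/policy \
  -d @finance-data-access.json
```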
```xml
<!-- core-site.xml -->
<property>
  <name>fs.defaultFS</name>
  <value>viewfs://clusterFed/</value>
</property>
<property>
  <name>fs.viewfs.mounttable.clusterFed.link./data</name>
  <value>hdfs://cluster1/data</value>
</property>
<property>
  <name>fs.viewfs.mounttable.clusterFed.link./logs</name>
  <value>hdfs://cluster2/logs</value>
</property>
<property>
  <name>fs.viewfs.mounttable.clusterFed.link./archive</name>
  <value>hdfs://cluster3/archive</value>
</property>
```

```
viewfs://clusterFed/
├── /data    -> hdfs://cluster1/data
├── /logs    -> hdfs://cluster2/logs
└── /archive -> hdfs://cluster3/archive
```
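With the mount table active, clients see all three clusters as a single namespace:

```bash
# Paths resolve through the ViewFS mount table
hadoop fs -ls viewfs://clusterFed/
hadoop fs -ls /data      # backed by hdfs://cluster1/data
hadoop fs -ls /archive   # backed by hdfs://cluster3/archive
```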
```bash
# Cross-cluster sync with the dynamic copy strategy
hadoop distcp \
  -strategy dynamic \
  -bandwidth 100 \
  -m 20 \
  -update \
  hdfs://cluster1/src hdfs://cluster2/target
```
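The `syncProgress` value used by the ECharts gauge below could be polled from the YARN ResourceManager REST API. A hedged sketch (the RM host and application id are placeholders):

```bash
# Hypothetical: read the DistCp job's progress (0-100) from the RM REST API
curl -s http://resourcemanager:8088/ws/v1/cluster/apps/application_1700000000000_0001 \
  | python3 -c 'import json,sys; print(json.load(sys.stdin)["app"]["progress"])'
```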
```javascript
// Sync-status visualization with ECharts[^5]
option = {
  series: [{
    type: 'gauge',
    data: [{
      value: syncProgress,  // 0-100, fed by the monitoring backend
      name: 'Sync progress'
    }]
  }]
};
```

```dockerfile
FROM openjdk:8-jre
RUN wget https://archive.apache.org/dist/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz
RUN tar -xzf hadoop-3.3.1.tar.gz -C /opt/
ENV HADOOP_HOME=/opt/hadoop-3.3.1
ENV PATH=$HADOOP_HOME/bin:$PATH
```
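A quick way to sanity-check the image before wiring it into Kubernetes (the tag matches the StatefulSet below):

```bash
# Build the DataNode image and verify the Hadoop binaries run
docker build -t hadoop-dn:3.3.1 .
docker run --rm hadoop-dn:3.3.1 hadoop version
```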
```yaml
# hadoop-cluster.yaml
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: datanode
spec:
  serviceName: "hadoop-dn"
  replicas: 3
  selector:
    matchLabels:
      app: hadoop-dn
  template:
    metadata:
      labels:
        app: hadoop-dn
    spec:
      containers:
      - name: datanode
        image: hadoop-dn:3.3.1
        ports:
        - containerPort: 9864
        volumeMounts:
        - mountPath: /hadoop/dfs/data
          name: hadoop-data
  volumeClaimTemplates:
  - metadata:
      name: hadoop-data
    spec:
      accessModes: ["ReadWriteOnce"]
      resources:
        requests:
          storage: 100Gi  # example size; tune per workload
```

| Storage Type | Use Case | Performance |
|---|---|---|
| hostPath | Development/test environments | Low latency |
| NFS | Small-to-medium production | ~50 MB/s throughput |
| Ceph RBD | Large-scale clusters | 3000+ IOPS |
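The StatefulSet's `serviceName: "hadoop-dn"` refers to a headless Service that must exist for stable per-pod DNS. A sketch of creating it and rolling out the manifest (`kubectl create service` sets an `app: hadoop-dn` selector, matching the pod labels above):

```bash
# Create the headless Service, then deploy and watch the DataNode pods
kubectl create service clusterip hadoop-dn --clusterip="None"
kubectl apply -f hadoop-cluster.yaml
kubectl get pods -l app=hadoop-dn -w
```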
```bash
# Check key parameters
hdfs getconf -confKey dfs.namenode.handler.count
yarn node -list -showDetails | grep -i 'vcores'
```
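For `dfs.namenode.handler.count`, a widely cited rule of thumb from vendor tuning guides is 20 × ln(number of DataNodes). A quick sketch, assuming a hypothetical 200-node cluster:

```bash
# e.g. 200 DataNodes -> roughly 105 handler threads
python3 -c 'import math; print(int(20 * math.log(200)))'
```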
```bash
# Run the TeraSort benchmark
# TeraSort needs teragen-format input: 10,000,000 rows x 100 bytes ≈ 1 GB
hadoop jar hadoop-mapreduce-examples.jar teragen 10000000 /input
hadoop jar hadoop-mapreduce-examples.jar terasort \
  -Dmapreduce.job.maps=100 \
  /input /output
```

```mermaid
graph TD
  A[Service failure] --> B{Log analysis}
  B -->|NameNode| C[Check fsimage integrity]
  B -->|DataNode| D[Verify disk space]
  B -->|YARN| E[Check resource overcommitment]
```
```bash
# Repair missing/corrupt HDFS blocks
hdfs fsck / -files -blocks -locations
# Force lease recovery on a file stuck open-for-write
hdfs debug recoverLease -path /corrupt/file -retries 3
```
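After lease recovery it is worth re-checking overall health; a quick sketch:

```bash
# Summarize filesystem state and scan the report for remaining damage
hdfs fsck / | tail -n 20
hdfs dfsadmin -report | grep -i 'missing\|corrupt'
```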