Skip to content

Instantly share code, notes, and snippets.

@kangvcar
Last active May 29, 2025 10:04
Show Gist options
  • Select an option

  • Save kangvcar/5b7238a8e2ff933c73112b382c8c2c67 to your computer and use it in GitHub Desktop.

Select an option

Save kangvcar/5b7238a8e2ff933c73112b382c8c2c67 to your computer and use it in GitHub Desktop.
#!/bin/bash
# ANSI color codes used by the logging helpers below.
# readonly guards against accidental reassignment later in the script.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[0;33m'
readonly BLUE='\033[0;34m'
readonly CYAN='\033[0;36m'
readonly BOLD='\033[1m'
readonly NC='\033[0m' # "no color" — resets all terminal attributes
# Print an informational message to stdout in cyan.
# %b expands the escape sequences stored in the color variables, while
# %s leaves the message text untouched — the original `echo -e` also
# interpreted backslashes inside $1 (e.g. Windows-style paths).
show_info() {
  printf '%b[i] %s%b\n' "${CYAN}" "$1" "${NC}"
}
# Print a success message to stdout in green.
# %s keeps backslashes in $1 literal (echo -e would have expanded them).
show_success() {
  printf '%b[✓] %s%b\n' "${GREEN}" "$1" "${NC}"
}
# Print an error message in red and abort the whole script.
# Diagnostics now go to stderr (>&2) so they survive stdout redirection;
# %s keeps backslashes in $1 literal (echo -e would have expanded them).
show_error() {
  printf '%b[✗] %s%b\n' "${RED}" "$1" "${NC}" >&2
  exit 1
}
# Print a warning message in yellow to stderr (non-fatal).
# %s keeps backslashes in $1 literal (echo -e would have expanded them).
show_warning() {
  printf '%b[!] %s%b\n' "${YELLOW}" "$1" "${NC}" >&2
}
# Report the outcome of the command executed just before this call.
# $1 - description of the step
# $2 - (optional) explicit exit status to check instead of $?
# The status must be captured in the very first expansion: once any
# command runs inside the function, $? no longer refers to the caller's
# previous command.
check_success() {
  local status=${2:-$?}
  if [ "$status" -ne 0 ]; then
    show_error "执行命令失败: $1"
  else
    show_success "$1"
  fi
}
# Abort unless the command named in $1 is resolvable on PATH.
# $1 is quoted so names containing spaces or glob characters cannot
# word-split or expand (the original left it unquoted).
check_command() {
  if ! command -v "$1" &> /dev/null; then
    show_error "命令 '$1' 未找到,请确保已正确安装并配置环境变量"
  fi
}
# Load system-wide environment variables and verify the Hadoop/Java
# prerequisites this deployment depends on.  Aborts (via show_error)
# when /etc/profile is missing, when HADOOP_HOME / JAVA_HOME are unset,
# or when the java/hadoop/hdfs binaries are not on PATH.
load_environment() {
show_info "加载环境变量..."
# Ensure the profile exists before sourcing it
if [ ! -f /etc/profile ]; then
show_error "未找到 /etc/profile 文件"
fi
# Pull in HADOOP_HOME, JAVA_HOME, PATH, etc. for this shell
source /etc/profile
# Required environment variables — fail fast if either is empty/unset
if [ -z "$HADOOP_HOME" ]; then
show_error "HADOOP_HOME 环境变量未设置"
fi
if [ -z "$JAVA_HOME" ]; then
show_error "JAVA_HOME 环境变量未设置"
fi
# Required executables must be resolvable on PATH
check_command java
check_command hadoop
check_command hdfs
show_success "环境变量加载完成"
}
# ---- Banner ----------------------------------------------------------
clear
# %b expands the literal \033 escapes stored in the color variables,
# matching what `echo -e` did here originally.
printf '%b\n' "${BOLD}${BLUE}"
echo "=============================================================="
echo " Spark 集群自动部署脚本 v1.0 "
echo "=============================================================="
printf '%b\n' "${NC}"
# Validate the runtime environment before doing anything else
load_environment
# ---- Step 1: verify the Hadoop installation and services -------------
show_info "检查 Hadoop 环境..."
[ -d /opt/hadoop ] || show_error "未找到 Hadoop 安装目录,请先安装 Hadoop"
# Start HDFS when no NameNode process is visible in jps output
show_info "检查Hadoop服务状态..."
if ! jps | grep -q "NameNode"; then
  show_warning "NameNode未运行,尝试启动Hadoop服务..."
  if ! start-dfs.sh > /dev/null 2>&1; then
    show_error "启动Hadoop服务失败,请手动检查Hadoop服务状态"
  fi
  show_success "Hadoop服务已启动"
fi
# Leave HDFS safe mode if it is still active (writes would fail)
show_info "检查HDFS安全模式..."
if ! hdfs dfsadmin -safemode get 2>/dev/null | grep -q "Safe mode is OFF"; then
  show_warning "HDFS处于安全模式,尝试退出安全模式..."
  if ! hdfs dfsadmin -safemode leave > /dev/null 2>&1; then
    show_error "无法退出HDFS安全模式,请检查HDFS服务状态"
  fi
  show_success "已退出HDFS安全模式"
fi
# ---- Discover cluster nodes from the Hadoop workers file -------------
show_info "获取集群节点信息..."
# sort -u reads the file directly and deduplicates in one pass
# (replaces the useless `cat file | sort | uniq` pipeline)
CLUSTER_NODES=$(sort -u /opt/hadoop/etc/hadoop/workers)
NODE_COUNT=$(echo "$CLUSTER_NODES" | wc -l)
MASTER_NODE="master"
show_success "发现集群共有 $NODE_COUNT 个节点"
echo -e "${CYAN}节点列表:${NC}"
echo "$CLUSTER_NODES"
# ---- Step 2: download the Spark distribution --------------------------
show_info "准备下载 Spark 安装包..."
# Ensure the download directory exists
if [ ! -d /opt/software ]; then
  mkdir -p /opt/software
  show_success "创建软件存放目录: /opt/software"
fi
# Abort instead of downloading into the wrong directory if cd fails
# (the original left this unchecked)
cd /opt/software || show_error "无法进入目录 /opt/software"
# Spark release coordinates
SPARK_VERSION="3.5.6"
HADOOP_COMPAT_VERSION="3"
SPARK_FILE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_COMPAT_VERSION}.tgz"
SPARK_URL="https://dlcdn.apache.org/spark/spark-${SPARK_VERSION}/${SPARK_FILE}"
if [ ! -f "$SPARK_FILE" ]; then
  show_info "下载 Spark ${SPARK_VERSION} 安装包..."
  echo -e "${YELLOW}下载可能需要几分钟时间,请耐心等待...${NC}"
  # URL is quoted; success is judged by the file's existence afterwards,
  # as in the original (wget's own status is not relied upon)
  wget --no-check-certificate --progress=dot:giga "$SPARK_URL" 2>&1
  if [ -f "$SPARK_FILE" ]; then
    check_success "下载 Spark 安装包完成"
  else
    show_error "下载 Spark 安装包失败"
  fi
else
  show_success "Spark 安装包已存在,无需重新下载"
fi
# ---- Step 3: unpack Spark into /opt/spark -----------------------------
show_info "解压安装 Spark..."
if [ ! -d /opt/spark ]; then
  # $SPARK_FILE is quoted (was unquoted in the original)
  tar -zxf "$SPARK_FILE" -C /opt/ > /dev/null 2>&1
  check_success "解压 Spark 到 /opt 目录"
  # Locate the extracted directory with a glob instead of parsing ls;
  # if the pattern matches nothing, the literal fails the -d test and
  # EXTRACTED_DIR stays empty, preserving the original error path.
  EXTRACTED_DIR=""
  for d in /opt/spark-"${SPARK_VERSION}"-bin-hadoop"${HADOOP_COMPAT_VERSION}"*; do
    if [ -d "$d" ]; then
      EXTRACTED_DIR=$d
      break
    fi
  done
  if [ -n "$EXTRACTED_DIR" ]; then
    mv -- "$EXTRACTED_DIR" /opt/spark
    check_success "重命名 Spark 目录为 /opt/spark"
  else
    show_error "Spark 解压目录不存在,请检查安装包是否正确"
  fi
else
  show_success "Spark 已安装在 /opt/spark 目录"
fi
# ---- Step 4: persist SPARK_HOME in /etc/profile -----------------------
show_info "配置 Spark 环境变量..."
if grep -q "SPARK_HOME=/opt/spark" /etc/profile; then
  show_success "Spark 环境变量已存在"
else
  # The quoted 'EOF' delimiter keeps $PATH/$SPARK_HOME literal in the
  # written file — same bytes the original produced with \$ escapes.
  cat >> /etc/profile << 'EOF'
# Spark 环境变量
export SPARK_HOME=/opt/spark
export PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin
EOF
  check_success "配置 Spark 环境变量到 /etc/profile"
  # Make the new variables visible to the rest of this script
  source /etc/profile
  show_info "Spark 环境变量已生效"
fi
# ---- Step 5: generate spark-env.sh -------------------------------------
show_info "配置 Spark 环境..."
mkdir -p /opt/spark/conf
# Back up an existing spark-env.sh BEFORE the template copy clobbers it.
# (The original backed up only after copying the template over the file,
# so the backup never contained the user's previous configuration.)
if [ -f /opt/spark/conf/spark-env.sh ]; then
  cp /opt/spark/conf/spark-env.sh /opt/spark/conf/spark-env.sh.bak.$(date +%Y%m%d%H%M%S)
fi
# Seed both config files from the shipped templates
cp /opt/spark/conf/spark-env.sh.template /opt/spark/conf/spark-env.sh
cp /opt/spark/conf/spark-defaults.conf.template /opt/spark/conf/spark-defaults.conf
show_info "配置 spark-env.sh..."
# Determine the public IP used in the history-server URL and the report
show_info "获取当前服务器公网IP..."
MASTER_PUBLIC_IP=$(curl -s ifconfig.me 2>/dev/null || echo "无法获取公网IP")
if [[ "$MASTER_PUBLIC_IP" == "无法获取公网IP" ]]; then
  show_warning "无法获取公网IP,将使用主机名"
  MASTER_PUBLIC_IP="master"
else
  show_success "获取公网IP成功: $MASTER_PUBLIC_IP"
fi
# Unquoted EOF: ${MASTER_PUBLIC_IP} expands now; \$HADOOP_HOME stays
# literal so it expands when spark-env.sh itself is sourced.
cat > /opt/spark/conf/spark-env.sh << EOF
# Java 路径
export JAVA_HOME=/opt/java8
export HADOOP_HOME=/opt/hadoop
export HADOOP_CONF_DIR=\$HADOOP_HOME/etc/hadoop
# Spark集群管理配置
export SPARK_MASTER_HOST=master
export SPARK_MASTER_PORT=7077
# 资源配置 - 为3节点4核8G的集群合理分配资源
export SPARK_WORKER_CORES=3 # 每个Worker使用3核,预留1核给系统
export SPARK_WORKER_MEMORY=6g # 每个Worker使用6G内存,预留2G给系统
export SPARK_DRIVER_MEMORY=2g # Driver程序使用2G内存
export SPARK_EXECUTOR_MEMORY=2g # 每个Executor使用2G内存
export SPARK_EXECUTOR_CORES=1 # 每个Executor使用1核
export SPARK_EXECUTOR_INSTANCES=2 # 每个Worker上运行2个Executor
# 历史服务器配置
export SPARK_HISTORY_OPTS="-Dspark.history.ui.port=18080 -Dspark.history.fs.logDirectory=hdfs:///spark-history -Dspark.history.retainedApplications=20 -Dspark.history.ui.address=http://${MASTER_PUBLIC_IP}:18080"
EOF
check_success "配置 spark-env.sh 完成"
# ---- Step 5 (cont.): generate spark-defaults.conf ----------------------
show_info "配置 spark-defaults.conf..."
# Keep a timestamped copy of whatever configuration is currently present
if [ -f /opt/spark/conf/spark-defaults.conf ]; then
  stamp=$(date +%Y%m%d%H%M%S)
  cp /opt/spark/conf/spark-defaults.conf "/opt/spark/conf/spark-defaults.conf.bak.${stamp}"
fi
# Unquoted EOF: ${MASTER_PUBLIC_IP} is expanded at generation time
cat > /opt/spark/conf/spark-defaults.conf << EOF
# Spark 默认配置
spark.master yarn
spark.eventLog.enabled true
spark.eventLog.dir hdfs:///spark-history
spark.history.fs.logDirectory hdfs:///spark-history
spark.history.ui.address http://${MASTER_PUBLIC_IP}:18080
# 资源配置 - 为YARN模式设置合理的资源分配
spark.driver.memory 2g
spark.executor.memory 2g
spark.executor.cores 1
spark.executor.instances 6
spark.dynamicAllocation.enabled true
spark.dynamicAllocation.initialExecutors 2
spark.dynamicAllocation.minExecutors 2
spark.dynamicAllocation.maxExecutors 6
# 性能优化
spark.serializer org.apache.spark.serializer.KryoSerializer
spark.kryoserializer.buffer.max 128m
spark.network.timeout 600s
spark.local.dir /tmp/spark-temp
EOF
check_success "配置 spark-defaults.conf 完成"
# ---- Configure the Spark workers file ----------------------------------
show_info "配置 workers 文件..."
# Writing with > already creates and truncates the file, so the
# original's template copy and separate `> workers` truncation were
# redundant; the final file content is identical.
echo "$CLUSTER_NODES" > /opt/spark/conf/workers
check_success "配置 workers 文件完成"
# ---- Step 6: create the HDFS directories Spark needs -------------------
show_info "在 HDFS 上创建 Spark 所需的目录..."
# Event-log directory for the history server
show_info "创建 Spark 历史日志目录..."
if hdfs dfs -test -d /spark-history > /dev/null 2>&1; then
  show_success "Spark 历史日志目录已存在"
elif hdfs dfs -mkdir -p /spark-history > /dev/null 2>&1; then
  hdfs dfs -chmod -R 777 /spark-history > /dev/null 2>&1
  check_success "创建 Spark 历史日志目录"
else
  show_error "创建 Spark 历史日志目录失败,请检查HDFS权限和空间"
fi
# Cache directory (also holds the uploaded spark-libs.jar under /spark/jars)
show_info "创建 Spark 缓存目录..."
if hdfs dfs -test -d /spark > /dev/null 2>&1; then
  show_success "Spark 缓存目录已存在"
elif hdfs dfs -mkdir -p /spark > /dev/null 2>&1; then
  hdfs dfs -mkdir -p /spark/jars > /dev/null 2>&1
  hdfs dfs -chmod -R 777 /spark > /dev/null 2>&1
  check_success "创建 Spark 缓存目录"
else
  show_error "创建 Spark 缓存目录失败,请检查HDFS权限和空间"
fi
# ---- Step 7: build and upload the spark-libs.jar archive ---------------
show_info "创建 Spark Jar 档案文件..."
CURRENT_DIR=$(pwd)
# cd is now checked (the original ignored a failed cd)
cd /opt/spark || show_error "无法进入目录 /opt/spark"
if hdfs dfs -test -f /spark/jars/spark-libs.jar > /dev/null 2>&1; then
  show_success "Spark Jar 档案文件已存在"
else
  # Stage the jars in a private temp dir; $TEMP_DIR is quoted throughout
  # and removed on every exit path (the original leaked it on errors,
  # since show_error exits the script).
  TEMP_DIR=$(mktemp -d)
  cp -r /opt/spark/jars/* "$TEMP_DIR"/
  if jar cvf /opt/software/spark-libs.jar -C "$TEMP_DIR" . > /dev/null 2>&1; then
    if hdfs dfs -put -f /opt/software/spark-libs.jar /spark/jars/ > /dev/null 2>&1; then
      check_success "创建并上传 Spark Jar 档案文件"
    else
      rm -rf -- "$TEMP_DIR"
      show_error "上传 Spark Jar 档案文件到HDFS失败"
    fi
  else
    rm -rf -- "$TEMP_DIR"
    show_error "创建 Spark Jar 档案文件失败"
  fi
  rm -rf -- "$TEMP_DIR"
fi
cd "$CURRENT_DIR" || show_error "无法返回目录 $CURRENT_DIR"
# ---- Step 8: push the Spark installation to the worker nodes -----------
show_info "同步 Spark 到所有节点..."
# $CLUSTER_NODES is intentionally unquoted: word-splitting yields one
# hostname per iteration (the list is newline-separated hostnames).
for node in $CLUSTER_NODES; do
  if [ "$node" != "$MASTER_NODE" ]; then
    show_info "同步 Spark 到 $node 节点..."
    # Hostname is quoted in the remote targets (was unquoted originally)
    scp -r /opt/spark "$node":/opt/ > /dev/null 2>&1
    scp -r /etc/profile "$node":/etc/ > /dev/null 2>&1
    # NOTE(review): sourcing /etc/profile over ssh only affects that one
    # remote shell, not future logins — kept for parity with the original,
    # but it is effectively a no-op; verify whether it can be dropped.
    # check_success below reflects only this ssh command's status, not
    # the scp transfers above.
    ssh "$node" "source /etc/profile" > /dev/null 2>&1
    check_success "同步 Spark 到 $node 节点完成"
  fi
done
# Step 9: start the Spark History Server (idempotent — skipped when a
# HistoryServer JVM is already running).
show_info "启动 Spark 相关服务..."
# Look for an existing HistoryServer process first
show_info "检查 Spark History Server 状态..."
if pgrep -f "org.apache.spark.deploy.history.HistoryServer" > /dev/null; then
show_success "Spark History Server 已在运行"
else
show_info "启动 Spark History Server..."
# Make sure the local log directory exists and is writable
if [ ! -d "/opt/spark/logs" ]; then
mkdir -p /opt/spark/logs
chmod 777 /opt/spark/logs
fi
# The server reads event logs from HDFS; fail fast if the dir is missing
if ! hdfs dfs -test -d /spark-history > /dev/null 2>&1; then
show_error "Spark历史日志目录不存在,请先运行脚本创建必要的HDFS目录"
fi
# Free port 18080 if a stale server still holds it
if netstat -tuln | grep -q ":18080"; then
show_warning "端口18080已被占用,尝试停止现有服务..."
/opt/spark/sbin/stop-history-server.sh > /dev/null 2>&1
sleep 5
fi
# Launch the server, then poll for up to 30 seconds
/opt/spark/sbin/start-history-server.sh > /dev/null 2>&1
show_info "等待服务启动..."
for i in {1..30}; do
# Success requires the process to exist AND the HTTP endpoint to answer
if pgrep -f "org.apache.spark.deploy.history.HistoryServer" > /dev/null; then
if curl -s "http://localhost:18080" > /dev/null 2>&1; then
show_success "Spark History Server 启动成功"
break
fi
fi
# Last iteration without success → abort with a pointer to the logs
if [ $i -eq 30 ]; then
show_error "Spark History Server 启动超时,请检查日志文件: /opt/spark/logs/spark-*-org.apache.spark.deploy.history.HistoryServer-*.out"
fi
echo -ne "${YELLOW}等待服务启动: $i/30 秒${NC}\r"
sleep 1
done
fi
# Final reachability report (warning only — does not abort the script)
show_info "Spark History Server 状态:"
if curl -s "http://localhost:18080" > /dev/null 2>&1; then
show_success "服务正常运行在 http://localhost:18080"
else
show_warning "服务可能未正常运行,请检查日志文件"
fi
# ---- Final report: web UIs, cluster summary, usage examples -----------
# Everything below is informational output only; no further changes are
# made to the system.
echo -e "${BOLD}${BLUE}🌐 Web管理界面:${NC}"
echo -e "┌───────────────────────────────────────────────────────────────────┐"
echo -e "│ HDFS管理界面: ${BOLD}http://$MASTER_PUBLIC_IP:9870/${NC} "
echo -e "│ YARN资源管理界面: ${BOLD}http://$MASTER_PUBLIC_IP:8088/${NC} "
echo -e "│ MapReduce历史服务器: ${BOLD}http://$MASTER_PUBLIC_IP:19888/${NC} "
echo -e "│ Spark History Server: ${BOLD}http://$MASTER_PUBLIC_IP:18080/${NC} "
echo -e "└───────────────────────────────────────────────────────────────────┘"
# Usage cheat-sheet for the freshly deployed cluster
echo
echo -e "${BOLD}${GREEN}✅ Spark 集群部署完成${NC}"
echo
echo -e "${BOLD}${CYAN}🖥️ Spark 集群信息:${NC}"
echo -e "┌───────────────────────────────────────────────────────────────────┐"
echo -e "│ Spark 版本: ${BOLD}${SPARK_VERSION}${NC} "
echo -e "│ Spark 主节点: ${BOLD}${MASTER_NODE}${NC} "
echo -e "│ Spark 运行模式: ${BOLD}YARN (集群模式)${NC} "
echo -e "│ Spark History Server: ${BOLD}http://$MASTER_PUBLIC_IP:18080${NC} "
echo -e "└───────────────────────────────────────────────────────────────────┘"
echo
echo -e "${BOLD}${YELLOW}📋 Spark 使用说明:${NC}"
echo -e "┌───────────────────────────────────────────────────────────────────┐"
echo -e "│ 1. 提交 Spark 应用到 YARN(集群模式): "
echo -e "│ spark-submit --master yarn --deploy-mode cluster \\ "
echo -e "│ --driver-memory 2g --executor-memory 2g \\ "
echo -e "│ --executor-cores 1 --num-executors 3 \\ "
echo -e "│ --class org.example.SparkApp path/to/app.jar [参数] "
echo -e "│ "
echo -e "│ 2. 提交 Spark 应用到 YARN(客户端模式): "
echo -e "│ spark-submit --master yarn --deploy-mode client \\ "
echo -e "│ --driver-memory 2g --executor-memory 2g \\ "
echo -e "│ --executor-cores 1 --num-executors 3 \\ "
echo -e "│ --class org.example.SparkApp path/to/app.jar [参数] "
echo -e "│ "
echo -e "│ 3. 启动交互式 Spark Shell: "
echo -e "│ spark-shell --master yarn "
echo -e "│ "
echo -e "│ 4. 启动 PySpark 交互式环境: "
echo -e "│ pyspark --master yarn "
echo -e "│ "
echo -e "│ 5. 启动 Spark SQL 命令行: "
echo -e "│ spark-sql --master yarn "
echo -e "│ "
echo -e "│ 6. Spark 集群管理命令: "
# NOTE: $SPARK_HOME below expands when this report is printed (to the
# value sourced earlier from /etc/profile), not in the reader's shell.
echo -e "│ - 启动历史服务器: $SPARK_HOME/sbin/start-history-server.sh "
echo -e "│ - 停止历史服务器: $SPARK_HOME/sbin/stop-history-server.sh "
echo -e "└───────────────────────────────────────────────────────────────────┘"
# Worked examples the user can copy-paste
echo
echo -e "${BOLD}${BLUE}🔍 Spark 示例程序:${NC}"
echo
echo -e "${BOLD}1. 计算π值示例(client模式):${NC}"
echo
echo -e "命令:"
echo -e "spark-submit --class org.apache.spark.examples.SparkPi \\"
echo -e " --master yarn --deploy-mode client \\"
echo -e " --driver-memory 2g --executor-memory 2g \\"
echo -e " --executor-cores 1 --num-executors 3 \\"
echo -e " /opt/spark/examples/jars/spark-examples_2.12-3.5.6.jar 10"
echo
echo -e "${BOLD}2. WordCount 示例(PySpark):${NC}"
echo
echo -e "步骤1: 创建测试文件"
echo -e "cat > /tmp/words.txt << EOF"
echo -e "Hello Spark Hadoop Spark Hello"
echo -e "Big Data Processing with Spark"
echo -e "Spark is faster than MapReduce"
echo -e "EOF"
echo
echo -e "步骤2: 上传到HDFS"
echo -e "hdfs dfs -mkdir -p /spark/input"
echo -e "hdfs dfs -put /tmp/words.txt /spark/input/"
echo
echo -e "步骤3: 启动PySpark并执行"
echo -e "pyspark --master yarn"
echo
echo -e "步骤4: 执行以下Python代码"
echo -e "text_file = spark.read.text(\"hdfs:///spark/input/words.txt\")"
echo -e "counts = text_file.rdd.flatMap(lambda line: line.value.split(\" \"))\\"
echo -e " .filter(lambda word: word.strip() != \"\") \\"
echo -e " .map(lambda word: (word, 1)) \\"
echo -e " .reduceByKey(lambda a, b: a + b) \\"
echo -e " .toDF([\"word\", \"count\"])"
echo -e "counts.show()"
echo -e "counts.write.csv(\"hdfs:///spark/output/wordcount-py-result\")"
echo
# Closing banner
echo -e "${BOLD}${BLUE}=============================================================="
echo -e " Spark 集群自动部署脚本执行完毕 "
echo -e "==============================================================${NC}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment