Last active
May 29, 2025 10:04
-
-
Save kangvcar/5b7238a8e2ff933c73112b382c8c2c67 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# ANSI color codes used by the logging helpers below.
# Declared readonly: they are constants and must never be reassigned.
readonly RED='\033[0;31m'     # errors
readonly GREEN='\033[0;32m'   # success
readonly YELLOW='\033[0;33m'  # warnings
readonly BLUE='\033[0;34m'    # banners
readonly CYAN='\033[0;36m'    # informational messages
readonly BOLD='\033[1m'       # emphasis
readonly NC='\033[0m'         # reset / no color
# Print an informational message with a cyan "[i]" prefix.
# $1 - message text
show_info() {
  printf '%b\n' "${CYAN}[i] $1${NC}"
}
# Print a success message with a green check-mark prefix.
# $1 - message text
show_success() {
  printf '%b\n' "${GREEN}[✓] $1${NC}"
}
# Print an error message with a red cross prefix, then abort the script.
# $1 - message text
# Exits with status 1 (never returns).
show_error() {
  printf '%b\n' "${RED}[✗] $1${NC}"
  exit 1
}
# Print a warning message with a yellow "[!]" prefix.
# $1 - message text
show_warning() {
  printf '%b\n' "${YELLOW}[!] $1${NC}"
}
# Report the outcome of the immediately preceding command.
# Must be called right after the command under test: it inspects $?.
# $1 - human-readable description of the step
# On failure delegates to show_error, which exits the script.
check_success() {
  local rc=$?  # capture caller's exit status before any other command runs
  if [ "$rc" -eq 0 ]; then
    show_success "$1"
  else
    show_error "执行命令失败: $1"
  fi
}
# Verify that a command is available on PATH; abort the script if not.
# $1 - command name to look up
check_command() {
  # Quote "$1" so an argument with spaces or glob characters cannot
  # word-split or expand (SC2086); behavior is unchanged for normal names.
  if ! command -v "$1" &> /dev/null; then
    show_error "命令 '$1' 未找到,请确保已正确安装并配置环境变量"
  fi
}
# Source /etc/profile and verify the Hadoop/Java prerequisites:
# the profile file itself, HADOOP_HOME / JAVA_HOME, and the java,
# hadoop, and hdfs executables. Any missing prerequisite aborts the
# script via show_error.
load_environment() {
  show_info "加载环境变量..."

  # The profile must exist before it can be sourced.
  [ -f /etc/profile ] || show_error "未找到 /etc/profile 文件"
  source /etc/profile

  # Required environment variables.
  [ -n "$HADOOP_HOME" ] || show_error "HADOOP_HOME 环境变量未设置"
  [ -n "$JAVA_HOME" ] || show_error "JAVA_HOME 环境变量未设置"

  # Required executables on PATH.
  local tool
  for tool in java hadoop hdfs; do
    check_command "$tool"
  done

  show_success "环境变量加载完成"
}
# ── Banner ───────────────────────────────────────────────────────────────
clear
echo -e "${BOLD}${BLUE}"
echo "=============================================================="
echo "              Spark 集群自动部署脚本 v1.0                      "
echo "=============================================================="
echo -e "${NC}"

# Load /etc/profile and verify Hadoop/Java prerequisites.
load_environment

# ── Step 1: verify the Hadoop installation ──────────────────────────────
show_info "检查 Hadoop 环境..."
if [ ! -d "/opt/hadoop" ]; then
  show_error "未找到 Hadoop 安装目录,请先安装 Hadoop"
fi

# Start HDFS if the NameNode JVM is not already running on this host.
show_info "检查Hadoop服务状态..."
if ! jps | grep -q "NameNode"; then
  show_warning "NameNode未运行,尝试启动Hadoop服务..."
  # Test the exit code directly instead of a separate [ $? -ne 0 ] check.
  if ! start-dfs.sh > /dev/null 2>&1; then
    show_error "启动Hadoop服务失败,请手动检查Hadoop服务状态"
  fi
  show_success "Hadoop服务已启动"
fi

# HDFS must be out of safe mode before directories can be created.
show_info "检查HDFS安全模式..."
if ! hdfs dfsadmin -safemode get 2>/dev/null | grep -q "Safe mode is OFF"; then
  show_warning "HDFS处于安全模式,尝试退出安全模式..."
  if ! hdfs dfsadmin -safemode leave > /dev/null 2>&1; then
    show_error "无法退出HDFS安全模式,请检查HDFS服务状态"
  fi
  show_success "已退出HDFS安全模式"
fi

# ── Cluster topology from Hadoop's workers file ─────────────────────────
show_info "获取集群节点信息..."
# sort -u replaces the original `cat | sort | uniq` pipeline (same output,
# no useless cat, one process instead of three).
CLUSTER_NODES=$(sort -u /opt/hadoop/etc/hadoop/workers)
NODE_COUNT=$(echo "$CLUSTER_NODES" | wc -l)
MASTER_NODE="master"
show_success "发现集群共有 $NODE_COUNT 个节点"
echo -e "${CYAN}节点列表:${NC}"
echo "$CLUSTER_NODES"
# ── Step 2: download the Spark tarball ──────────────────────────────────
show_info "准备下载 Spark 安装包..."
# Keep downloads under /opt/software.
if [ ! -d /opt/software ]; then
  mkdir -p /opt/software
  show_success "创建软件存放目录: /opt/software"
fi
# An unchecked cd would make the rest of the script operate in the wrong
# directory; abort explicitly if it fails.
cd /opt/software || show_error "无法进入 /opt/software 目录"

# Spark version and download location.
SPARK_VERSION="3.5.6"
HADOOP_COMPAT_VERSION="3"
SPARK_FILE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_COMPAT_VERSION}.tgz"
SPARK_URL="https://dlcdn.apache.org/spark/spark-${SPARK_VERSION}/${SPARK_FILE}"

if [ -f "$SPARK_FILE" ]; then
  show_success "Spark 安装包已存在,无需重新下载"
else
  show_info "下载 Spark ${SPARK_VERSION} 安装包..."
  echo -e "${YELLOW}下载可能需要几分钟时间,请耐心等待...${NC}"
  # Quote the URL (SC2086) and require wget itself to succeed: a failed
  # transfer can leave a partial file behind, which the original
  # existence-only check would mistake for success — and a re-run would
  # then skip the download entirely and install a corrupt tarball.
  if wget --no-check-certificate --progress=dot:giga "$SPARK_URL" 2>&1 && [ -f "$SPARK_FILE" ]; then
    check_success "下载 Spark 安装包完成"
  else
    rm -f "$SPARK_FILE"  # drop any partial download so a re-run retries
    show_error "下载 Spark 安装包失败"
  fi
fi
# ── Step 3: unpack Spark into /opt/spark ────────────────────────────────
show_info "解压安装 Spark..."
if [ ! -d "/opt/spark" ]; then
  # Quote "$SPARK_FILE" against word-splitting (SC2086).
  tar -zxf "$SPARK_FILE" -C /opt/ > /dev/null 2>&1
  check_success "解压 Spark 到 /opt 目录"
  # Locate the versioned directory the tarball created.
  # NOTE(review): assumes exactly one match; multiple leftover
  # spark-*-bin-hadoop* directories would make the mv below fail.
  EXTRACTED_DIR=$(ls -d /opt/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_COMPAT_VERSION}* 2>/dev/null)
  if [ -n "$EXTRACTED_DIR" ]; then
    mv "$EXTRACTED_DIR" /opt/spark
    check_success "重命名 Spark 目录为 /opt/spark"
  else
    show_error "Spark 解压目录不存在,请检查安装包是否正确"
  fi
else
  show_success "Spark 已安装在 /opt/spark 目录"
fi
# ── Step 4: register SPARK_HOME in /etc/profile (idempotent) ────────────
show_info "配置 Spark 环境变量..."
if grep -q "SPARK_HOME=/opt/spark" /etc/profile; then
  show_success "Spark 环境变量已存在"
else
  # Quoted 'EOF' delimiter: the $-signs are written literally, producing
  # exactly the same bytes the original wrote with backslash escapes.
  cat >> /etc/profile << 'EOF'
# Spark 环境变量
export SPARK_HOME=/opt/spark
export PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin
EOF
  check_success "配置 Spark 环境变量到 /etc/profile"
  # Makes SPARK_HOME available to the remainder of this script run.
  source /etc/profile
  show_info "Spark 环境变量已生效"
fi
# ── Step 5: write /opt/spark/conf/spark-env.sh ──────────────────────────
show_info "配置 Spark 环境..."
mkdir -p /opt/spark/conf

show_info "配置 spark-env.sh..."

# Resolve this host's public IP; fall back to the hostname "master".
show_info "获取当前服务器公网IP..."
MASTER_PUBLIC_IP=$(curl -s ifconfig.me 2>/dev/null || echo "无法获取公网IP")
if [[ "$MASTER_PUBLIC_IP" == "无法获取公网IP" ]]; then
  show_warning "无法获取公网IP,将使用主机名"
  MASTER_PUBLIC_IP="master"
else
  show_success "获取公网IP成功: $MASTER_PUBLIC_IP"
fi

# Back up any pre-existing spark-env.sh BEFORE it is overwritten.
# (The original first copied spark-env.sh.template over the live file and
# only then took the backup, so the backup only ever preserved the
# template, never the real configuration. The template copies are dropped
# entirely: the heredoc below fully overwrites the file anyway.)
if [ -f /opt/spark/conf/spark-env.sh ]; then
  cp /opt/spark/conf/spark-env.sh "/opt/spark/conf/spark-env.sh.bak.$(date +%Y%m%d%H%M%S)"
fi

# NOTE(review): "spark.history.ui.address" in SPARK_HISTORY_OPTS below is
# not a standard Spark configuration key (the UI binds via
# spark.history.ui.port) — kept byte-for-byte, but verify against the
# Spark monitoring documentation.
cat > /opt/spark/conf/spark-env.sh << EOF
# Java 路径
export JAVA_HOME=/opt/java8
export HADOOP_HOME=/opt/hadoop
export HADOOP_CONF_DIR=\$HADOOP_HOME/etc/hadoop
# Spark集群管理配置
export SPARK_MASTER_HOST=master
export SPARK_MASTER_PORT=7077
# 资源配置 - 为3节点4核8G的集群合理分配资源
export SPARK_WORKER_CORES=3 # 每个Worker使用3核,预留1核给系统
export SPARK_WORKER_MEMORY=6g # 每个Worker使用6G内存,预留2G给系统
export SPARK_DRIVER_MEMORY=2g # Driver程序使用2G内存
export SPARK_EXECUTOR_MEMORY=2g # 每个Executor使用2G内存
export SPARK_EXECUTOR_CORES=1 # 每个Executor使用1核
export SPARK_EXECUTOR_INSTANCES=2 # 每个Worker上运行2个Executor
# 历史服务器配置
export SPARK_HISTORY_OPTS="-Dspark.history.ui.port=18080 -Dspark.history.fs.logDirectory=hdfs:///spark-history -Dspark.history.retainedApplications=20 -Dspark.history.ui.address=http://${MASTER_PUBLIC_IP}:18080"
EOF
check_success "配置 spark-env.sh 完成"
# ── Write spark-defaults.conf and the workers file ──────────────────────
show_info "配置 spark-defaults.conf..."
# Back up any existing configuration before overwriting it.
if [ -f /opt/spark/conf/spark-defaults.conf ]; then
  cp /opt/spark/conf/spark-defaults.conf "/opt/spark/conf/spark-defaults.conf.bak.$(date +%Y%m%d%H%M%S)"
fi
# NOTE(review): "spark.history.ui.address" below is not a standard Spark
# configuration key — kept byte-for-byte, but verify against the docs.
cat > /opt/spark/conf/spark-defaults.conf << EOF
# Spark 默认配置
spark.master yarn
spark.eventLog.enabled true
spark.eventLog.dir hdfs:///spark-history
spark.history.fs.logDirectory hdfs:///spark-history
spark.history.ui.address http://${MASTER_PUBLIC_IP}:18080
# 资源配置 - 为YARN模式设置合理的资源分配
spark.driver.memory 2g
spark.executor.memory 2g
spark.executor.cores 1
spark.executor.instances 6
spark.dynamicAllocation.enabled true
spark.dynamicAllocation.initialExecutors 2
spark.dynamicAllocation.minExecutors 2
spark.dynamicAllocation.maxExecutors 6
# 性能优化
spark.serializer org.apache.spark.serializer.KryoSerializer
spark.kryoserializer.buffer.max 128m
spark.network.timeout 600s
spark.local.dir /tmp/spark-temp
EOF
check_success "配置 spark-defaults.conf 完成"

# workers file: one worker hostname per line, taken from the Hadoop
# workers list gathered earlier. The original copied workers.template,
# truncated the file, and then overwrote it again — three operations with
# the same net effect as this single redirect.
show_info "配置 workers 文件..."
echo "$CLUSTER_NODES" > /opt/spark/conf/workers
check_success "配置 workers 文件完成"
# ── Step 6: create the HDFS directories Spark needs ─────────────────────
show_info "在 HDFS 上创建 Spark 所需的目录..."

# Event-log directory used by the History Server.
show_info "创建 Spark 历史日志目录..."
if hdfs dfs -test -d /spark-history > /dev/null 2>&1; then
  show_success "Spark 历史日志目录已存在"
else
  if hdfs dfs -mkdir -p /spark-history > /dev/null 2>&1; then
    hdfs dfs -chmod -R 777 /spark-history > /dev/null 2>&1
    check_success "创建 Spark 历史日志目录"
  else
    show_error "创建 Spark 历史日志目录失败,请检查HDFS权限和空间"
  fi
fi

# Cache directory (holds the shared spark-libs.jar under /spark/jars).
show_info "创建 Spark 缓存目录..."
if hdfs dfs -test -d /spark > /dev/null 2>&1; then
  show_success "Spark 缓存目录已存在"
else
  if hdfs dfs -mkdir -p /spark > /dev/null 2>&1; then
    hdfs dfs -mkdir -p /spark/jars > /dev/null 2>&1
    hdfs dfs -chmod -R 777 /spark > /dev/null 2>&1
    check_success "创建 Spark 缓存目录"
  else
    show_error "创建 Spark 缓存目录失败,请检查HDFS权限和空间"
  fi
fi
# ── Step 7: build and publish the shared Spark jar archive ──────────────
show_info "创建 Spark Jar 档案文件..."
if hdfs dfs -test -f /spark/jars/spark-libs.jar > /dev/null 2>&1; then
  show_success "Spark Jar 档案文件已存在"
else
  # Jar up /opt/spark/jars directly with -C. The original first copied
  # every jar into a mktemp directory, which (a) duplicated hundreds of
  # megabytes for no benefit and (b) leaked that temp directory whenever
  # show_error exited before the rm -rf cleanup line was reached.
  # All paths here are absolute, so the original cd /opt/spark + cd back
  # (with an unquoted "$CURRENT_DIR") is unnecessary and has been removed;
  # the working directory is left untouched either way.
  if jar cvf /opt/software/spark-libs.jar -C /opt/spark/jars . > /dev/null 2>&1; then
    # Publish to HDFS so YARN executors can fetch it once per node.
    if hdfs dfs -put -f /opt/software/spark-libs.jar /spark/jars/ > /dev/null 2>&1; then
      check_success "创建并上传 Spark Jar 档案文件"
    else
      show_error "上传 Spark Jar 档案文件到HDFS失败"
    fi
  else
    show_error "创建 Spark Jar 档案文件失败"
  fi
fi
# ── Step 8: push Spark and the profile to every worker node ─────────────
show_info "同步 Spark 到所有节点..."
for node in $CLUSTER_NODES; do
  if [ "$node" != "$MASTER_NODE" ]; then
    show_info "同步 Spark 到 $node 节点..."
    # Quote "$node" everywhere (SC2086) — hostnames from the workers file
    # must not be word-split or glob-expanded.
    scp -r /opt/spark "$node":/opt/ > /dev/null 2>&1
    scp -r /etc/profile "$node":/etc/ > /dev/null 2>&1
    # NOTE(review): sourcing /etc/profile over ssh only affects that one
    # short-lived remote shell; future logins pick the file up on their
    # own. Kept for parity with the original behavior.
    ssh "$node" "source /etc/profile" > /dev/null 2>&1
    check_success "同步 Spark 到 $node 节点完成"
  fi
done
# Step 9: start the Spark History Server (the only service this script
# launches itself; Spark runs on YARN, so no master/worker daemons).
show_info "启动 Spark 相关服务..."
# Skip startup if a HistoryServer JVM is already running on this host.
show_info "检查 Spark History Server 状态..."
if pgrep -f "org.apache.spark.deploy.history.HistoryServer" > /dev/null; then
  show_success "Spark History Server 已在运行"
else
  show_info "启动 Spark History Server..."
  # Ensure the local daemon-log directory exists and is writable.
  if [ ! -d "/opt/spark/logs" ]; then
    mkdir -p /opt/spark/logs
    chmod 777 /opt/spark/logs
  fi
  # The HDFS event-log directory must already exist (created in step 6).
  if ! hdfs dfs -test -d /spark-history > /dev/null 2>&1; then
    show_error "Spark历史日志目录不存在,请先运行脚本创建必要的HDFS目录"
  fi
  # If port 18080 is already bound, assume a stale instance and stop it.
  # NOTE(review): netstat may be absent on minimal installs (ss is the
  # modern replacement) — confirm it is available on the target hosts.
  if netstat -tuln | grep -q ":18080"; then
    show_warning "端口18080已被占用,尝试停止现有服务..."
    /opt/spark/sbin/stop-history-server.sh > /dev/null 2>&1
    sleep 5
  fi
  # Launch the daemon, then poll up to ~30s until the process exists AND
  # the web UI actually answers on localhost:18080.
  /opt/spark/sbin/start-history-server.sh > /dev/null 2>&1
  show_info "等待服务启动..."
  for i in {1..30}; do
    if pgrep -f "org.apache.spark.deploy.history.HistoryServer" > /dev/null; then
      # Process alive — also require the HTTP endpoint to respond.
      if curl -s "http://localhost:18080" > /dev/null 2>&1; then
        show_success "Spark History Server 启动成功"
        break
      fi
    fi
    # Give up on the 30th attempt; show_error exits the whole script.
    if [ $i -eq 30 ]; then
      show_error "Spark History Server 启动超时,请检查日志文件: /opt/spark/logs/spark-*-org.apache.spark.deploy.history.HistoryServer-*.out"
    fi
    echo -ne "${YELLOW}等待服务启动: $i/30 秒${NC}\r"
    sleep 1
  done
fi
# Final status report: probe the History Server UI one more time.
show_info "Spark History Server 状态:"
if ! curl -s "http://localhost:18080" > /dev/null 2>&1; then
  show_warning "服务可能未正常运行,请检查日志文件"
else
  show_success "服务正常运行在 http://localhost:18080"
fi
# Print the cluster web UI endpoints using the public IP resolved earlier.
echo -e "${BOLD}${BLUE}🌐 Web管理界面:${NC}"
echo -e "┌───────────────────────────────────────────────────────────────────┐"
echo -e "│ HDFS管理界面: ${BOLD}http://$MASTER_PUBLIC_IP:9870/${NC} "
echo -e "│ YARN资源管理界面: ${BOLD}http://$MASTER_PUBLIC_IP:8088/${NC} "
echo -e "│ MapReduce历史服务器: ${BOLD}http://$MASTER_PUBLIC_IP:19888/${NC} "
echo -e "│ Spark History Server: ${BOLD}http://$MASTER_PUBLIC_IP:18080/${NC} "
echo -e "└───────────────────────────────────────────────────────────────────┘"
# Deployment summary and spark-submit usage cheat-sheet.
echo
echo -e "${BOLD}${GREEN}✅ Spark 集群部署完成${NC}"
echo
echo -e "${BOLD}${CYAN}🖥️ Spark 集群信息:${NC}"
echo -e "┌───────────────────────────────────────────────────────────────────┐"
echo -e "│ Spark 版本: ${BOLD}${SPARK_VERSION}${NC} "
echo -e "│ Spark 主节点: ${BOLD}${MASTER_NODE}${NC} "
echo -e "│ Spark 运行模式: ${BOLD}YARN (集群模式)${NC} "
echo -e "│ Spark History Server: ${BOLD}http://$MASTER_PUBLIC_IP:18080${NC} "
echo -e "└───────────────────────────────────────────────────────────────────┘"
echo
echo -e "${BOLD}${YELLOW}📋 Spark 使用说明:${NC}"
echo -e "┌───────────────────────────────────────────────────────────────────┐"
echo -e "│ 1. 提交 Spark 应用到 YARN(集群模式): "
echo -e "│ spark-submit --master yarn --deploy-mode cluster \\ "
echo -e "│ --driver-memory 2g --executor-memory 2g \\ "
echo -e "│ --executor-cores 1 --num-executors 3 \\ "
echo -e "│ --class org.example.SparkApp path/to/app.jar [参数] "
echo -e "│ "
echo -e "│ 2. 提交 Spark 应用到 YARN(客户端模式): "
echo -e "│ spark-submit --master yarn --deploy-mode client \\ "
echo -e "│ --driver-memory 2g --executor-memory 2g \\ "
echo -e "│ --executor-cores 1 --num-executors 3 \\ "
echo -e "│ --class org.example.SparkApp path/to/app.jar [参数] "
echo -e "│ "
echo -e "│ 3. 启动交互式 Spark Shell: "
echo -e "│ spark-shell --master yarn "
echo -e "│ "
echo -e "│ 4. 启动 PySpark 交互式环境: "
echo -e "│ pyspark --master yarn "
echo -e "│ "
echo -e "│ 5. 启动 Spark SQL 命令行: "
echo -e "│ spark-sql --master yarn "
echo -e "│ "
echo -e "│ 6. Spark 集群管理命令: "
# NOTE(review): $SPARK_HOME below is expanded at script runtime (set when
# /etc/profile was sourced), not printed literally — confirm that is the
# intended presentation.
echo -e "│ - 启动历史服务器: $SPARK_HOME/sbin/start-history-server.sh "
echo -e "│ - 停止历史服务器: $SPARK_HOME/sbin/stop-history-server.sh "
echo -e "└───────────────────────────────────────────────────────────────────┘"
# ── Example programs the user can run to validate the deployment ────────
echo
echo -e "${BOLD}${BLUE}🔍 Spark 示例程序:${NC}"
echo
echo -e "${BOLD}1. 计算π值示例(client模式):${NC}"
echo
echo -e "命令:"
echo -e "spark-submit --class org.apache.spark.examples.SparkPi \\"
echo -e " --master yarn --deploy-mode client \\"
echo -e " --driver-memory 2g --executor-memory 2g \\"
echo -e " --executor-cores 1 --num-executors 3 \\"
# Use ${SPARK_VERSION} instead of the hard-coded "3.5.6" so this printed
# example cannot drift out of sync when the version variable is bumped.
# NOTE(review): the Scala suffix "_2.12" is still hard-coded — confirm it
# matches the downloaded distribution.
echo -e " /opt/spark/examples/jars/spark-examples_2.12-${SPARK_VERSION}.jar 10"
echo
echo -e "${BOLD}2. WordCount 示例(PySpark):${NC}"
echo
echo -e "步骤1: 创建测试文件"
echo -e "cat > /tmp/words.txt << EOF"
echo -e "Hello Spark Hadoop Spark Hello"
echo -e "Big Data Processing with Spark"
echo -e "Spark is faster than MapReduce"
echo -e "EOF"
echo
echo -e "步骤2: 上传到HDFS"
echo -e "hdfs dfs -mkdir -p /spark/input"
echo -e "hdfs dfs -put /tmp/words.txt /spark/input/"
echo
echo -e "步骤3: 启动PySpark并执行"
echo -e "pyspark --master yarn"
echo
echo -e "步骤4: 执行以下Python代码"
echo -e "text_file = spark.read.text(\"hdfs:///spark/input/words.txt\")"
echo -e "counts = text_file.rdd.flatMap(lambda line: line.value.split(\" \"))\\"
echo -e " .filter(lambda word: word.strip() != \"\") \\"
echo -e " .map(lambda word: (word, 1)) \\"
echo -e " .reduceByKey(lambda a, b: a + b) \\"
echo -e " .toDF([\"word\", \"count\"])"
echo -e "counts.show()"
echo -e "counts.write.csv(\"hdfs:///spark/output/wordcount-py-result\")"
echo
echo -e "${BOLD}${BLUE}=============================================================="
echo -e " Spark 集群自动部署脚本执行完毕 "
echo -e "==============================================================${NC}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment