Skip to content

Instantly share code, notes, and snippets.

@superyngo
Last active January 21, 2026 00:17
Show Gist options
  • Select an option

  • Save superyngo/a9543b560d59ceeb2d5fcb9eab87c560 to your computer and use it in GitHub Desktop.

Select an option

Save superyngo/a9543b560d59ceeb2d5fcb9eab87c560 to your computer and use it in GitHub Desktop.
Application process manager with monitoring capabilities
#!/bin/bash
# wappman.sh
#
# Application process manager with monitoring capabilities:
# - Manages application lifecycle (start/stop/restart)
# - Optional health checking (periodic process monitoring)
# - Optional file watching (auto-restart on file changes)
# - Pure script solution (no systemd/supervisor dependencies)
#
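# Usage:
# ./wappman.sh [--config <path>] <command>
#
# Commands:
# start - Start all components
# stop - Stop all components
# restart - Restart all components
# restart-app - Restart the application only (monitors keep running)
# restart-monitor - Restart the health checker and file watcher
# status - Show current status
# clean - Remove state files (components must be stopped first)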
#
# Requires: inotifywait (inotify-tools) - only if file watching is enabled
# Strict mode: abort on a failing command (-e), on use of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# Absolute path of the directory containing this script; relative paths
# from the configuration are resolved against it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Default config file path (can be overridden with --config)
CONF="$SCRIPT_DIR/wappman.conf"
# ============================================================================
# Configuration Loading & Validation
# ============================================================================
# Load the configuration file named by $CONF and normalize every setting.
#
# Globals read:    CONF, SCRIPT_DIR
# Globals written: APP_EXEC plus every optional setting assigned below
# Exits with status 1 when the file is missing (after writing a template
# through create_config_template) or when a numeric setting is not a
# non-negative integer. Aborts through ${VAR:?} when APP_EXEC is unset/empty.
load_config() {
  if [ ! -f "$CONF" ]; then
    echo "ERROR: Configuration file not found: $CONF"
    echo ""
    echo "Creating template configuration..."
    create_config_template
    echo "Template created at: $CONF"
    echo "Please edit the configuration and try again."
    exit 1
  fi

  # The configuration is plain shell and is executed as such.
  source "$CONF"

  # Required fields
  : "${APP_EXEC:?ERROR: APP_EXEC is required in $CONF}"

  # Optional fields with defaults
  MANAGER_LOG_FILE="${MANAGER_LOG_FILE:-./logs/wappman.log}"
  APP_LOG_FILE="${APP_LOG_FILE:-}"
  STATE_DIR="${STATE_DIR:-./state}"
  WATCH_FILES="${WATCH_FILES:-}"
  RESTART_FILE="${RESTART_FILE:-}"
  HEALTH_CHECK_INTERVAL="${HEALTH_CHECK_INTERVAL:-5}"
  RESTART_MIN_INTERVAL="${RESTART_MIN_INTERVAL:-2}"
  RESTART_DELAY="${RESTART_DELAY:-0}"
  STOP_TIMEOUT="${STOP_TIMEOUT:-10}"
  APP_ARGS="${APP_ARGS:-}"
  START_SUCCESS_COMMAND="${START_SUCCESS_COMMAND:-}"
  START_POST_DELAY="${START_POST_DELAY:-${RESTART_DELAY}}"
  RESTART_MAX="${RESTART_MAX:-0}"

  # These settings are later compared with integer tests ([ -gt ], (( )));
  # a non-integer value would make those tests error out and silently
  # disable the feature, so reject it here with a clear message instead.
  local setting
  local invalid=0
  for setting in HEALTH_CHECK_INTERVAL RESTART_MIN_INTERVAL RESTART_DELAY \
    STOP_TIMEOUT START_POST_DELAY RESTART_MAX; do
    if [[ "${!setting}" =~ ^[0-9]+$ ]]; then
      # Force base 10 so a value such as "08" is not parsed as octal later.
      printf -v "$setting" '%d' "$((10#${!setting}))"
    else
      echo "ERROR: $setting must be a non-negative integer (got '${!setting}') in $CONF" >&2
      invalid=1
    fi
  done
  if [ "$invalid" -ne 0 ]; then
    exit 1
  fi

  # Convert relative paths to absolute (relative to the script directory)
  [[ "$MANAGER_LOG_FILE" = /* ]] || MANAGER_LOG_FILE="$SCRIPT_DIR/$MANAGER_LOG_FILE"
  [[ "$STATE_DIR" = /* ]] || STATE_DIR="$SCRIPT_DIR/$STATE_DIR"
  # Convert APP_LOG_FILE to absolute if set
  if [ -n "$APP_LOG_FILE" ]; then
    [[ "$APP_LOG_FILE" = /* ]] || APP_LOG_FILE="$SCRIPT_DIR/$APP_LOG_FILE"
  fi
}
# Write a commented starter configuration to the path held in $CONF,
# overwriting any existing file. The heredoc delimiter is quoted, so the
# template text is emitted verbatim with no expansion.
create_config_template() {
  local target="$CONF"
  cat << 'EOF' > "$target"
# wappman.conf
# Configuration for wappman.sh
# ===================================================================
# 必填欄位
# ===================================================================
# 應用程式執行檔路徑 (必填)
APP_EXEC="/path/to/your/app"
# Manager 日誌檔路徑 (必填,相對或絕對路徑)
# Manager 自身的運行日誌
MANAGER_LOG_FILE="./logs/wappman.log"
# ===================================================================
# 選填欄位 - 應用程式設定
# ===================================================================
# 應用程式啟動參數 (支援引號)
# 範例: APP_ARGS='--port 8080 --name "My App" --flag'
APP_ARGS=""
# 應用程式日誌檔路徑 (選填,相對或絕對路徑)
# 若不設定,應用程式輸出將與 manager 日誌合併到 MANAGER_LOG_FILE
# 範例: APP_LOG_FILE="./logs/app.log"
APP_LOG_FILE=""
# ===================================================================
# 選填欄位 - 狀態目錄
# ===================================================================
# 狀態檔案儲存目錄 (存放 PID、state、lock 等檔案)
STATE_DIR="./state"
# ===================================================================
# 選填欄位 - 檔案監控
# ===================================================================
# 監控檔案列表 (空白分隔,留空表示不監控)
# 範例: WATCH_FILES="/path/to/config.yaml /path/to/another.conf"
WATCH_FILES=""
# 重啟觸發檔案 (此檔案被寫入時觸發重啟,留空表示不使用)
# 範例: RESTART_FILE="/tmp/restart.trigger"
RESTART_FILE=""
# ===================================================================
# 選填欄位 - 健康檢查
# ===================================================================
# 健康檢查間隔 (秒),0 表示停用健康檢查
HEALTH_CHECK_INTERVAL=5
# ===================================================================
# 選填欄位 - 行為控制
# ===================================================================
# 重啟延遲 (秒,選填,默認為 0)
# 當偵測到需要重啟時(health check 或 inotify),延遲指定時間後才執行重啟
# 設定為 0 表示立即重啟
RESTART_DELAY=0
# 重啟最小間隔 (秒),防止短時間內重複重啟
RESTART_MIN_INTERVAL=2
# 停止超時時間 (秒),超時後強制終止
STOP_TIMEOUT=10
# 應用程式啟動成功後執行的驗證命令 (選填,留空表示不執行)
# 此命令在每次啟動(包括首次啟動和重啟)後都會執行
# 用於驗證應用程式是否正常運作(如健康檢查 API 呼叫)
# 範例: START_SUCCESS_COMMAND='curl -f http://localhost:8080/health'
# 若命令執行失敗(返回非零),應用程式將被標記為 crashed
START_SUCCESS_COMMAND=""
# 應用程式啟動成功後等待多久才執行驗證命令 (秒,選填)
# 留空時預設使用 RESTART_DELAY 的值,給予應用程式充足的啟動時間
START_POST_DELAY=""
# 重啟次數上限 (選填,0 表示無限制)
# 當應用程式因 crash 重啟次數超過此上限時,將關閉所有監控和應用程式,不再重啟
RESTART_MAX=0
EOF
}
# ============================================================================
# Preflight Checks
# ============================================================================
# Validate the environment before starting anything.
#
# Globals read: APP_EXEC, MANAGER_LOG_FILE, APP_LOG_FILE, STATE_DIR,
#               WATCH_FILES, RESTART_FILE
# Prints a WARNING (non-fatal) when APP_EXEC is missing or not executable.
# Prints an ERROR and finally exits 1 when an existing log file or the state
# directory is not writable, or when file watching is configured but
# inotifywait is not installed.
preflight_check() {
  local errors=0
  local log_file

  # Warn when APP_EXEC is not a regular file OR lacks the execute bit.
  if [ ! -f "$APP_EXEC" ] || [ ! -x "$APP_EXEC" ]; then
    echo "WARNING: APP_EXEC not found or not executable: $APP_EXEC"
  fi

  # Only log files that already exist are checked for write permission.
  for log_file in "$MANAGER_LOG_FILE" ${APP_LOG_FILE:+"$APP_LOG_FILE"}; do
    if [ -f "$log_file" ] && [ ! -w "$log_file" ]; then
      echo "ERROR: No write permission for log file: $log_file"
      errors=$((errors + 1))
    fi
  done

  # Create the state directory (best effort), then verify it is writable.
  mkdir -p "$STATE_DIR" 2>/dev/null || true
  if [ ! -w "$STATE_DIR" ]; then
    echo "ERROR: No write permission for state directory: $STATE_DIR"
    errors=$((errors + 1))
  fi

  # inotifywait is only required when file watching is configured.
  if [ -n "${WATCH_FILES:-}" ] || [ -n "${RESTART_FILE:-}" ]; then
    if ! command -v inotifywait &>/dev/null; then
      echo "ERROR: inotifywait not found. Install it with:"
      echo " sudo apt install inotify-tools (Debian/Ubuntu)"
      echo " sudo yum install inotify-tools (RHEL/CentOS)"
      errors=$((errors + 1))
    fi
  fi

  if [ "$errors" -gt 0 ]; then
    exit 1
  fi
}
# ============================================================================
# State Directory Management
# ============================================================================
# Ensure the directory named by $STATE_DIR exists; print an error and
# exit 1 when it cannot be created.
init_state_dir() {
  if ! mkdir -p "$STATE_DIR" 2>/dev/null; then
    echo "ERROR: Failed to create state directory: $STATE_DIR"
    exit 1
  fi
}
# Print the value stored for a key in a component's state file.
#
# Arguments: $1 - component name (file is $STATE_DIR/<component>.state)
#            $2 - key to look up
# Outputs:   the text after the first "=" of each matching line; an empty
#            line when the file does not exist or (with pipefail active)
#            the key is not present.
read_state() {
  local name="$1"
  local field="$2"
  local path="$STATE_DIR/${name}.state"

  if [ ! -f "$path" ]; then
    echo ""
    return
  fi
  grep "^${field}=" "$path" 2>/dev/null | cut -d= -f2- || echo ""
}
# Create or update KEY=VALUE pairs in a component's state file.
#
# Arguments: $1    - component name (file is $STATE_DIR/<component>.state)
#            $2... - "KEY=VALUE" pairs; the key is the text before the first "="
# Existing lines for a key are replaced in place, unknown keys are appended,
# and all other lines are kept. The update is done in the shell rather than
# with sed, so values containing "|", "&" or "\" (paths, free-form reasons)
# are stored literally and no GNU-specific "sed -i" is needed.
# The new content is written to a temporary file and renamed over the state
# file so readers never see a half-written file.
write_state() {
  local component="$1"
  shift
  local state_file="$STATE_DIR/${component}.state"
  local temp_file="${state_file}.tmp"
  local -a lines=()
  local line pair key value idx found

  # Read existing state (tolerating a missing trailing newline)
  if [ -f "$state_file" ]; then
    while IFS= read -r line || [ -n "$line" ]; do
      lines+=("$line")
    done < "$state_file"
  fi

  # Update key-value pairs
  for pair in "$@"; do
    key="${pair%%=*}"
    value="${pair#*=}"
    found=false
    for idx in "${!lines[@]}"; do
      # The quoted prefix is matched literally, not as a pattern.
      if [[ "${lines[$idx]}" == "${key}="* ]]; then
        lines[$idx]="${key}=${value}"
        found=true
      fi
    done
    if ! $found; then
      lines+=("${key}=${value}")
    fi
  done

  if [ "${#lines[@]}" -gt 0 ]; then
    printf '%s\n' "${lines[@]}" > "$temp_file"
  else
    : > "$temp_file"
  fi

  # Atomic write
  mv -f "$temp_file" "$state_file"
}
# ============================================================================
# Logging
# ============================================================================
# Append one timestamped line to $MANAGER_LOG_FILE, creating the log
# directory first if needed. All arguments are joined into the message.
log() {
  local log_dir
  log_dir="$(dirname "$MANAGER_LOG_FILE")"
  mkdir -p "$log_dir"
  printf '[%s] %s\n' "$(date '+%F %T')" "$*" >> "$MANAGER_LOG_FILE"
}
# Run the optional post-start verification command and log its result.
#
# Globals read: START_SUCCESS_COMMAND, START_POST_DELAY
# Returns: 0 when no command is configured or the command exits 0;
#          1 when the command exits non-zero or its output cannot be captured.
# The command is evaluated in a subshell so that an "exit", "cd" or variable
# assignment inside it cannot terminate or alter the manager process itself.
execute_post_start_command() {
  # Nothing to do when no verification command is configured
  if [ -z "${START_SUCCESS_COMMAND:-}" ]; then
    return 0
  fi

  # Wait START_POST_DELAY seconds before running the command
  if [ "${START_POST_DELAY:-0}" -gt 0 ]; then
    log "post-start: waiting ${START_POST_DELAY}s before executing verification command"
    sleep "$START_POST_DELAY"
  fi

  # Record the command that is about to run
  log "post-start: executing verification command: $START_SUCCESS_COMMAND"

  local cmd_start
  cmd_start=$(date '+%F %T')
  local cmd_output
  local cmd_exit_code=0

  # Capture stdout and stderr in a temporary file
  local temp_output
  if ! temp_output=$(mktemp); then
    log "ERROR: post-start: could not create temporary file for command output"
    return 1
  fi
  # NOTE(review): eval of configuration-supplied text; the configuration file
  # is itself executed as shell code, so it is treated as trusted input here.
  ( eval "$START_SUCCESS_COMMAND" ) > "$temp_output" 2>&1 || cmd_exit_code=$?
  cmd_output=$(cat "$temp_output")
  rm -f "$temp_output"

  log "post-start: command executed at $cmd_start"
  log "post-start: exit code: $cmd_exit_code"

  # Log the command output (at most 50 lines)
  if [ -n "$cmd_output" ]; then
    local line_count
    local line
    line_count=$(printf '%s\n' "$cmd_output" | wc -l)
    if [ "$line_count" -gt 50 ]; then
      log "post-start: command output (first 50 lines of $line_count):"
      printf '%s\n' "$cmd_output" | head -n 50 | while IFS= read -r line; do
        log " | $line"
      done
      log " | ... ($(( line_count - 50 )) lines truncated)"
    else
      log "post-start: command output:"
      printf '%s\n' "$cmd_output" | while IFS= read -r line; do
        log " | $line"
      done
    fi
  else
    log "post-start: command output: (empty)"
  fi

  # Report whether the command succeeded
  if [ "$cmd_exit_code" -ne 0 ]; then
    log "ERROR: post-start verification command failed (exit code: $cmd_exit_code)"
    return 1
  fi
  log "post-start: verification command succeeded"
  return 0
}
# Stop one monitor ("health" or "inotify") recorded in $STATE_DIR/<name>.pid.
# Pid file and state are updated BEFORE signalling, because the signal may
# terminate the calling process when it is the monitor being stopped.
_shutdown_stop_monitor() {
  local name="$1"
  local label="health checker"
  if [ "$name" = "inotify" ]; then
    label="inotify watcher"
  fi
  local pid_file="$STATE_DIR/${name}.pid"
  local pid
  pid=$(cat "$pid_file" 2>/dev/null || true)
  if [ -z "$pid" ] || ! kill -0 "$pid" 2>/dev/null; then
    return 0
  fi

  log "stopping $label (pid=$pid)"
  rm -f "$pid_file" 2>/dev/null || true
  write_state "$name" "STATUS=stopped" "PID="

  # The watcher subshell has children (inotifywait and its read loop).
  if [ "$name" = "inotify" ]; then
    pkill -P "$pid" 2>/dev/null || true
  fi
  kill "$pid" 2>/dev/null || true
  sleep 0.5
  if kill -0 "$pid" 2>/dev/null; then
    if [ "$name" = "inotify" ]; then
      pkill -9 -P "$pid" 2>/dev/null || true
    fi
    kill -9 "$pid" 2>/dev/null || true
  fi
  return 0
}

# Stop the monitors and the application and mark the app as "shutdown".
#
# Arguments: $1 - human-readable reason (default: "max restarts exceeded")
# This function is reached through restart_app, which runs inside the health
# checker and the file watcher. Signalling the caller's own monitor first
# would end the caller before the remaining components are stopped, so a
# monitor whose pid is this process or its parent is stopped last, after the
# application has been stopped and the final state and log line are written.
shutdown_all() {
  local reason="${1:-max restarts exceeded}"
  # Capture BASHPID before any command substitution (which would change it).
  local self_pid="$BASHPID"
  local parent_pid
  parent_pid=$(ps -o ppid= -p "$self_pid" 2>/dev/null || true)
  parent_pid="${parent_pid//[[:space:]]/}"

  log "CRITICAL: shutting down all components (reason: $reason)"

  local -a deferred=()
  local name pid
  for name in health inotify; do
    pid=$(cat "$STATE_DIR/${name}.pid" 2>/dev/null || true)
    if [ -n "$pid" ] && { [ "$pid" = "$self_pid" ] || [ "$pid" = "$parent_pid" ]; }; then
      deferred+=("$name")
    else
      _shutdown_stop_monitor "$name"
    fi
  done

  # Stop app
  stop_app
  write_state app "STATUS=shutdown" "SHUTDOWN_REASON=$reason" "SHUTDOWN_TIME=$(date +%s)"
  log "all components stopped (reason: $reason)"

  # Finally stop the monitor we are running in (this may not return).
  for name in ${deferred[@]+"${deferred[@]}"}; do
    _shutdown_stop_monitor "$name"
  done
}
# ============================================================================
# Application Lifecycle
# ============================================================================
# Split $APP_ARGS into an array, honouring shell quoting.
#
# Arguments: $1 - name of the caller's array variable to fill (nameref)
# The array is emptied first and stays empty when APP_ARGS is unset or empty.
# NOTE(review): the split is done with eval, so APP_ARGS is interpreted as
# shell syntax; it comes from the configuration file.
get_args_array() {
  local -n __args_ref="$1"
  __args_ref=()
  if [ -z "${APP_ARGS:-}" ]; then
    return 0
  fi
  eval "__args_ref=($APP_ARGS)"
}
# Print one stat field of $APP_EXEC: try the GNU form (-c<fmt>) first, then
# the BSD form given by the remaining arguments, else print "unknown".
_start_app_stat() {
  local gnu_format="$1"
  shift
  stat -c"$gnu_format" "$APP_EXEC" 2>/dev/null \
    || stat "$@" "$APP_EXEC" 2>/dev/null \
    || echo "unknown"
}

# Launch the application in the background and record its pid and state.
#
# Globals read: APP_EXEC, APP_LOG_FILE, MANAGER_LOG_FILE, STATE_DIR
# Returns: 0 when the process is still alive 0.2s after launch and the
#          post-start verification passes; 1 otherwise.
start_app() {
  local -a args_array=()
  get_args_array args_array

  # Log app file info before starting
  if [ -f "$APP_EXEC" ]; then
    local app_size app_atime app_mtime app_ctime
    local bsd_time_format="%Y-%m-%d %H:%M:%S"
    app_size=$(_start_app_stat %s -f%z)
    app_atime=$(_start_app_stat %x -f%Sa -t "$bsd_time_format")
    app_mtime=$(_start_app_stat %y -f%Sm -t "$bsd_time_format")
    app_ctime=$(_start_app_stat %z -f%Sc -t "$bsd_time_format")
    log "app file info: size=${app_size} bytes, atime=${app_atime}, mtime=${app_mtime}, ctime=${app_ctime}"
  fi

  log "start app: $APP_EXEC ${args_array[*]:-}"

  # Application output goes to APP_LOG_FILE when set, else to the manager log
  local app_output_log="$MANAGER_LOG_FILE"
  if [ -n "$APP_LOG_FILE" ]; then
    app_output_log="$APP_LOG_FILE"
    log "app output will be logged to: $APP_LOG_FILE"
  fi

  # Make sure the log directory exists
  mkdir -p "$(dirname "$app_output_log")"

  if [ "${#args_array[@]}" -gt 0 ]; then
    "$APP_EXEC" "${args_array[@]}" >> "$app_output_log" 2>&1 &
  else
    "$APP_EXEC" >> "$app_output_log" 2>&1 &
  fi
  local app_pid=$!
  echo "$app_pid" > "$STATE_DIR/app.pid"

  # Sanity check: the process must survive the first 0.2 seconds
  sleep 0.2
  if ! kill -0 "$app_pid" 2>/dev/null; then
    log "ERROR: app failed to start (pid=$app_pid exited immediately)"
    rm -f "$STATE_DIR/app.pid" 2>/dev/null || true
    return 1
  fi

  local now
  now=$(date +%s)
  write_state app \
    "STATUS=running" \
    "PID=$app_pid" \
    "START_TIME=$now" \
    "LAST_START_TIME=$now"
  log "app started successfully (pid=$app_pid)"

  # Run the post-start verification command
  if execute_post_start_command; then
    return 0
  fi
  log "ERROR: post-start verification failed, marking app as crashed"
  write_state app "STATUS=crashed"
  return 1
}
# Stop the application recorded in $STATE_DIR/app.pid.
#
# Sends SIGTERM, polls once per second for up to STOP_TIMEOUT seconds, then
# sends SIGKILL if the process is still alive. In every case the pid file is
# removed and the app state is set to STATUS=stopped with an empty PID.
# Returns: always 0.
stop_app() {
  local pid_file="$STATE_DIR/app.pid"
  local pid=""

  if [ -f "$pid_file" ]; then
    pid=$(cat "$pid_file" 2>/dev/null || true)
  fi

  if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
    log "stopping app (pid=$pid)"
    kill "$pid" 2>/dev/null || true

    local waited=0
    local exited=false
    while [ "$waited" -lt "$STOP_TIMEOUT" ]; do
      if ! kill -0 "$pid" 2>/dev/null; then
        log "app stopped gracefully after ${waited}s"
        exited=true
        break
      fi
      sleep 1
      waited=$((waited + 1))
    done

    # Timed out: escalate to SIGKILL if the process is still there
    if ! $exited && kill -0 "$pid" 2>/dev/null; then
      log "force killing app (pid=$pid, timeout after ${STOP_TIMEOUT}s)"
      kill -9 "$pid" 2>/dev/null || true
      sleep 0.5
    fi
  fi

  rm -f "$pid_file" 2>/dev/null || true
  write_state app "STATUS=stopped" "PID="
  return 0
}
# Restart the application, honouring the crash limit, debounce and delay.
#
# Arguments: $1 - reason (default "manual"); "health_check" or any reason
#                 containing "crash" counts as crash-related
# Globals read: RESTART_MAX, RESTART_MIN_INTERVAL, RESTART_DELAY
# Returns: 0 when the restart succeeded or was skipped by the debounce;
#          1 when the crash limit was already reached or the start failed.
restart_app() {
  local reason="${1:-manual}"
  local crash_related=false
  if [[ "$reason" == "health_check" || "$reason" == *"crash"* ]]; then
    crash_related=true
  fi
  local crash_count

  # Refuse crash-related restarts once the configured limit is reached
  if [ "${RESTART_MAX:-0}" -gt 0 ] && $crash_related; then
    crash_count=$(read_state app CRASH_RESTART_COUNT)
    crash_count=${crash_count:-0}
    if [ "$crash_count" -ge "$RESTART_MAX" ]; then
      log "CRITICAL: crash restart limit reached ($crash_count >= $RESTART_MAX)"
      shutdown_all "crash restart limit exceeded (count=$crash_count, max=$RESTART_MAX)"
      return 1
    fi
  fi

  # Debounce: skip when the last successful start was too recent
  local last_start now
  last_start=$(read_state app LAST_START_TIME)
  last_start=${last_start:-0}
  now=$(date +%s)
  if (( now - last_start < RESTART_MIN_INTERVAL )); then
    log "restart skipped: debounce (last=${last_start}, now=${now}, min=${RESTART_MIN_INTERVAL}s)"
    return 0
  fi

  # Optional pause before restarting
  if [ "${RESTART_DELAY:-0}" -gt 0 ]; then
    log "restart delayed: waiting ${RESTART_DELAY}s before restart (reason=${reason})"
    sleep "$RESTART_DELAY"
  fi

  stop_app
  if ! start_app; then
    # A failed start always counts towards the crash limit
    crash_count=$(read_state app CRASH_RESTART_COUNT)
    crash_count=$(( ${crash_count:-0} + 1 ))
    write_state app "STATUS=crashed" "CRASH_RESTART_COUNT=$crash_count"
    log "ERROR: app restart failed (crash_count=$crash_count)"
    if [ "${RESTART_MAX:-0}" -gt 0 ] && [ "$crash_count" -ge "$RESTART_MAX" ]; then
      log "CRITICAL: crash restart limit reached after failed restart ($crash_count >= $RESTART_MAX)"
      shutdown_all "crash restart limit exceeded after failed restart (count=$crash_count, max=$RESTART_MAX)"
    fi
    return 1
  fi

  local count
  count=$(read_state app RESTART_COUNT)
  count=$(( ${count:-0} + 1 ))
  crash_count=$(read_state app CRASH_RESTART_COUNT)
  crash_count=${crash_count:-0}

  if $crash_related; then
    crash_count=$((crash_count + 1))
    write_state app \
      "RESTART_COUNT=$count" \
      "CRASH_RESTART_COUNT=$crash_count" \
      "LAST_RESTART_TIME=$now" \
      "LAST_RESTART_REASON=$reason"
    log "app restarted successfully (reason=$reason, count=$count, crash_count=$crash_count)"
  else
    # A successful non-crash restart resets the crash counter
    write_state app \
      "RESTART_COUNT=$count" \
      "CRASH_RESTART_COUNT=0" \
      "LAST_RESTART_TIME=$now" \
      "LAST_RESTART_REASON=$reason"
    log "app restarted successfully (reason=$reason, count=$count, crash_count reset to 0)"
  fi
  return 0
}
# ============================================================================
# Health Checker (background process)
# ============================================================================
# Start the background health checker and record its pid.
#
# Every HEALTH_CHECK_INTERVAL seconds the subshell checks whether the pid in
# $STATE_DIR/app.pid is alive and calls restart_app "health_check" when not.
# Globals read: HEALTH_CHECK_INTERVAL, STATE_DIR
# Side effects: writes $STATE_DIR/health.pid and the "health" state file.
start_health_checker() {
  (
    trap 'log "health checker stopping"; exit 0' SIGTERM SIGINT
    # $$ still names the parent script inside a subshell; BASHPID is the
    # pid of this subshell, which is also what the parent stores from $!.
    local self_pid="$BASHPID"
    write_state health "STATUS=running" "PID=$self_pid" "INTERVAL=$HEALTH_CHECK_INTERVAL"
    log "health checker started (pid=$self_pid, interval=${HEALTH_CHECK_INTERVAL}s)"
    local checks_ok=0
    local checks_fail=0
    while true; do
      sleep "$HEALTH_CHECK_INTERVAL"
      local app_pid
      app_pid=$(cat "$STATE_DIR/app.pid" 2>/dev/null || true)
      if [ -z "$app_pid" ] || ! kill -0 "$app_pid" 2>/dev/null; then
        log "health check: app not running, triggering restart..."
        checks_fail=$((checks_fail + 1))
        write_state health "CHECKS_FAIL=$checks_fail" "LAST_CHECK_TIME=$(date +%s)"
        # A failed restart must not end this loop: under "set -e" an
        # unguarded non-zero status would terminate the checker, so no
        # further restart attempts would ever be made.
        restart_app "health_check" \
          || log "health check: restart attempt failed, will retry on next check"
      else
        checks_ok=$((checks_ok + 1))
        write_state health "CHECKS_OK=$checks_ok" "LAST_CHECK_TIME=$(date +%s)"
      fi
    done
  ) &
  local health_pid=$!
  echo "$health_pid" > "$STATE_DIR/health.pid"
}
# ============================================================================
# File Watcher (background process)
# ============================================================================
# Append a directory to the global WATCH_PATHS array unless already listed.
_watch_paths_add_unique() {
  local candidate="$1"
  local known
  for known in "${WATCH_PATHS[@]}"; do
    if [ "$known" = "$candidate" ]; then
      return 0
    fi
  done
  WATCH_PATHS+=("$candidate")
}

# Compute the directories that must be watched.
#
# Globals read:    WATCH_FILES (whitespace-separated list), RESTART_FILE
# Globals written: WATCH_PATHS (unique parent directories),
#                  WATCH_FILES_ARRAY (every entry of WATCH_FILES)
# Entries of WATCH_FILES that are not existing regular files are skipped with
# a logged warning; RESTART_FILE is skipped when its directory is missing.
# Returns: 0 when at least one path is usable, 1 otherwise.
build_watch_paths() {
  WATCH_PATHS=()
  WATCH_FILES_ARRAY=()
  local found_any=false
  local watched_file

  # Parent directories of the watched files
  if [ -n "${WATCH_FILES:-}" ]; then
    read -r -a WATCH_FILES_ARRAY <<< "$WATCH_FILES"
    for watched_file in "${WATCH_FILES_ARRAY[@]}"; do
      if [ ! -f "$watched_file" ]; then
        log "WARNING: WATCH_FILES path not found, skipping: $watched_file"
        continue
      fi
      _watch_paths_add_unique "$(dirname "$watched_file")"
      found_any=true
    done
  fi

  # Directory of the restart trigger file (the file itself may not exist yet)
  if [ -n "${RESTART_FILE:-}" ]; then
    local restart_dir
    restart_dir="$(dirname "$RESTART_FILE")"
    if [ -d "$restart_dir" ]; then
      _watch_paths_add_unique "$restart_dir"
      found_any=true
    else
      log "WARNING: RESTART_FILE directory not found, skipping: $restart_dir"
    fi
  fi

  if $found_any; then
    return 0
  fi
  return 1
}
# Start the background file watcher and record its pid and state.
#
# Runs "inotifywait -m" on every directory in WATCH_PATHS and restarts the
# application when an event names RESTART_FILE or a file listed in
# WATCH_FILES_ARRAY. Callers run build_watch_paths first to fill those arrays.
# Side effects: writes $STATE_DIR/inotify.pid and the "inotify" state file.
start_inotify_watcher() {
  (
    # $$ still names the parent script inside a subshell; BASHPID is this
    # subshell, whose children (inotifywait and the read loop) must be
    # terminated together with it.
    local self_pid="$BASHPID"
    trap 'pkill -P "$self_pid" 2>/dev/null || true; log "inotify watcher stopping"; exit 0' SIGTERM SIGINT
    log "inotify watcher started (pid=$self_pid, watching ${#WATCH_PATHS[@]} paths)"
    local events_triggered=0
    # inotifywait's stderr is merged in, so its informational lines pass
    # through the loop as well; they match no watched path and are ignored.
    inotifywait -m \
      -e close_write -e moved_to -e delete_self \
      --format '%w%f|%e' \
      "${WATCH_PATHS[@]}" 2>&1 | while IFS='|' read -r path ev; do
      # Skip inotify info messages
      [[ "$path" =~ ^Watches\ established ]] && continue
      local should_restart=false
      local reason=""
      local watched_file
      # Check restart file
      if [ -n "${RESTART_FILE:-}" ] && [ "$path" = "$RESTART_FILE" ]; then
        should_restart=true
        reason="restart_file"
      fi
      # Check watched files
      if ! $should_restart; then
        for watched_file in "${WATCH_FILES_ARRAY[@]}"; do
          if [ "$path" = "$watched_file" ]; then
            should_restart=true
            reason="file_change:$(basename "$path")"
            break
          fi
        done
      fi
      if $should_restart; then
        log "inotify: $path ($ev) -> restart"
        events_triggered=$((events_triggered + 1))
        write_state inotify \
          "EVENTS_TRIGGERED=$events_triggered" \
          "LAST_EVENT_TIME=$(date +%s)" \
          "LAST_EVENT_PATH=$path"
        # A failed restart must not end the loop: under "set -e" an
        # unguarded non-zero status would terminate the watcher and skip
        # the trigger-file cleanup below.
        restart_app "$reason" || log "inotify: restart failed (reason=$reason)"
        # Clean up restart file
        if [ "$reason" = "restart_file" ]; then
          rm -f "$RESTART_FILE" 2>/dev/null || true
        fi
      fi
    done
  ) &
  local inotify_pid=$!
  echo "$inotify_pid" > "$STATE_DIR/inotify.pid"
  # Write state after getting the actual PID
  write_state inotify \
    "STATUS=running" \
    "PID=$inotify_pid" \
    "WATCH_COUNT=${#WATCH_PATHS[@]}" \
    "WATCH_FILES=${WATCH_FILES:-}" \
    "RESTART_FILE=${RESTART_FILE:-}"
}
# ============================================================================
# Status Checking
# ============================================================================
# Report whether any managed component (app, health checker, file watcher)
# has a pid file in $STATE_DIR that names a live process.
# Returns: 0 when at least one is alive, 1 otherwise.
is_running() {
  local component pid_file pid
  for component in app health inotify; do
    pid_file="$STATE_DIR/${component}.pid"
    [ -f "$pid_file" ] || continue
    pid=$(cat "$pid_file" 2>/dev/null || true)
    [ -n "$pid" ] || continue
    if kill -0 "$pid" 2>/dev/null; then
      return 0
    fi
  done
  return 1
}
# Format the time elapsed since a start timestamp.
#
# Arguments: $1 - start time in seconds since the epoch
# Outputs:   "N/A" when $1 is empty or "0"; otherwise "<h>h <m>m <s>s",
#            "<m>m <s>s" or "<s>s", using the largest non-zero unit first.
get_uptime() {
  local start_time="$1"
  case "$start_time" in
    "" | 0)
      echo "N/A"
      return
      ;;
  esac

  local elapsed=$(( $(date +%s) - start_time ))
  local h=$(( elapsed / 3600 ))
  local m=$(( (elapsed % 3600) / 60 ))
  local s=$(( elapsed % 60 ))

  if [ "$h" -gt 0 ]; then
    echo "${h}h ${m}m ${s}s"
    return
  fi
  if [ "$m" -gt 0 ]; then
    echo "${m}m ${s}s"
    return
  fi
  echo "${s}s"
}
# ============================================================================
# Commands
# ============================================================================
# Start the health checker and report its pid.
_cmd_start_health_checker() {
  start_health_checker
  local health_pid
  health_pid=$(cat "$STATE_DIR/health.pid" 2>/dev/null)
  echo "✓ Health checker started (pid=$health_pid, interval=${HEALTH_CHECK_INTERVAL}s)"
}

# Start the file watcher when watch paths are available, otherwise mark it
# disabled. With $1 = "true" the monitored paths and files are listed too.
_cmd_start_file_watcher() {
  local verbose="${1:-false}"
  if ! build_watch_paths; then
    write_state inotify "STATUS=disabled"
    echo "○ File watcher disabled (no valid watch paths configured)"
    return 0
  fi
  start_inotify_watcher
  local inotify_pid
  inotify_pid=$(cat "$STATE_DIR/inotify.pid" 2>/dev/null)
  echo "✓ File watcher started (pid=$inotify_pid)"
  if [ "$verbose" != "true" ]; then
    return 0
  fi
  echo " Monitoring paths:"
  local watched
  for watched in "${WATCH_PATHS[@]}"; do
    echo " - $watched"
  done
  if [ -n "${WATCH_FILES:-}" ]; then
    echo " Watch files: ${WATCH_FILES}"
  fi
  if [ -n "${RESTART_FILE:-}" ]; then
    echo " Restart trigger: ${RESTART_FILE}"
  fi
}

# Print where the logs are and how to check on or stop the manager.
_cmd_start_footer() {
  echo "Manager log: $MANAGER_LOG_FILE"
  if [ -n "$APP_LOG_FILE" ]; then
    echo "App log: $APP_LOG_FILE"
  else
    echo "App output: merged with manager log"
  fi
  echo "Use '$0 status' to check, '$0 stop' to stop."
}

# "start" command: launch the application and the configured monitors.
#
# Exits 1 when something is already running, or when the application fails
# to start and health checking is disabled. When the application fails to
# start but health checking is enabled, the monitors are started anyway so
# the health checker can retry, and the command exits 0.
cmd_start() {
  echo "Starting wappman..."
  echo ""
  preflight_check
  init_state_dir

  if is_running; then
    echo "ERROR: Already running. Use 'status' to check or 'stop' first."
    exit 1
  fi

  # Clean up restart trigger file if exists
  if [ -n "${RESTART_FILE:-}" ] && [ -f "$RESTART_FILE" ]; then
    rm -f "$RESTART_FILE"
    echo "○ Cleaned up existing restart trigger file"
  fi

  # Start app
  if ! start_app; then
    echo "ERROR: Failed to start application"
    if [ "${HEALTH_CHECK_INTERVAL:-0}" -le 0 ]; then
      # No health checker: stop the app and give up
      echo "○ No health checker configured, shutting down"
      stop_app
      exit 1
    fi
    # With health checking enabled, let the health checker retry the start
    echo "○ Health checker will attempt to restart the application"
    _cmd_start_health_checker
    _cmd_start_file_watcher false
    echo ""
    echo "Application failed to start, but health checker is running."
    _cmd_start_footer
    exit 0
  fi

  local app_pid
  app_pid=$(cat "$STATE_DIR/app.pid" 2>/dev/null)
  echo "✓ Application started (pid=$app_pid)"

  # Start health checker
  if [ "${HEALTH_CHECK_INTERVAL:-0}" -gt 0 ]; then
    _cmd_start_health_checker
  else
    write_state health "STATUS=disabled"
    echo "○ Health checker disabled (HEALTH_CHECK_INTERVAL=0)"
  fi

  # Start inotify watcher
  _cmd_start_file_watcher true

  echo ""
  echo "All components started successfully."
  _cmd_start_footer
}
# "stop" command: stop the health checker, the file watcher and the app.
#
# Each live component gets SIGTERM, up to STOP_TIMEOUT seconds to exit, then
# SIGKILL. For the file watcher its child processes are signalled as well.
# Stale pid files (empty or naming a dead process) are removed silently.
cmd_stop() {
  echo "Stopping wappman..."
  echo ""
  init_state_dir

  local stopped=0
  local proc pid_file pid waited has_children
  # Stop in order: health → inotify → app
  for proc in health inotify app; do
    pid_file="$STATE_DIR/${proc}.pid"
    [ -f "$pid_file" ] || continue
    pid=$(cat "$pid_file" 2>/dev/null || true)
    if [ -z "$pid" ] || ! kill -0 "$pid" 2>/dev/null; then
      rm -f "$pid_file" 2>/dev/null || true
      continue
    fi

    # Only the watcher subshell has children that must be killed with it
    has_children=false
    if [ "$proc" = "inotify" ]; then
      has_children=true
    fi

    echo "Stopping $proc (pid=$pid)..."
    if $has_children; then
      pkill -P "$pid" 2>/dev/null || true
      sleep 0.3
    fi
    kill "$pid" 2>/dev/null || true

    # Wait for graceful shutdown
    waited=0
    until [ "$waited" -ge "$STOP_TIMEOUT" ] || ! kill -0 "$pid" 2>/dev/null; do
      sleep 1
      waited=$((waited + 1))
    done

    # Force kill if still running
    if kill -0 "$pid" 2>/dev/null; then
      echo " Force killing $proc..."
      if $has_children; then
        pkill -9 -P "$pid" 2>/dev/null || true
      fi
      kill -9 "$pid" 2>/dev/null || true
      sleep 0.5
    fi

    # Final sweep so no orphaned inotifywait is left behind
    if $has_children; then
      pkill -9 -P "$pid" 2>/dev/null || true
    fi

    rm -f "$pid_file" 2>/dev/null || true
    write_state "$proc" "STATUS=stopped" "PID="
    stopped=$((stopped + 1))
  done

  echo ""
  if [ "$stopped" -gt 0 ]; then
    echo "Stopped $stopped component(s)."
  else
    echo "No components were running."
  fi
}
# "restart" command: stop everything, pause one second, start everything.
cmd_restart() {
  local pause_seconds=1
  cmd_stop
  sleep "$pause_seconds"
  cmd_start
}
# "restart-app" command: restart only the application, leaving the monitors
# running. Exits 1 when no component is running or when the restart fails.
cmd_restart_app() {
  init_state_dir
  if ! is_running; then
    echo "ERROR: Manager not running. Use 'start' first."
    exit 1
  fi
  echo "Restarting application only..."
  # Test the result explicitly: an unguarded failure would either abort
  # silently under "set -e" or be followed by a false success message.
  if restart_app "manual"; then
    echo "Application restarted."
  else
    echo "ERROR: Application restart failed. See manager log: ${MANAGER_LOG_FILE:-}" >&2
    exit 1
  fi
}
# "restart-monitor" command: stop and restart the health checker and the
# file watcher without touching the application.
#
# Each live monitor gets SIGTERM, up to STOP_TIMEOUT seconds to exit, then
# SIGKILL. The health checker is started again when HEALTH_CHECK_INTERVAL is
# greater than 0, the file watcher when build_watch_paths succeeds.
cmd_restart_monitor() {
  init_state_dir
  echo "Restarting monitor components (health checker and file watcher)..."
  echo ""

  # Stop monitors
  local stopped=0
  local monitor pid_file pid waited
  for monitor in health inotify; do
    pid_file="$STATE_DIR/${monitor}.pid"
    [ -f "$pid_file" ] || continue
    pid=$(cat "$pid_file" 2>/dev/null || true)
    if [ -z "$pid" ] || ! kill -0 "$pid" 2>/dev/null; then
      rm -f "$pid_file" 2>/dev/null || true
      continue
    fi

    echo "Stopping $monitor (pid=$pid)..."
    case "$monitor" in
      inotify)
        # The watcher's child processes are terminated first
        pkill -P "$pid" 2>/dev/null || true
        sleep 0.3
        ;;
    esac
    kill "$pid" 2>/dev/null || true

    # Wait for graceful shutdown
    waited=0
    until [ "$waited" -ge "$STOP_TIMEOUT" ] || ! kill -0 "$pid" 2>/dev/null; do
      sleep 1
      waited=$((waited + 1))
    done

    # Force kill if still running
    if kill -0 "$pid" 2>/dev/null; then
      echo " Force killing $monitor..."
      case "$monitor" in
        inotify) pkill -9 -P "$pid" 2>/dev/null || true ;;
      esac
      kill -9 "$pid" 2>/dev/null || true
      sleep 0.5
    fi

    # Final cleanup of any remaining watcher children
    case "$monitor" in
      inotify) pkill -9 -P "$pid" 2>/dev/null || true ;;
    esac

    rm -f "$pid_file" 2>/dev/null || true
    write_state "$monitor" "STATUS=stopped" "PID="
    stopped=$((stopped + 1))
  done

  if [ "$stopped" -gt 0 ]; then
    echo "Stopped $stopped monitor(s)."
  fi
  echo ""
  sleep 1

  # Start monitors
  local started=0
  if [ "$HEALTH_CHECK_INTERVAL" -gt 0 ]; then
    echo "Starting health checker..."
    start_health_checker
    started=$((started + 1))
  fi
  if build_watch_paths; then
    echo "Starting file watcher..."
    start_inotify_watcher
    started=$((started + 1))
  fi

  echo ""
  if [ "$started" -gt 0 ]; then
    echo "Started $started monitor(s)."
  else
    echo "No monitors configured to start."
  fi
}
# "clean" command: delete every *.pid and *.state file in $STATE_DIR.
# Refuses (exit 1) while any component is still running.
cmd_clean() {
  init_state_dir
  echo "Cleaning state directory..."
  echo ""

  # Check if anything is running
  if is_running; then
    echo "ERROR: Components are still running. Please stop first."
    echo "Run: $0 stop"
    exit 1
  fi

  # Remove all state files; an unmatched glob stays literal and is skipped
  # by the regular-file test.
  local removed=0
  local candidate
  for candidate in "$STATE_DIR"/*.pid "$STATE_DIR"/*.state; do
    [ -f "$candidate" ] || continue
    rm -f "$candidate"
    echo "Removed: $(basename "$candidate")"
    removed=$((removed + 1))
  done

  echo ""
  if [ "$removed" -eq 0 ]; then
    echo "State directory is already clean."
  else
    echo "Cleaned $removed file(s) from state directory."
  fi
}
# Count manager-log lines that report a successful restart and also match
# the extended regex in $1; prints the count.
_cmd_status_count_restarts() {
  local reason_regex="$1"
  grep "app restarted successfully" "$MANAGER_LOG_FILE" 2>/dev/null \
    | grep -cE "$reason_regex" 2>/dev/null || true
}

# Print the "Application:" section.
_cmd_status_app() {
  echo "Application:"
  local app_pid
  app_pid=$(cat "$STATE_DIR/app.pid" 2>/dev/null || true)
  if [ -z "$app_pid" ] || ! kill -0 "$app_pid" 2>/dev/null; then
    echo " Status: stopped"
    return 0
  fi

  local uptime
  uptime=$(get_uptime "$(read_state app START_TIME)")
  local restart_count
  restart_count=$(read_state app RESTART_COUNT)
  restart_count=${restart_count:-0}

  # Restart totals as recorded in the manager log, split by reason
  local log_restart_count=0
  local log_health_restarts=0
  local log_file_restarts=0
  local log_manual_restarts=0
  if [ -f "$MANAGER_LOG_FILE" ]; then
    log_restart_count=$(grep -c "app restarted successfully" "$MANAGER_LOG_FILE" 2>/dev/null || true)
    log_restart_count=${log_restart_count:-0}
    log_health_restarts=$(_cmd_status_count_restarts "reason=health_check")
    log_health_restarts=${log_health_restarts:-0}
    log_file_restarts=$(_cmd_status_count_restarts "reason=(file_change|restart_file)")
    log_file_restarts=${log_file_restarts:-0}
    log_manual_restarts=$(_cmd_status_count_restarts "reason=manual")
    log_manual_restarts=${log_manual_restarts:-0}
  fi

  echo " Status: running (pid=$app_pid)"
  echo " Uptime: $uptime"
  echo " Restarts: $restart_count (total from log: $log_restart_count)"
  if [ "$log_restart_count" -gt 0 ]; then
    echo " ├─ Health check: $log_health_restarts"
    echo " ├─ File change: $log_file_restarts"
    echo " └─ Manual: $log_manual_restarts"
  fi
  if [ "$restart_count" -gt 0 ]; then
    local last_reason
    last_reason=$(read_state app LAST_RESTART_REASON)
    echo " Last restart: ${last_reason:-unknown}"
  fi
}

# Print the "Health Checker:" section.
_cmd_status_health() {
  echo "Health Checker:"
  if [ "$(read_state health STATUS)" = "disabled" ]; then
    echo " Status: disabled"
    return 0
  fi
  local health_pid
  health_pid=$(cat "$STATE_DIR/health.pid" 2>/dev/null || true)
  if [ -z "$health_pid" ] || ! kill -0 "$health_pid" 2>/dev/null; then
    echo " Status: stopped"
    return 0
  fi
  local interval checks_ok checks_fail
  interval=$(read_state health INTERVAL)
  checks_ok=$(read_state health CHECKS_OK)
  checks_fail=$(read_state health CHECKS_FAIL)
  echo " Status: running (pid=$health_pid)"
  echo " Interval: ${interval:-N/A}s"
  echo " Checks: OK=${checks_ok:-0} FAIL=${checks_fail:-0}"
}

# Print the "File Watcher:" section.
_cmd_status_watcher() {
  echo "File Watcher:"
  if [ "$(read_state inotify STATUS)" = "disabled" ]; then
    echo " Status: disabled"
    return 0
  fi
  local inotify_pid
  inotify_pid=$(cat "$STATE_DIR/inotify.pid" 2>/dev/null || true)
  if [ -z "$inotify_pid" ] || ! kill -0 "$inotify_pid" 2>/dev/null; then
    echo " Status: stopped"
    return 0
  fi

  local watch_count events stored_watch_files stored_restart_file
  watch_count=$(read_state inotify WATCH_COUNT)
  events=$(read_state inotify EVENTS_TRIGGERED)
  stored_watch_files=$(read_state inotify WATCH_FILES)
  stored_restart_file=$(read_state inotify RESTART_FILE)

  echo " Status: running (pid=$inotify_pid)"
  echo " Events: ${events:-0} triggered"
  echo " Monitoring:"
  # Rebuild watch paths to show current monitoring
  local watched
  if build_watch_paths; then
    for watched in "${WATCH_PATHS[@]}"; do
      echo " - $watched"
    done
  fi

  # Show watch files (the stored list is whitespace-separated, so it is
  # deliberately expanded unquoted)
  if [ -n "$stored_watch_files" ]; then
    echo " Watch files:"
    for watched in $stored_watch_files; do
      if [ -f "$watched" ]; then
        echo " ✓ $watched"
      else
        echo " ✗ $watched (not found)"
      fi
    done
  fi

  # Show restart trigger file
  if [ -n "$stored_restart_file" ]; then
    if [ -f "$stored_restart_file" ]; then
      echo " Restart trigger: $stored_restart_file ✓"
    else
      echo " Restart trigger: $stored_restart_file (will be monitored when created)"
    fi
  fi
}

# "status" command: print the configuration summary followed by the state
# of the application, the health checker and the file watcher.
cmd_status() {
  init_state_dir
  echo "wappman status"
  echo "═══════════════════════════════════════════════════════════"
  echo ""

  # Configuration Info
  echo "Configuration:"
  echo " Config file: $CONF"
  echo " State dir: $STATE_DIR"
  local exec_cmd="$APP_EXEC"
  if [ -n "$APP_ARGS" ]; then
    exec_cmd="$exec_cmd $APP_ARGS"
  fi
  echo " Command: $exec_cmd"
  echo " Manager log: $MANAGER_LOG_FILE"
  if [ -n "$APP_LOG_FILE" ]; then
    echo " App log: $APP_LOG_FILE"
  else
    echo " App log: (merged with manager log)"
  fi
  echo ""

  _cmd_status_app
  echo ""
  _cmd_status_health
  echo ""
  _cmd_status_watcher
  echo ""
  echo "═══════════════════════════════════════════════════════════"
}
# ============================================================================
# Main Entry Point
# ============================================================================
# Print the command-line help. The heredoc is unquoted on purpose so that
# $0, $SCRIPT_DIR and $CONF are expanded into the text.
show_usage() {
  cat << USAGE_TEXT
Usage: $0 [--config <path>] <command>
Options:
--config <path> Specify configuration file path (default: $SCRIPT_DIR/wappman.conf)
Commands:
start Start all components (app, health checker, file watcher)
stop Stop all components
restart Restart all components (stop + start)
restart-app Restart application only (keep monitors running)
restart-monitor Restart monitor components (health checker and file watcher)
status Show current status of all components
clean Clean all state files (must stop first)
Configuration:
Edit $CONF to configure the manager.
USAGE_TEXT
}
# Entry point: parse "--config", then dispatch the requested command.
#
# Arguments: [--config <path> | --config=<path>] <command>
# With no command: offers to create a configuration template when the config
# file is missing, then prints the usage text and exits 1 (or exits 0 right
# after creating the template). An unknown command prints usage and exits 1.
main() {
  # Parse --config option
  while [ $# -gt 0 ]; do
    case "$1" in
      --config)
        if [ -z "${2:-}" ]; then
          echo "ERROR: --config requires a path argument"
          exit 1
        fi
        CONF="$2"
        shift 2
        ;;
      --config=*)
        CONF="${1#--config=}"
        shift
        ;;
      *)
        break
        ;;
    esac
  done

  local command="${1:-}"
  if [ -z "$command" ]; then
    # Check if config exists when no command is provided
    if [ ! -f "$CONF" ]; then
      echo "Configuration file not found: $CONF"
      echo ""
      local response=""
      # read fails at end of input (e.g. stdin is not a terminal); treat
      # that as "no" instead of letting "set -e" abort before the usage
      # text is shown. -r keeps backslashes in the answer literal.
      read -r -p "Create configuration template? (y/N): " response || response=""
      case "$response" in
        [yY] | [yY][eE][sS])
          create_config_template
          echo ""
          echo "✓ Configuration template created: $CONF"
          echo ""
          echo "Next steps:"
          echo " 1. Edit the configuration file:"
          echo " vim $CONF"
          echo " 2. Set APP_EXEC to your application path"
          echo " 3. Run: $0 start"
          exit 0
          ;;
        *)
          echo "Configuration template not created."
          echo ""
          ;;
      esac
    fi
    show_usage
    exit 1
  fi

  # Every command needs the configuration
  load_config

  case "$command" in
    start)
      cmd_start
      ;;
    stop)
      cmd_stop
      ;;
    restart)
      cmd_restart
      ;;
    restart-app)
      cmd_restart_app
      ;;
    restart-monitor)
      cmd_restart_monitor
      ;;
    status)
      cmd_status
      ;;
    clean)
      cmd_clean
      ;;
    *)
      echo "ERROR: Unknown command: $command"
      echo ""
      show_usage
      exit 1
      ;;
  esac
}
# Run the entry point with the script's command-line arguments.
main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment