Launch gpt-oss-120b with Ollama on AWS Sydney region
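A single CloudFormation template that provisions a VPC and a GPU EC2 instance (Spot or On-Demand), installs Ollama, pulls a default model, and signals completion through a CloudFormation wait condition.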
AWSTemplateFormatVersion: '2010-09-09'
Description: 'GPU instance for Ollama with flexible instance type selection (P4d/P4de A100 or G5 A10G)'
Parameters:
  InstanceType:
    Type: String
    Default: 'g5.12xlarge'
    AllowedValues:
      # Hourly rates are indicative On-Demand prices; verify current ap-southeast-2 rates before launching.
      - 'g5.2xlarge'    # 1x A10G (24GB) - ~$1.006/hr
      - 'g5.4xlarge'    # 1x A10G (24GB) - ~$1.624/hr
      - 'g5.12xlarge'   # 4x A10G (96GB total) - ~$5.672/hr
      - 'g5.24xlarge'   # 4x A10G (96GB total) - ~$10.888/hr
      - 'g5.48xlarge'   # 8x A10G (192GB total) - ~$16.288/hr
      - 'p4d.24xlarge'  # 8x A100 40GB (320GB total) - ~$32.77/hr
      - 'p4de.24xlarge' # 8x A100 80GB (640GB total) - ~$40.96/hr
    Description: 'GPU instance type - G5 for cost-effectiveness, P4d/P4de for maximum power'
  KeyPairName:
    Type: AWS::EC2::KeyPair::KeyName
    Description: 'EC2 Key Pair for SSH access to the instance'
  UseSpotInstance:
    Type: String
    Default: 'true'
    AllowedValues: ['true', 'false']
    Description: 'Use Spot instance for 60-70% cost savings'
  SpotMaxPrice:
    Type: String
    Default: '3.00'
    Description: 'Maximum Spot price per hour (adjust based on instance type)'
  VpcCidr:
    Type: String
    Default: '10.0.0.0/16'
    Description: 'CIDR block for the VPC'
  AllowedCidrBlock:
    Type: String
    Default: '0.0.0.0/0'
    Description: 'CIDR block allowed to access Ollama API'
  DefaultModel:
    Type: String
    Default: 'llama3.1:8b'
    Description: 'Default Ollama model (use smaller models for G5 instances)'
  ProjectName:
    Type: String
    Default: 'ollama-gpu'
    Description: 'Project name for resource tagging'
  Environment:
    Type: String
    Default: 'development'
    AllowedValues: ['development', 'staging', 'production']
    Description: 'Environment designation'
Conditions:
  UseSpot: !Equals [!Ref UseSpotInstance, 'true']
  UseOnDemand: !Not [!Equals [!Ref UseSpotInstance, 'true']]
  IsP4Instance: !Or
    - !Equals [!Ref InstanceType, 'p4d.24xlarge']
    - !Equals [!Ref InstanceType, 'p4de.24xlarge']
  IsLargeG5: !Or
    - !Equals [!Ref InstanceType, 'g5.24xlarge']
    - !Equals [!Ref InstanceType, 'g5.48xlarge']
Mappings:
  RegionMap:
    ap-southeast-2:
      # Deep Learning AMI Ubuntu 22.04 - verify this ID is current for ap-southeast-2 before deploying
      AMI: 'ami-0c02fb55956c7d316'
  InstanceSpecs:
    g5.2xlarge:
      GPUs: 1
      GPUMemory: 24
      RecommendedModel: 'llama3.1:8b'
    g5.4xlarge:
      GPUs: 1
      GPUMemory: 24
      RecommendedModel: 'llama3.1:8b'
    g5.12xlarge:
      GPUs: 4
      GPUMemory: 96
      RecommendedModel: 'llama3.1:70b'
    g5.24xlarge:
      GPUs: 4
      GPUMemory: 96
      RecommendedModel: 'llama3.1:70b'
    g5.48xlarge:
      GPUs: 8
      GPUMemory: 192
      RecommendedModel: 'llama3.1:405b'
    p4d.24xlarge:
      GPUs: 8
      GPUMemory: 320
      RecommendedModel: 'gpt-oss:120b'
    p4de.24xlarge:
      GPUs: 8
      GPUMemory: 640
      RecommendedModel: 'gpt-oss:120b'
Resources:
  # VPC and Networking
  VPC:
    Type: AWS::EC2::VPC
    Properties:
      CidrBlock: !Ref VpcCidr
      EnableDnsHostnames: true
      EnableDnsSupport: true
      Tags:
        - Key: Name
          Value: !Sub '${ProjectName}-${Environment}-vpc'
  InternetGateway:
    Type: AWS::EC2::InternetGateway
    Properties:
      Tags:
        - Key: Name
          Value: !Sub '${ProjectName}-${Environment}-igw'
  AttachGateway:
    Type: AWS::EC2::VPCGatewayAttachment
    Properties:
      VpcId: !Ref VPC
      InternetGatewayId: !Ref InternetGateway
  PublicSubnet:
    Type: AWS::EC2::Subnet
    Properties:
      VpcId: !Ref VPC
      CidrBlock: '10.0.1.0/24'
      AvailabilityZone: !Select [0, !GetAZs '']
      MapPublicIpOnLaunch: true
      Tags:
        - Key: Name
          Value: !Sub '${ProjectName}-${Environment}-public-subnet'
  PublicRouteTable:
    Type: AWS::EC2::RouteTable
    Properties:
      VpcId: !Ref VPC
      Tags:
        - Key: Name
          Value: !Sub '${ProjectName}-${Environment}-public-rt'
  PublicRoute:
    Type: AWS::EC2::Route
    DependsOn: AttachGateway
    Properties:
      RouteTableId: !Ref PublicRouteTable
      DestinationCidrBlock: '0.0.0.0/0'
      GatewayId: !Ref InternetGateway
  PublicSubnetRouteTableAssociation:
    Type: AWS::EC2::SubnetRouteTableAssociation
    Properties:
      SubnetId: !Ref PublicSubnet
      RouteTableId: !Ref PublicRouteTable
  # Security Group
  OllamaSecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: 'Security group for Ollama GPU instance'
      VpcId: !Ref VPC
      SecurityGroupIngress:
        - IpProtocol: tcp
          FromPort: 22
          ToPort: 22
          CidrIp: !Ref AllowedCidrBlock
          Description: 'SSH access'
        - IpProtocol: tcp
          FromPort: 11434
          ToPort: 11434
          CidrIp: !Ref AllowedCidrBlock
          Description: 'Ollama API access'
      SecurityGroupEgress:
        - IpProtocol: '-1'
          CidrIp: '0.0.0.0/0'
          Description: 'All outbound traffic'
      Tags:
        - Key: Name
          Value: !Sub '${ProjectName}-${Environment}-ollama-sg'
  # IAM Role
  GPUInstanceRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service: ec2.amazonaws.com
            Action: sts:AssumeRole
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy
      Policies:
        - PolicyName: CloudFormationSignaling
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - cloudformation:SignalResource
                Resource: !Sub 'arn:aws:cloudformation:${AWS::Region}:${AWS::AccountId}:stack/${AWS::StackName}/*'
      Tags:
        - Key: Name
          Value: !Sub '${ProjectName}-${Environment}-gpu-instance-role'
  GPUInstanceProfile:
    Type: AWS::IAM::InstanceProfile
    Properties:
      Roles:
        - !Ref GPUInstanceRole
  # Wait Condition
  OllamaWaitConditionHandle:
    Type: AWS::CloudFormation::WaitConditionHandle
  OllamaWaitCondition:
    Type: AWS::CloudFormation::WaitCondition
    # No DependsOn here: the instance resources below are conditional (Spot vs On-Demand),
    # and DependsOn cannot reference a resource whose condition evaluates to false.
    Properties:
      Handle: !Ref OllamaWaitConditionHandle
      Timeout: '1800' # 30 minutes
      Count: 1
  # Launch Template
  GPULaunchTemplate:
    Type: AWS::EC2::LaunchTemplate
    Properties:
      LaunchTemplateName: !Sub '${ProjectName}-${Environment}-gpu-template'
      LaunchTemplateData:
        ImageId: !FindInMap [RegionMap, !Ref 'AWS::Region', AMI]
        InstanceType: !Ref InstanceType
        KeyName: !Ref KeyPairName
        IamInstanceProfile:
          Name: !Ref GPUInstanceProfile
        SecurityGroupIds:
          - !Ref OllamaSecurityGroup
        BlockDeviceMappings:
          - DeviceName: /dev/sda1
            Ebs:
              VolumeSize: !If [IsP4Instance, 500, 200]
              VolumeType: gp3
              DeleteOnTermination: true
              Encrypted: true
        UserData:
          # Two-argument !Sub form: !FindInMap cannot appear inside a Sub string, so the
          # mapping lookups are injected as ${ExpectedGpus}, ${RecommendedModelName}, ${GpuMemoryTotal}.
          Fn::Base64: !Sub
            - |
              #!/bin/bash
              set -e

              # Logging setup
              LOGFILE="/var/log/ollama-deployment.log"
              exec 1>>"$LOGFILE" 2>&1
              log_info() {
                echo "[$(date '+%Y-%m-%d %H:%M:%S')] INFO: $1"
              }
              signal_success() {
                curl -X PUT -H 'Content-Type:' \
                  --data-binary '{"Status": "SUCCESS","Reason": "Ollama deployment completed successfully","UniqueId": "ollama-deployment","Data": "Success"}' \
                  "${OllamaWaitConditionHandle}"
              }
              signal_failure() {
                curl -X PUT -H 'Content-Type:' \
                  --data-binary '{"Status": "FAILURE","Reason": "'"$1"'","UniqueId": "ollama-deployment","Data": "Failed"}' \
                  "${OllamaWaitConditionHandle}"
                exit 1
              }
              trap 'signal_failure "Deployment failed at line $LINENO"' ERR
              log_info "Starting GPU instance deployment - ${InstanceType}"

              # System updates (nvidia-smi ships with the GPU driver on the Deep Learning AMI; it is not an apt package)
              log_info "Updating system packages"
              apt-get update -y
              apt-get upgrade -y
              apt-get install -y curl wget jq htop

              # Verify GPU availability
              log_info "Checking GPU status"
              if ! nvidia-smi; then
                signal_failure "NVIDIA GPUs not detected"
              fi
              GPU_COUNT=$(nvidia-smi --list-gpus | wc -l)
              EXPECTED_GPUS=${ExpectedGpus}
              if [ "$GPU_COUNT" -ne "$EXPECTED_GPUS" ]; then
                signal_failure "Expected $EXPECTED_GPUS GPUs but found $GPU_COUNT"
              fi
              log_info "Detected $GPU_COUNT GPUs as expected"
              # Install Docker (for optional containerized workloads; Ollama itself runs natively below).
              # apt-key is deprecated on Ubuntu 22.04, so use keyring-based repository setup instead.
              log_info "Installing Docker"
              install -m 0755 -d /etc/apt/keyrings
              curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg
              echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" \
                > /etc/apt/sources.list.d/docker.list
              apt-get update -y
              apt-get install -y docker-ce docker-ce-cli containerd.io

              # Install NVIDIA Container Toolkit (the old nvidia-docker apt repo is deprecated;
              # this uses the current libnvidia-container repository)
              log_info "Installing NVIDIA Container Toolkit"
              curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | \
                gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
              curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
                sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
                tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
              apt-get update -y
              apt-get install -y nvidia-container-toolkit

              # Configure Docker for GPU
              cat > /etc/docker/daemon.json << 'EOF'
              {
                "default-runtime": "nvidia",
                "runtimes": {
                  "nvidia": {
                    "path": "nvidia-container-runtime",
                    "runtimeArgs": []
                  }
                }
              }
              EOF
              systemctl restart docker
              systemctl enable docker
              usermod -aG docker ubuntu
              # Install Ollama (the official install script also creates an 'ollama' user and a systemd unit)
              log_info "Installing Ollama"
              curl -fsSL https://ollama.com/install.sh | sh

              # Ensure the ollama user exists; guard the useradd so set -e does not abort
              # when the install script has already created it
              id -u ollama >/dev/null 2>&1 || useradd -r -s /bin/false -m -d /usr/share/ollama ollama

              # Overwrite the service unit so the API listens on all interfaces
              cat > /etc/systemd/system/ollama.service << 'EOF'
              [Unit]
              Description=Ollama Service
              After=network-online.target
              Wants=network-online.target
              [Service]
              ExecStart=/usr/local/bin/ollama serve
              User=ollama
              Group=ollama
              Restart=always
              RestartSec=3
              Environment="OLLAMA_HOST=0.0.0.0:11434"
              Environment="OLLAMA_ORIGINS=*"
              [Install]
              WantedBy=multi-user.target
              EOF

              # (Re)start Ollama with the new unit
              systemctl daemon-reload
              systemctl enable ollama
              systemctl restart ollama

              # Wait for Ollama to be ready
              log_info "Waiting for Ollama service to start"
              for i in {1..60}; do
                if curl -f http://localhost:11434/api/tags >/dev/null 2>&1; then
                  log_info "Ollama service is ready"
                  break
                fi
                if [ $i -eq 60 ]; then
                  signal_failure "Ollama service failed to start within timeout"
                fi
                sleep 5
              done

              # Download the requested model, falling back to the recommendation for this instance type
              RECOMMENDED_MODEL="${RecommendedModelName}"
              DEFAULT_MODEL="${DefaultModel}"
              log_info "Downloading model: $DEFAULT_MODEL"
              if ! timeout 1800 ollama pull "$DEFAULT_MODEL"; then
                log_info "Failed to download $DEFAULT_MODEL, trying recommended model: $RECOMMENDED_MODEL"
                if ! timeout 1800 ollama pull "$RECOMMENDED_MODEL"; then
                  signal_failure "Failed to download any model"
                fi
              fi
              # Create startup summary (an IMDSv2 token is required to read instance metadata)
              IMDS_TOKEN=$(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 300")
              PUBLIC_IP=$(curl -s -H "X-aws-ec2-metadata-token: $IMDS_TOKEN" http://169.254.169.254/latest/meta-data/public-ipv4)
              cat > /home/ubuntu/deployment-summary.txt << EOF
              ==========================================
              OLLAMA GPU DEPLOYMENT SUMMARY
              ==========================================
              Instance Type: ${InstanceType}
              GPUs: $GPU_COUNT x $(nvidia-smi --query-gpu=name --format=csv,noheader,nounits | head -1)
              GPU Memory: ${GpuMemoryTotal}GB total
              Ollama Endpoint: http://$PUBLIC_IP:11434
              Available Models:
              $(ollama list)
              Test Commands:
              curl http://localhost:11434/api/tags
              ollama run $DEFAULT_MODEL
              GPU Status:
              $(nvidia-smi)
              ==========================================
              EOF
              chown ubuntu:ubuntu /home/ubuntu/deployment-summary.txt
              log_info "Deployment completed successfully"
              signal_success
            - ExpectedGpus: !FindInMap [InstanceSpecs, !Ref InstanceType, GPUs]
              RecommendedModelName: !FindInMap [InstanceSpecs, !Ref InstanceType, RecommendedModel]
              GpuMemoryTotal: !FindInMap [InstanceSpecs, !Ref InstanceType, GPUMemory]
        TagSpecifications:
          - ResourceType: instance
            Tags:
              - Key: Name
                Value: !Sub '${ProjectName}-${Environment}-ollama-gpu'
              - Key: Project
                Value: !Ref ProjectName
              - Key: Environment
                Value: !Ref Environment
              - Key: InstanceType
                Value: !Ref InstanceType
  # GPU Instance - a resource Type cannot be selected with !If, so the Spot and
  # On-Demand variants are declared as separate conditional resources.
  SpotGPUFleet:
    Type: AWS::EC2::SpotFleet
    Condition: UseSpot
    Properties:
      SpotFleetRequestConfigData:
        # This IAM role is not created by the template and must already exist in the account
        IamFleetRole: !Sub 'arn:aws:iam::${AWS::AccountId}:role/aws-ec2-spot-fleet-tagging-role'
        AllocationStrategy: 'diversified'
        TargetCapacity: 1
        SpotPrice: !Ref SpotMaxPrice
        LaunchTemplateConfigs:
          - LaunchTemplateSpecification:
              LaunchTemplateId: !Ref GPULaunchTemplate
              Version: !GetAtt GPULaunchTemplate.LatestVersionNumber
            Overrides:
              - InstanceType: !Ref InstanceType
                SubnetId: !Ref PublicSubnet
                WeightedCapacity: 1
        ReplaceUnhealthyInstances: true
        Type: 'maintain'
  GPUInstance:
    Type: AWS::EC2::Instance
    Condition: UseOnDemand
    Properties:
      LaunchTemplate:
        LaunchTemplateId: !Ref GPULaunchTemplate
        Version: !GetAtt GPULaunchTemplate.LatestVersionNumber
      SubnetId: !Ref PublicSubnet
Outputs:
  InstanceType:
    Description: 'Selected GPU instance type'
    Value: !Ref InstanceType
  ExpectedGPUs:
    Description: 'Number of GPUs in the instance'
    Value: !FindInMap [InstanceSpecs, !Ref InstanceType, GPUs]
  TotalGPUMemory:
    Description: 'Total GPU memory available'
    Value: !Sub
      - '${GpuMemoryTotal}GB'
      - GpuMemoryTotal: !FindInMap [InstanceSpecs, !Ref InstanceType, GPUMemory]
  RecommendedModel:
    Description: 'Recommended model for this instance type'
    Value: !FindInMap [InstanceSpecs, !Ref InstanceType, RecommendedModel]
  OllamaEndpoint:
    Description: 'Ollama API endpoint (get IP from EC2 console)'
    Value: 'http://[INSTANCE-PUBLIC-IP]:11434'
  SSHCommand:
    Description: 'SSH command to connect to the instance'
    Value: !Sub 'ssh -i ${KeyPairName}.pem ubuntu@[INSTANCE-PUBLIC-IP]'
  EstimatedCostPerHour:
    Description: 'Estimated cost per hour for selected instance type'
    Value: !If
      - UseSpot
      - 'Spot: ~30-70% of On-Demand pricing (varies by availability)'
      - 'On-Demand pricing - check the AWS pricing page for current rates'
  CostOptimizationTips:
    Description: 'Tips to reduce costs'
    Value: |
      1. Use Spot instances for development/training (60-70% savings)
      2. Stop instances when not in use
      3. Consider smaller G5 instances for lighter workloads
      4. Use Savings Plans for predictable workloads
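
To deploy, here is a minimal sketch using the AWS CLI; it assumes the template is saved as ollama-gpu.yaml and that an EC2 key pair named my-key already exists in ap-southeast-2 (both names are placeholders):

# CAPABILITY_IAM is required because the template creates an IAM role
aws cloudformation create-stack \
  --stack-name ollama-gpu-dev \
  --template-body file://ollama-gpu.yaml \
  --parameters ParameterKey=KeyPairName,ParameterValue=my-key \
               ParameterKey=InstanceType,ParameterValue=g5.12xlarge \
  --capabilities CAPABILITY_IAM \
  --region ap-southeast-2

# The wait condition allows up to 30 minutes for GPU checks and the model download
aws cloudformation wait stack-create-complete --stack-name ollama-gpu-dev --region ap-southeast-2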
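Once the stack completes, the API can be exercised directly with the standard Ollama endpoints; INSTANCE_IP below is a placeholder for the public IP shown in the EC2 console:

# List the models the instance has pulled
curl http://INSTANCE_IP:11434/api/tags

# One-shot generation against the default model (the model name must match what was pulled)
curl http://INSTANCE_IP:11434/api/generate -d '{
  "model": "llama3.1:8b",
  "prompt": "Why is the sky blue?",
  "stream": false
}'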