@igorcosta · Created August 5, 2025 22:02
Launch gpt-oss-120b with Ollama on AWS Sydney region
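
One way to deploy the template below with the AWS CLI (the file, stack, and key-pair names here are illustrative; the key pair must already exist in ap-southeast-2):

  aws cloudformation deploy \
    --region ap-southeast-2 \
    --stack-name ollama-gpu \
    --template-file ollama-gpu.yaml \
    --capabilities CAPABILITY_IAM \
    --parameter-overrides KeyPairName=my-keypair

The template follows.
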
AWSTemplateFormatVersion: '2010-09-09'
Description: 'GPU instance for Ollama with flexible instance type selection (P4d/P4de A100 or G5 A10G)'

Parameters:
  InstanceType:
    Type: String
    Default: 'g5.12xlarge'
    AllowedValues:
      - 'g5.2xlarge'    # 1x A10G (24GB)        - ~$1.006/hr
      - 'g5.4xlarge'    # 1x A10G (24GB)        - ~$1.624/hr
      - 'g5.12xlarge'   # 4x A10G (96GB total)  - ~$5.672/hr
      - 'g5.24xlarge'   # 4x A10G (96GB total)  - ~$10.888/hr
      - 'g5.48xlarge'   # 8x A10G (192GB total) - ~$16.288/hr
      - 'p4d.24xlarge'  # 8x A100 40GB (320GB total) - ~$32.77/hr
      - 'p4de.24xlarge' # 8x A100 80GB (640GB total) - ~$40.96/hr
    Description: 'GPU instance type - G5 for cost-effectiveness, P4d/P4de for maximum power'
  KeyPairName:
    Type: AWS::EC2::KeyPair::KeyName
    Description: 'EC2 Key Pair for SSH access to the instance'
  UseSpotInstance:
    Type: String
    Default: 'true'
    AllowedValues: ['true', 'false']
    Description: 'Use a Spot request for roughly 60-70% cost savings'
  SpotMaxPrice:
    Type: String
    Default: '3.00'
    Description: 'Maximum Spot price per hour (adjust based on instance type)'
  VpcCidr:
    Type: String
    Default: '10.0.0.0/16'
    Description: 'CIDR block for the VPC'
  AllowedCidrBlock:
    Type: String
    Default: '0.0.0.0/0'
    Description: 'CIDR block allowed to access SSH and the Ollama API (restrict this in production)'
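  # The default above exposes SSH and the Ollama API to the entire internet.
  # A safer, illustrative override at deploy time (an office CIDR, say):
  #   --parameter-overrides AllowedCidrBlock=203.0.113.0/24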
  DefaultModel:
    Type: String
    Default: 'llama3.1:8b'
    Description: 'Default Ollama model to pull at boot (use smaller models for G5 instances)'
  ProjectName:
    Type: String
    Default: 'ollama-gpu'
    Description: 'Project name for resource tagging'
  Environment:
    Type: String
    Default: 'development'
    AllowedValues: ['development', 'staging', 'production']
    Description: 'Environment designation'
Conditions:
  UseSpot: !Equals [!Ref UseSpotInstance, 'true']
  # A resource Type cannot be switched with !If, so the Spot and On-Demand
  # variants below are separate resources gated by these two conditions.
  UseOnDemand: !Equals [!Ref UseSpotInstance, 'false']
  IsP4Instance: !Or
    - !Equals [!Ref InstanceType, 'p4d.24xlarge']
    - !Equals [!Ref InstanceType, 'p4de.24xlarge']
  IsLargeG5: !Or
    - !Equals [!Ref InstanceType, 'g5.24xlarge']
    - !Equals [!Ref InstanceType, 'g5.48xlarge']
Mappings:
  RegionMap:
    ap-southeast-2:
      # Deep Learning AMI Ubuntu 22.04 - verify this AMI ID is current for
      # ap-southeast-2 before deploying; AMI IDs are region-specific and rotate.
      AMI: 'ami-0c02fb55956c7d316'
  # GPU count, total GPU memory (GB), and a model suggestion per instance type.
  # Values are kept as strings so they can be used directly in !Sub and Outputs.
  InstanceSpecs:
    g5.2xlarge:
      GPUs: '1'
      GPUMemory: '24'
      RecommendedModel: 'llama3.1:8b'
    g5.4xlarge:
      GPUs: '1'
      GPUMemory: '24'
      RecommendedModel: 'llama3.1:8b'
    g5.12xlarge:
      GPUs: '4'
      GPUMemory: '96'
      RecommendedModel: 'llama3.1:70b'
    g5.24xlarge:
      GPUs: '4'
      GPUMemory: '96'
      RecommendedModel: 'llama3.1:70b'
    g5.48xlarge:
      GPUs: '8'
      GPUMemory: '192'
      # Note: a 4-bit quantized 405b model may not fit in 192GB; validate before relying on it
      RecommendedModel: 'llama3.1:405b'
    p4d.24xlarge:
      GPUs: '8'
      GPUMemory: '320'   # 8x A100 40GB
      RecommendedModel: 'gpt-oss:120b'
    p4de.24xlarge:
      GPUs: '8'
      GPUMemory: '640'   # 8x A100 80GB
      RecommendedModel: 'gpt-oss:120b'
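# How the mapping resolves: with the default g5.12xlarge,
# !FindInMap [InstanceSpecs, !Ref InstanceType, GPUs] evaluates to '4',
# GPUMemory to '96', and RecommendedModel to 'llama3.1:70b'.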
Resources:
  # VPC and Networking
  VPC:
    Type: AWS::EC2::VPC
    Properties:
      CidrBlock: !Ref VpcCidr
      EnableDnsHostnames: true
      EnableDnsSupport: true
      Tags:
        - Key: Name
          Value: !Sub '${ProjectName}-${Environment}-vpc'
  InternetGateway:
    Type: AWS::EC2::InternetGateway
    Properties:
      Tags:
        - Key: Name
          Value: !Sub '${ProjectName}-${Environment}-igw'
  AttachGateway:
    Type: AWS::EC2::VPCGatewayAttachment
    Properties:
      VpcId: !Ref VPC
      InternetGatewayId: !Ref InternetGateway
  PublicSubnet:
    Type: AWS::EC2::Subnet
    Properties:
      VpcId: !Ref VPC
      CidrBlock: '10.0.1.0/24'   # assumes the default VpcCidr of 10.0.0.0/16
      AvailabilityZone: !Select [0, !GetAZs '']
      MapPublicIpOnLaunch: true
      Tags:
        - Key: Name
          Value: !Sub '${ProjectName}-${Environment}-public-subnet'
  PublicRouteTable:
    Type: AWS::EC2::RouteTable
    Properties:
      VpcId: !Ref VPC
      Tags:
        - Key: Name
          Value: !Sub '${ProjectName}-${Environment}-public-rt'
  PublicRoute:
    Type: AWS::EC2::Route
    DependsOn: AttachGateway
    Properties:
      RouteTableId: !Ref PublicRouteTable
      DestinationCidrBlock: '0.0.0.0/0'
      GatewayId: !Ref InternetGateway
  PublicSubnetRouteTableAssociation:
    Type: AWS::EC2::SubnetRouteTableAssociation
    Properties:
      SubnetId: !Ref PublicSubnet
      RouteTableId: !Ref PublicRouteTable
  # Security Group
  OllamaSecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: 'Security group for Ollama GPU instance'
      VpcId: !Ref VPC
      SecurityGroupIngress:
        - IpProtocol: tcp
          FromPort: 22
          ToPort: 22
          CidrIp: !Ref AllowedCidrBlock
          Description: 'SSH access'
        - IpProtocol: tcp
          FromPort: 11434
          ToPort: 11434
          CidrIp: !Ref AllowedCidrBlock
          Description: 'Ollama API access'
      SecurityGroupEgress:
        - IpProtocol: '-1'
          CidrIp: 0.0.0.0/0
          Description: 'All outbound traffic'
      Tags:
        - Key: Name
          Value: !Sub '${ProjectName}-${Environment}-ollama-sg'
  # IAM Role
  GPUInstanceRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service: ec2.amazonaws.com
            Action: sts:AssumeRole
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy
      Policies:
        - PolicyName: CloudFormationSignaling
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - cloudformation:SignalResource
                Resource: !Sub 'arn:aws:cloudformation:${AWS::Region}:${AWS::AccountId}:stack/${AWS::StackName}/*'
      Tags:
        - Key: Name
          Value: !Sub '${ProjectName}-${Environment}-gpu-instance-role'
  GPUInstanceProfile:
    Type: AWS::IAM::InstanceProfile
    Properties:
      Roles:
        - !Ref GPUInstanceRole
  # Wait Condition - the handle URL is injected into UserData below, so the
  # signal works for both the Spot and On-Demand variants. DependsOn is omitted
  # because it cannot reference a conditionally created resource.
  OllamaWaitConditionHandle:
    Type: AWS::CloudFormation::WaitConditionHandle
  OllamaWaitCondition:
    Type: AWS::CloudFormation::WaitCondition
    Properties:
      Handle: !Ref OllamaWaitConditionHandle
      Timeout: '1800' # 30 minutes
      Count: 1
  # Launch Template
  GPULaunchTemplate:
    Type: AWS::EC2::LaunchTemplate
    Properties:
      LaunchTemplateName: !Sub '${ProjectName}-${Environment}-gpu-template'
      LaunchTemplateData:
        ImageId: !FindInMap [RegionMap, !Ref 'AWS::Region', AMI]
        InstanceType: !Ref InstanceType
        KeyName: !Ref KeyPairName
        IamInstanceProfile:
          Name: !Ref GPUInstanceProfile
        SecurityGroupIds:
          - !Ref OllamaSecurityGroup
        BlockDeviceMappings:
          - DeviceName: /dev/sda1
            Ebs:
              VolumeSize: !If [IsP4Instance, 500, 200]
              VolumeType: gp3
              DeleteOnTermination: true
              Encrypted: true
        UserData:
          # The two-argument form of !Sub is required here: !FindInMap cannot
          # appear inside a plain ${} substitution, so the mapped values are
          # bound to named variables in the map that follows the script body.
          Fn::Base64: !Sub
            - |
              #!/bin/bash
              set -e
              # Logging setup
              LOGFILE="/var/log/ollama-deployment.log"
              exec 1>>"$LOGFILE" 2>&1
              log_info() {
                echo "[$(date '+%Y-%m-%d %H:%M:%S')] INFO: $1"
              }
              signal_success() {
                curl -X PUT -H 'Content-Type:' \
                  --data-binary '{"Status": "SUCCESS","Reason": "Ollama deployment completed successfully","UniqueId": "ollama-deployment","Data": "Success"}' \
                  "${OllamaWaitConditionHandle}"
              }
              signal_failure() {
                # Quote "$1" so failure reasons containing spaces stay valid JSON
                curl -X PUT -H 'Content-Type:' \
                  --data-binary '{"Status": "FAILURE","Reason": "'"$1"'","UniqueId": "ollama-deployment","Data": "Failed"}' \
                  "${OllamaWaitConditionHandle}"
                exit 1
              }
              trap 'signal_failure "Deployment failed at line $LINENO"' ERR
              log_info "Starting GPU instance deployment - ${InstanceType}"
              # System updates
              log_info "Updating system packages"
              export DEBIAN_FRONTEND=noninteractive
              apt-get update -y
              apt-get upgrade -y
              # nvidia-smi ships with the GPU driver on the Deep Learning AMI;
              # it is not an apt package, so only utilities are installed here
              apt-get install -y curl wget jq htop software-properties-common
              # Verify GPU availability
              log_info "Checking GPU status"
              if ! nvidia-smi; then
                signal_failure "NVIDIA GPUs not detected"
              fi
              GPU_COUNT=$(nvidia-smi --list-gpus | wc -l)
              # ${ExpectedGpus} is bound from the InstanceSpecs mapping in the
              # !Sub variable map at the end of this UserData block
              EXPECTED_GPUS=${ExpectedGpus}
              if [ "$GPU_COUNT" -ne "$EXPECTED_GPUS" ]; then
                signal_failure "Expected $EXPECTED_GPUS GPUs but found $GPU_COUNT"
              fi
              log_info "Detected $GPU_COUNT GPUs as expected"
              # Install Docker
              log_info "Installing Docker"
              curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add -
              add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
              apt-get update -y
              apt-get install -y docker-ce docker-ce-cli containerd.io
              # Install NVIDIA Container Toolkit
              log_info "Installing NVIDIA Container Toolkit"
              distribution=$(. /etc/os-release; echo $ID$VERSION_ID)
              curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | apt-key add -
              curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | tee /etc/apt/sources.list.d/nvidia-docker.list
              apt-get update -y
              apt-get install -y nvidia-container-toolkit
              # Configure Docker for GPU
              cat > /etc/docker/daemon.json << 'EOF'
              {
                "default-runtime": "nvidia",
                "runtimes": {
                  "nvidia": {
                    "path": "nvidia-container-runtime",
                    "runtimeArgs": []
                  }
                }
              }
              EOF
              systemctl restart docker
              systemctl enable docker
              usermod -aG docker ubuntu
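              # Optional sanity check for the GPU runtime (illustrative image
              # tag; any CUDA-enabled image works now that nvidia is the
              # default Docker runtime):
              #   docker run --rm nvidia/cuda:12.2.0-base-ubuntu22.04 nvidia-smi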
              # Install Ollama
              log_info "Installing Ollama"
              curl -fsSL https://ollama.com/install.sh | sh
              # Create the ollama user only if the installer has not already done so
              id -u ollama >/dev/null 2>&1 || useradd -r -s /bin/false -m -d /usr/share/ollama ollama
              # Override the installer's unit so the API listens on all interfaces
              cat > /etc/systemd/system/ollama.service << 'EOF'
              [Unit]
              Description=Ollama Service
              After=network-online.target
              Wants=network-online.target

              [Service]
              ExecStart=/usr/local/bin/ollama serve
              User=ollama
              Group=ollama
              Restart=always
              RestartSec=3
              Environment="OLLAMA_HOST=0.0.0.0:11434"
              Environment="OLLAMA_ORIGINS=*"

              [Install]
              WantedBy=multi-user.target
              EOF
              # (Re)start Ollama so the unit above takes effect
              systemctl daemon-reload
              systemctl enable ollama
              systemctl restart ollama
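              # If the service misbehaves, these are the usual places to look:
              #   systemctl status ollama
              #   journalctl -u ollama --no-pager | tail -n 50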
              # Wait for Ollama to be ready
              log_info "Waiting for Ollama service to start"
              for i in {1..60}; do
                if curl -f http://localhost:11434/api/tags >/dev/null 2>&1; then
                  log_info "Ollama service is ready"
                  break
                fi
                if [ $i -eq 60 ]; then
                  signal_failure "Ollama service failed to start within timeout"
                fi
                sleep 5
              done
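              # A quick manual check after SSHing in (jq was installed above):
              #   curl -s http://localhost:11434/api/tags | jq '.models[].name'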
              # Download the default model; fall back to the per-instance-type
              # recommendation from the InstanceSpecs mapping if it fails
              RECOMMENDED_MODEL="${RecommendedModel}"
              DEFAULT_MODEL="${DefaultModel}"
              log_info "Downloading model: $DEFAULT_MODEL"
              if ! timeout 1800 ollama pull "$DEFAULT_MODEL"; then
                log_info "Failed to download $DEFAULT_MODEL, trying recommended model: $RECOMMENDED_MODEL"
                if ! timeout 1800 ollama pull "$RECOMMENDED_MODEL"; then
                  signal_failure "Failed to download any model"
                fi
              fi
              # Create startup summary
              cat > /home/ubuntu/deployment-summary.txt << EOF
              ==========================================
              OLLAMA GPU DEPLOYMENT SUMMARY
              ==========================================
              Instance Type: ${InstanceType}
              GPUs: $GPU_COUNT x $(nvidia-smi --query-gpu=name --format=csv,noheader,nounits | head -1)
              GPU Memory: ${GpuMemoryTotal}GB total
              Ollama Endpoint: http://$(curl -s http://169.254.169.254/latest/meta-data/public-ipv4):11434
              Available Models:
              $(ollama list)
              Test Commands:
                curl http://localhost:11434/api/tags
                ollama run $DEFAULT_MODEL
              GPU Status:
              $(nvidia-smi)
              ==========================================
              EOF
              chown ubuntu:ubuntu /home/ubuntu/deployment-summary.txt
              log_info "Deployment completed successfully"
              signal_success
            # Values for the ${...} placeholders above that come from the
            # InstanceSpecs mapping (plain parameters such as ${DefaultModel}
            # are resolved by !Sub automatically)
            - ExpectedGpus: !FindInMap [InstanceSpecs, !Ref InstanceType, GPUs]
              RecommendedModel: !FindInMap [InstanceSpecs, !Ref InstanceType, RecommendedModel]
              GpuMemoryTotal: !FindInMap [InstanceSpecs, !Ref InstanceType, GPUMemory]
        TagSpecifications:
          - ResourceType: instance
            Tags:
              - Key: Name
                Value: !Sub '${ProjectName}-${Environment}-ollama-gpu'
              - Key: Project
                Value: !Ref ProjectName
              - Key: Environment
                Value: !Ref Environment
              - Key: InstanceType
                Value: !Ref InstanceType
  # GPU Instance - a resource Type cannot be selected with !If, so the Spot
  # and On-Demand variants are separate resources gated by the mutually
  # exclusive UseSpot/UseOnDemand conditions; exactly one is created.
  GPUSpotFleet:
    Type: AWS::EC2::SpotFleet
    Condition: UseSpot
    Properties:
      SpotFleetRequestConfigData:
        # Assumes this fleet role already exists in the account
        IamFleetRole: !Sub 'arn:aws:iam::${AWS::AccountId}:role/aws-ec2-spot-fleet-tagging-role'
        AllocationStrategy: 'diversified'
        TargetCapacity: 1
        SpotPrice: !Ref SpotMaxPrice
        LaunchTemplateConfigs:
          - LaunchTemplateSpecification:
              LaunchTemplateId: !Ref GPULaunchTemplate
              Version: !GetAtt GPULaunchTemplate.LatestVersionNumber
            Overrides:
              - InstanceType: !Ref InstanceType
                SubnetId: !Ref PublicSubnet
                WeightedCapacity: 1
        ReplaceUnhealthyInstances: true
        Type: 'maintain'
  GPUInstance:
    Type: AWS::EC2::Instance
    Condition: UseOnDemand
    Properties:
      LaunchTemplate:
        LaunchTemplateId: !Ref GPULaunchTemplate
        Version: !GetAtt GPULaunchTemplate.LatestVersionNumber
      SubnetId: !Ref PublicSubnet
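  # To force the On-Demand path, override the Spot parameter at deploy time:
  #   --parameter-overrides UseSpotInstance=false KeyPairName=my-keypair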
Outputs:
  InstanceType:
    Description: 'Selected GPU instance type'
    Value: !Ref InstanceType
  ExpectedGPUs:
    Description: 'Number of GPUs in the instance'
    Value: !FindInMap [InstanceSpecs, !Ref InstanceType, GPUs]
  TotalGPUMemory:
    Description: 'Total GPU memory available'
    # !FindInMap cannot appear inside a plain !Sub string, so the value is
    # bound to a named variable via the two-argument form
    Value: !Sub
      - '${GpuMem}GB'
      - GpuMem: !FindInMap [InstanceSpecs, !Ref InstanceType, GPUMemory]
  RecommendedModel:
    Description: 'Recommended model for this instance type'
    Value: !FindInMap [InstanceSpecs, !Ref InstanceType, RecommendedModel]
  OllamaEndpoint:
    Description: 'Ollama API endpoint (get the IP from the EC2 console)'
    Value: 'http://[INSTANCE-PUBLIC-IP]:11434'
  SSHCommand:
    Description: 'SSH command to connect to the instance'
    Value: !Sub 'ssh -i ${KeyPairName}.pem ubuntu@[INSTANCE-PUBLIC-IP]'
  EstimatedCostPerHour:
    Description: 'Estimated cost per hour for selected instance type'
    Value: !If
      - UseSpot
      - 'Spot: roughly 30-70% of On-Demand pricing (varies by availability)'
      - 'On-Demand pricing - check the AWS pricing page for current rates'
  CostOptimizationTips:
    Description: 'Tips to reduce costs'
    Value: |
      1. Use Spot instances for development/training (60-70% savings)
      2. Stop instances when not in use
      3. Consider smaller G5 instances for lighter workloads
      4. Use Savings Plans for predictable workloads
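# Example API calls once the stack reaches CREATE_COMPLETE (replace
# [INSTANCE-PUBLIC-IP] with the address from the EC2 console; the model name
# matches whatever DefaultModel was pulled):
#   curl http://[INSTANCE-PUBLIC-IP]:11434/api/tags
#   curl http://[INSTANCE-PUBLIC-IP]:11434/api/generate \
#     -d '{"model": "llama3.1:8b", "prompt": "Hello", "stream": false}'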