Skip to content

Instantly share code, notes, and snippets.

@dmattia
Last active March 11, 2025 20:05
Show Gist options
  • Save dmattia/b7b72a699def0494d9b181f63fada949 to your computer and use it in GitHub Desktop.
Save dmattia/b7b72a699def0494d9b181f63fada949 to your computer and use it in GitHub Desktop.
Sombra Helm Deployment with LLM
# AWS provider: every AWS resource in this configuration is created in eu-west-1.
provider "aws" {
  region = "eu-west-1"
}
# Network for the Sombra cluster: one /16 VPC split across two availability
# zones, with a private and a public /24 subnet in each zone.
module "vpc" {
  source  = "terraform-aws-modules/vpc/aws"
  version = "~> 5.1.2"

  name = "sombra-vpc"
  cidr = "10.0.0.0/16"

  azs             = ["eu-west-1a", "eu-west-1b"]
  public_subnets  = ["10.0.201.0/24", "10.0.202.0/24"]
  private_subnets = ["10.0.101.0/24", "10.0.102.0/24"]

  # The EKS cluster below is placed in the private subnets, so a NAT gateway
  # is enabled for them.
  enable_nat_gateway = true

  enable_dns_support   = true
  enable_dns_hostnames = true
}
# EKS cluster that hosts Sombra. It lives in the private subnets of the VPC
# defined in this file.
module "eks_cluster" {
  source  = "terraform-aws-modules/eks/aws"
  version = "~> 20.31.6"

  cluster_name    = "sombra-eks-cluster"
  cluster_version = "1.32"

  # Networking
  vpc_id     = module.vpc.vpc_id
  subnet_ids = module.vpc.private_subnets

  # Access: the API endpoint is publicly reachable, and the identity that
  # creates the cluster is granted admin permissions on it.
  cluster_endpoint_public_access           = true
  enable_cluster_creator_admin_permissions = true

  # Compute: enable the cluster compute config with the "general-purpose"
  # node pool.
  cluster_compute_config = {
    enabled    = true
    node_pools = ["general-purpose"]
  }
}
###################################################################################
# Everything below here depends on the above kubernetes cluster already existing, #
# so you may want to comment out the below resources during the first apply #
###################################################################################
# Kubernetes provider, pointed at the EKS cluster created in this file.
provider "kubernetes" {
  host                   = module.eks_cluster.cluster_endpoint
  cluster_ca_certificate = base64decode(module.eks_cluster.cluster_certificate_authority_data)

  # Authenticate by shelling out to the AWS CLI (`aws eks get-token`), so the
  # `aws` binary must be on PATH wherever Terraform runs.
  exec {
    command     = "aws"
    args        = ["eks", "get-token", "--cluster-name", module.eks_cluster.cluster_name]
    api_version = "client.authentication.k8s.io/v1beta1"
  }
}
# Pin the Helm provider to the 2.x series. The nested `kubernetes { ... }` and
# `exec { ... }` block syntax used below is only valid on 2.x; in 3.x these
# became object attributes (`kubernetes = { ... }`). Without a pin, a fresh
# `terraform init` selects 3.x and this configuration fails to parse.
terraform {
  required_providers {
    helm = {
      source  = "hashicorp/helm"
      version = "~> 2.17"
    }
  }
}

# Helm provider, pointed at the same EKS cluster as the kubernetes provider.
provider "helm" {
  kubernetes {
    host                   = module.eks_cluster.cluster_endpoint
    cluster_ca_certificate = base64decode(module.eks_cluster.cluster_certificate_authority_data)

    # Authenticate by shelling out to the AWS CLI (`aws eks get-token`), so the
    # `aws` binary must be on PATH wherever Terraform runs.
    exec {
      api_version = "client.authentication.k8s.io/v1beta1"
      command     = "aws"
      args        = ["eks", "get-token", "--cluster-name", module.eks_cluster.cluster_name]
    }
  }
}
# NodePool named "default" that restricts provisioned nodes to NVIDIA A10G GPU
# instances, using the "default" EKS NodeClass.
resource "kubernetes_manifest" "allow_gpu_node_pool" {
  manifest = {
    apiVersion = "karpenter.sh/v1"
    kind       = "NodePool"
    metadata = {
      name = "default"
    }
    spec = {
      template = {
        spec = {
          nodeClassRef = {
            group = "eks.amazonaws.com"
            kind  = "NodeClass"
            name  = "default"
          }
          requirements = [
            {
              key      = "eks.amazonaws.com/instance-gpu-manufacturer"
              operator = "In"
              values   = ["nvidia"]
            },
            # The A10G GPU has been tested to work well with our LLM Classifier
            {
              key      = "eks.amazonaws.com/instance-gpu-name"
              operator = "In"
              values   = ["a10g"]
            }
          ]
        }
      }
      # Cap on the total resources this pool may provision. A CPU quantity
      # without a suffix is counted in whole cores, so the previous "16000"
      # meant 16,000 cores rather than 16000m. "16" cores is the value that
      # lines up with the 64Gi memory cap.
      limits = {
        cpu    = "16"
        memory = "64Gi"
      }
    }
  }
}
# Installs the Transcend Sombra chart as release "sombra-servers" in the
# "sombra" namespace, with the LLM classifier sub-component enabled.
resource "helm_release" "sombra" {
  name             = "sombra-servers"
  namespace        = "sombra"
  create_namespace = true

  repository = "https://transcend-io.github.io/helm-charts/"
  chart      = "sombra"
  version    = "0.5.0"

  timeout = 900 # 15 minutes for any helm operation

  values = [yamlencode({
    imageCredentials = {
      registry = "docker.transcend.io"
      username = "Transcend"
      password = "<DOCKER_API_KEY_FROM_STEP_1>"
    }
    replicaCount = 1
    envs = [
      {
        name  = "ORGANIZATION_URI"
        value = "<ORGANIZATION_URI_FROM_STEP_2>"
      },
      {
        name  = "SOMBRA_ID"
        value = "<SOMBRA_ID_FROM_STEP_2>"
      },
      {
        name  = "SOMBRA_REVERSE_TUNNEL_API_KEY"
        value = "<SOMBRA_REVERSE_TUNNEL_API_KEY_FROM_STEP_2>"
      },
      {
        name  = "TRANSCEND_URL"
        value = "<TRANSCEND_URL_FROM_STEP_2>"
      },
      {
        name = "LLM_CLASSIFIER_URL"
        # In-cluster DNS has the form <service>.<namespace>.svc.cluster.local.
        # The namespace segment must match this release's `namespace`
        # ("sombra"); keep the two in sync if either changes.
        value = "http://sombra-servers-llm-classifier.sombra.svc.cluster.local:6081/"
      }
    ]
    envs_as_secret = [
      {
        name = "JWT_ECDSA_KEY"
        # This is just a sample key, in production you'd want to generate your own key via a command
        # such as `openssl ecparam -genkey -name secp384r1 -noout | (base64 --wrap=0 2>/dev/null || base64 -b 0)`.
        value = "LS0tLS1CRUdJTiBFQyBQUklWQVRFIEtFWS0tLS0tCk1JR2tBZ0VCQkRCVTdtcVg1MEdqa2dtMzZKME9zeEhzWGR3MG9NOEQ0dzZla3Y1RVJtV0lRSGM2c0tud043QkkKQk1uSlpYNHVPKytnQndZRks0RUVBQ0toWkFOaUFBUlQ1MFk3My8wdGhpazdvd2tOaFhmRWRtbi9UQ1pKM1ZzRQptT1JvUVJtUGlxZkhrQlpQRVZpSHIxMDRteHlYejNIc3JabEdHbDVnWmgwYnFXTHU2Z2R0Y1BtaXZhRWZtRU9KCkVnZE1iRXh4QjJtVEk2QWdKZk9EdFJZa2xUd0pqVWc9Ci0tLS0tRU5EIEVDIFBSSVZBVEUgS0VZLS0tLS0K"
      }
    ]
    llm-classifier = { enabled = true }
    # Keep the Sombra pods off GPU nodes: only schedule onto nodes that do
    # not carry the GPU-manufacturer label.
    affinity = {
      nodeAffinity = {
        requiredDuringSchedulingIgnoredDuringExecution = {
          nodeSelectorTerms = [{
            matchExpressions = [{
              key      = "eks.amazonaws.com/instance-gpu-manufacturer"
              operator = "DoesNotExist"
            }]
          }]
        }
      }
    }
  })]
}
@dmehra272
Copy link

api_version = "client.authentication.k8s.io/v1beta1"

@dmattia Is this still a beta version? I would suggest updating to simply v1, so the customer is aware they are hitting a production endpoint.

@dmehra272
Copy link

dmehra272 commented Mar 6, 2025

@dmattia We will need to add one for the new GliNER algorithm here as well. Per Alessandro, this is already included in the same pod. Could we add a clarifying comment in this file that both LLM (Structured Disc) and GliNER (Unstructured Disc) are available in the same pod?

@dmehra272
Copy link

"And optionally replace the JWT_ECDSA_KEY secret environment variable's value with your own encryption key, which you can get by running openssl ecparam -genkey -name secp384r1 -noout | (base64 --wrap=0 2>/dev/null || base64 -b 0)"

Can we add an example of how to read the key from AWS KMS, which earlier sections mention as supported?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment