Files
netris-nestri/infra:old/cluster.ts
Wanjohi 457aac2258 feat(infra): Update infra and add support for teams to SST (#186)
## Description
- [x] Adds support for AWS SSO, which lets us (the team) use SST and
update the components independently
- [x] Splits the webpage into the landing page (Qwik), and Astro (the
console) in charge of playing. This allows us to pass in Environment
Variables to the console
- ~Migrates the docs from Nuxt to Nextjs, and connects them to SST. This
allows us to use Fumadocs _citation needed_ that's much more beautiful,
and supports OpenApi~
- Cloudflare Pages with GitHub integration is not working on our new CF
account, so we will have to push the Pages deployment manually with
GitHub Actions
- [x] Moves the current setup from my personal CF and AWS accounts to
dedicated Nestri accounts

## Related Issues
<!-- List any related issues (e.g., "Closes #123", "Fixes #456") -->

## Type of Change

- [ ] Bug fix (non-breaking change)
- [x] New feature (non-breaking change)
- [ ] Breaking change (fix or feature that changes existing
functionality)
- [x] Documentation update
- [ ] Other (please describe):

## Checklist

- [x] I have updated relevant documentation
- [x] My code follows the project's coding style
- [x] My changes generate no new warnings/errors

## Notes for Reviewers
<!-- Point out areas you'd like reviewers to focus on, questions you
have, or decisions that need discussion -->
Please approve my PR 🥹


## Screenshots/Demo
<!-- If applicable, add screenshots or a GIF demo of your changes
(especially for UI changes) -->

## Additional Context
<!-- Add any other context about the pull request here -->
2025-02-27 18:52:05 +03:00

156 lines
4.9 KiB
TypeScript

import { sshKey } from "./ssh";
import { authFingerprintKey } from "./auth";
/** ECS cluster named "NestriGPUCluster"; exported so other infra modules can reference it. */
export const ecsCluster = new aws.ecs.Cluster(
  "NestriGPUCluster",
  { name: "NestriGPUCluster" },
);
// Trust policy document: allows the EC2 service principal to assume the role.
// Key order is kept stable so the serialized JSON does not change.
const ec2AssumeRolePolicy = {
  Version: "2012-10-17",
  Statement: [
    {
      Action: "sts:AssumeRole",
      Principal: { Service: "ec2.amazonaws.com" },
      Effect: "Allow",
      Sid: "",
    },
  ],
};

/** IAM role ("GPUAssumeRoleProd") that EC2 instances may assume. */
const ecsInstanceRole = new aws.iam.Role("NestriGPUInstanceRole", {
  name: "GPUAssumeRoleProd",
  assumeRolePolicy: JSON.stringify(ec2AssumeRolePolicy),
});
// Attach the AWS-managed AmazonEC2ContainerServiceforEC2Role policy to the instance role.
new aws.iam.RolePolicyAttachment("NestriGPUInstancePolicyAttachment", {
  policyArn:
    "arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role",
  role: ecsInstanceRole.name,
});
// Instance profile wrapping the instance role. It is currently only referenced
// by the commented-out EC2 instance definition below.
const ecsInstanceProfile = new aws.iam.InstanceProfile(
  "NestriGPUInstanceProfile",
  { role: ecsInstanceRole.name },
);
// const server = new aws.ec2.Instance("NestriGPU", {
// instanceType: aws.ec2.InstanceType.G4dn_XLarge,
// ami: "ami-046a6af96ef510bb6",//Fedora cloud
// keyName: sshKey.keyName,
// instanceMarketOptions: {
// marketType: "spot",
// spotOptions: {
// maxPrice: "0.2",
// spotInstanceType: "persistent",
// instanceInterruptionBehavior: "stop"
// },
// },
// iamInstanceProfile: ecsInstanceProfile,
// });
// CloudWatch log group "/ecs/nestri-gpu-prod"; log events are retained for 7 days.
const logGroup = new aws.cloudwatch.LogGroup("NestriGPULogGroup", {
  retentionInDays: 7,
  name: "/ecs/nestri-gpu-prod",
});
/**
 * ECS task definition (EC2 launch type) for the Nestri runner container,
 * created so the ECS setup can be tested.
 *
 * The container definitions are serialized to JSON inside `apply` because the
 * resolved value of `authFingerprintKey.result` is injected as the
 * `AUTH_FINGERPRINT` environment variable.
 */
export const gpuTaskDefinition = new aws.ecs.TaskDefinition("NestriGPUTask", {
  family: "NestriGPUTaskProd",
  requiresCompatibilities: ["EC2"],
  volumes: [
    {
      // Host bind mount; mounted into the container at /home/nestri below.
      name: "host",
      hostPath: "/mnt/",
      // efsVolumeConfiguration: {
      // fileSystemId: storage.id,
      // authorizationConfig: { accessPointId: storage.accessPoint },
      // transitEncryption: "ENABLED",
      // }
    },
  ],
  containerDefinitions: authFingerprintKey.result.apply((v) =>
    JSON.stringify([
      {
        essential: true,
        name: "nestri",
        memory: 1024,
        cpu: 200,
        // GPUs are reserved through `resourceRequirements`; a bare `gpu` key is
        // not part of the ECS ContainerDefinition schema and has no effect.
        resourceRequirements: [{ type: "GPU", value: "1" }],
        image: "ghcr.io/nestrilabs/nestri/runner:nightly",
        environment: [
          { name: "RESOLUTION", value: "1920x1080" },
          { name: "AUTH_FINGERPRINT", value: v },
          { name: "FRAMERATE", value: "60" },
          { name: "NESTRI_ROOM", value: "aws-testing" },
          { name: "RELAY_URL", value: "https://relay.dathorse.com" },
          {
            name: "NESTRI_PARAMS",
            value:
              "--verbose=true --video-codec=h264 --video-bitrate=4000 --video-bitrate-max=6000 --gpu-card-path=/dev/dri/card0",
          },
        ],
        mountPoints: [{ containerPath: "/home/nestri", sourceVolume: "host" }],
        disableNetworking: false,
        // The ECS key is the plural `linuxParameters`; with the singular
        // spelling the shared-memory setting is never applied.
        linuxParameters: {
          sharedMemorySize: 5120,
        },
        logConfiguration: {
          logDriver: "awslogs",
          options: {
            // Must match the name of the NestriGPULogGroup log group.
            "awslogs-group": "/ecs/nestri-gpu-prod",
            "awslogs-region": "us-east-1",
            "awslogs-stream-prefix": "nestri-gpu-task",
          },
        },
      },
    ]),
  ),
});
// Make ECS task definitions linkable in SST, exposing the ARN as `value`.
sst.Linkable.wrap(aws.ecs.TaskDefinition, ({ arn }) => {
  return { properties: { value: arn } };
});
// Make ECS clusters linkable in SST, exposing the ARN as `value`.
sst.Linkable.wrap(aws.ecs.Cluster, ({ arn }) => {
  return { properties: { value: arn } };
});
// userData: $interpolate`#!/bin/bash
// sudo rm /etc/sysconfig/docker
// echo DAEMON_MAXFILES=1048576 | sudo tee -a /etc/sysconfig/docker
// echo DAEMON_PIDFILE_TIMEOUT=10 | sudo tee -a /etc/sysconfig/docker
// echo OPTIONS="--default-ulimit nofile=32768:65536" | sudo tee -a /etc/sysconfig/docker
// sudo tee "/etc/docker/daemon.json" > /dev/null <<EOF
// {
// "default-runtime": "nvidia",
// "runtimes": {
// "nvidia": {
// "path": "/usr/bin/nvidia-container-runtime",
// "runtimeArgs": []
// }
// }
// }
// EOF
// sudo systemctl restart docker
// echo ECS_CLUSTER='${ecsCluster.name}' | sudo tee -a /etc/ecs/ecs.config
// echo ECS_ENABLE_GPU_SUPPORT=true | sudo tee -a /etc/ecs/ecs.config
// echo ECS_CONTAINER_STOP_TIMEOUT=3h | sudo tee -a /etc/ecs/ecs.config
// echo ECS_ENABLE_SPOT_INSTANCE_DRAINING=true | sudo tee -a /etc/ecs/ecs.config
// `,
// This is used for requesting a container to be deployed on AWS
// const queue = new sst.aws.Queue("PartyQueue", { fifo: true });
// queue.subscribe({ handler: "packages/functions/src/party/subscriber.handler", permissions:{}, link:[taskF]})
// const authRes = $interpolate`${authFingerprintKey.result}`