feat(maitred): Update maitred - hook up to the API (#198)

## Description
We are hooking maitred up to the API. Maitred's duties will be:
- [ ] Hook up to the API
- [ ] Wait for a signal (from the API) to start Steam
- [ ] On a stop signal, stop the gaming session, clean up Steam... and maybe do the backup

The signals arrive as JSON messages over MQTT; a sketch of the wire format is shown below.
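A hedged sketch of the messages the API is expected to publish (the envelope matches `internal/realtime/messages.go` in this PR; the container ID is hypothetical):

```go
// Example control messages maitred reacts to:
//
//	{"type": "create", "payload": {}}
//	{"type": "start",  "payload": {"container_id": "abc123"}}
//	{"type": "stop",   "payload": {"container_id": "abc123"}}
```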

## Summary by CodeRabbit

- **New Features**
  - Introduced Docker-based deployment configurations for both the main and relay applications.
  - Added new API endpoints enabling real-time machine messaging and enhanced IoT operations.
  - Expanded database schema and actor types to support improved machine tracking.

- **Improvements**
  - Enhanced real-time communication and relay management with streamlined room handling.
  - Upgraded dependencies, logging, and error handling for greater stability and performance.


---------

Co-authored-by: DatCaptainHorse <DatCaptainHorse@users.noreply.github.com>
Co-authored-by: Kristian Ollikainen <14197772+DatCaptainHorse@users.noreply.github.com>
Authored by Wanjohi on 2025-04-07 23:23:53 +03:00; committed by GitHub.
Parent 6990494b34, commit de80f3e6ab
84 changed files with 7357 additions and 1331 deletions

View File

@@ -1,25 +1,39 @@
module nestri/maitred
go 1.23.3
go 1.24
require (
github.com/charmbracelet/log v0.4.0
github.com/docker/docker v28.0.1+incompatible
github.com/eclipse/paho.golang v0.22.0
github.com/oklog/ulid/v2 v2.1.0
)
require (
github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
github.com/charmbracelet/lipgloss v0.10.0 // indirect
github.com/go-logfmt/logfmt v0.6.0 // indirect
github.com/Microsoft/go-winio v0.4.14 // indirect
github.com/containerd/log v0.1.0 // indirect
github.com/distribution/reference v0.6.0 // indirect
github.com/docker/go-connections v0.5.0 // indirect
github.com/docker/go-units v0.5.0 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/go-logr/logr v1.4.2 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/gorilla/websocket v1.5.3 // indirect
github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
github.com/mattn/go-isatty v0.0.18 // indirect
github.com/mattn/go-runewidth v0.0.15 // indirect
github.com/muesli/reflow v0.3.0 // indirect
github.com/muesli/termenv v0.15.2 // indirect
github.com/rivo/uniseg v0.4.7 // indirect
golang.org/x/exp v0.0.0-20231006140011-7918f672742d // indirect
golang.org/x/net v0.33.0 // indirect
golang.org/x/sys v0.28.0 // indirect
github.com/moby/docker-image-spec v1.3.1 // indirect
github.com/moby/term v0.5.2 // indirect
github.com/morikuni/aec v1.0.0 // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/image-spec v1.1.1 // indirect
github.com/pkg/errors v0.9.1 // indirect
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0 // indirect
go.opentelemetry.io/otel v1.34.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.34.0 // indirect
go.opentelemetry.io/otel/metric v1.34.0 // indirect
go.opentelemetry.io/otel/sdk v1.34.0 // indirect
go.opentelemetry.io/otel/trace v1.34.0 // indirect
golang.org/x/net v0.34.0 // indirect
golang.org/x/sys v0.29.0 // indirect
golang.org/x/time v0.10.0 // indirect
gotest.tools/v3 v3.5.2 // indirect
)

View File

@@ -1,49 +1,134 @@
github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=
github.com/charmbracelet/lipgloss v0.10.0 h1:KWeXFSexGcfahHX+54URiZGkBFazf70JNMtwg/AFW3s=
github.com/charmbracelet/lipgloss v0.10.0/go.mod h1:Wig9DSfvANsxqkRsqj6x87irdy123SR4dOXlKa91ciE=
github.com/charmbracelet/log v0.4.0 h1:G9bQAcx8rWA2T3pWvx7YtPTPwgqpk7D68BX21IRW8ZM=
github.com/charmbracelet/log v0.4.0/go.mod h1:63bXt/djrizTec0l11H20t8FDSvA4CRZJ1KH22MdptM=
github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c h1:udKWzYgxTojEKWjV8V+WSxDXJ4NFATAsZjh8iIbsQIg=
github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
github.com/Microsoft/go-winio v0.4.14 h1:+hMXMk01us9KgxGb7ftKQt2Xpf5hH/yky+TDA+qxleU=
github.com/Microsoft/go-winio v0.4.14/go.mod h1:qXqCSQ3Xa7+6tgxaGTIe4Kpcdsi+P8jBhyzoq1bpyYA=
github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I=
github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
github.com/docker/docker v28.0.1+incompatible h1:FCHjSRdXhNRFjlHMTv4jUNlIBbTeRjrWfeFuJp7jpo0=
github.com/docker/docker v28.0.1+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c=
github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc=
github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/eclipse/paho.golang v0.22.0 h1:JhhUngr8TBlyUZDZw/L6WVayPi9qmSmdWeki48i5AVE=
github.com/eclipse/paho.golang v0.22.0/go.mod h1:9ZiYJ93iEfGRJri8tErNeStPKLXIGBHiqbHV74t5pqI=
github.com/go-logfmt/logfmt v0.6.0 h1:wGYYu3uicYdqXVgoYbvnkrPVXkuLM1p1ifugDMEdRi4=
github.com/go-logfmt/logfmt v0.6.0/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY=
github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp98=
github.com/mattn/go-isatty v0.0.18/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-runewidth v0.0.12/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk=
github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U=
github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/muesli/reflow v0.3.0 h1:IFsN6K9NfGtjeggFP+68I4chLZV2yIKsXJFNZ+eWh6s=
github.com/muesli/reflow v0.3.0/go.mod h1:pbwTDkVPibjO2kyvBQRBxTWEEGDGq0FlB1BIKtnHY/8=
github.com/muesli/termenv v0.15.2 h1:GohcuySI0QmI3wN8Ok9PtKGkgkFIk7y6Vpb5PvrY+Wo=
github.com/muesli/termenv v0.15.2/go.mod h1:Epx+iuz8sNs7mNKhxzH4fWXGNpZwUaJKRS1noLXviQ8=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.25.1 h1:VNqngBF40hVlDloBruUehVYC3ArSgIyScOAyMRqBxRg=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.25.1/go.mod h1:RBRO7fro65R6tjKzYgLAFo0t1QEXY1Dp+i/bvpRiqiQ=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0=
github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo=
github.com/moby/term v0.5.2 h1:6qk3FJAFDs6i/q3W/pQ97SX192qKfZgGjCQqfCJkgzQ=
github.com/moby/term v0.5.2/go.mod h1:d3djjFCrjnB+fl8NJux+EJzu0msscUP+f8it8hPkFLc=
github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
github.com/oklog/ulid/v2 v2.1.0 h1:+9lhoxAP56we25tyYETBBY1YLA2SaoLvUFgrP2miPJU=
github.com/oklog/ulid/v2 v2.1.0/go.mod h1:rcEKHmBBKfef9DhnvX7y1HZBYxjXb0cP5ExxNsTT1QQ=
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040=
github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M=
github.com/pborman/getopt v0.0.0-20170112200414-7148bc3a4c30/go.mod h1:85jBQOZwpVEaDAr341tbn15RS4fCAsIst0qp7i8ex1o=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/sirupsen/logrus v1.4.1/go.mod h1:ni0Sbl8bgC9z8RoU9G6nDWqqs/fq4eDPysMBDgk/93Q=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0 h1:CV7UdSGJt/Ao6Gp4CXckLxVRRsRgDHoI8XjbL3PDl8s=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0/go.mod h1:FRmFuRJfag1IZ2dPkHnEoSFVgTVPUd2qf5Vi69hLb8I=
go.opentelemetry.io/otel v1.34.0 h1:zRLXxLCgL1WyKsPVrgbSdMN4c0FMkDAskSTQP+0hdUY=
go.opentelemetry.io/otel v1.34.0/go.mod h1:OWFPOQ+h4G8xpyjgqo4SxJYdDQ/qmRH+wivy7zzx9oI=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.34.0 h1:OeNbIYk/2C15ckl7glBlOBp5+WlYsOElzTNmiPW/x60=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.34.0/go.mod h1:7Bept48yIeqxP2OZ9/AqIpYS94h2or0aB4FypJTc8ZM=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.34.0 h1:BEj3SPM81McUZHYjRS5pEgNgnmzGJ5tRpU5krWnV8Bs=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.34.0/go.mod h1:9cKLGBDzI/F3NoHLQGm4ZrYdIHsvGt6ej6hUowxY0J4=
go.opentelemetry.io/otel/metric v1.34.0 h1:+eTR3U0MyfWjRDhmFMxe2SsW64QrZ84AOhvqS7Y+PoQ=
go.opentelemetry.io/otel/metric v1.34.0/go.mod h1:CEDrp0fy2D0MvkXE+dPV7cMi8tWZwX3dmaIhwPOaqHE=
go.opentelemetry.io/otel/sdk v1.34.0 h1:95zS4k/2GOy069d321O8jWgYsW3MzVV+KuSPKp7Wr1A=
go.opentelemetry.io/otel/sdk v1.34.0/go.mod h1:0e/pNiaMAqaykJGKbi+tSjWfNNHMTxoC9qANsCzbyxU=
go.opentelemetry.io/otel/trace v1.34.0 h1:+ouXS2V8Rd4hp4580a8q23bg0azF2nI8cqLYnC8mh/k=
go.opentelemetry.io/otel/trace v1.34.0/go.mod h1:Svm7lSjQD7kG7KJ/MUHPVXSDGz2OX4h0M2jHBhmSfRE=
go.opentelemetry.io/proto/otlp v1.5.0 h1:xJvq7gMzB31/d406fB8U5CBdyQGw4P399D1aQWU/3i4=
go.opentelemetry.io/proto/otlp v1.5.0/go.mod h1:keN8WnHxOy8PG0rQZjJJ5A2ebUoafqWp0eVQ4yIXvJ4=
go.uber.org/goleak v1.2.1 h1:NBol2c7O1ZokfZ0LEU9K6Whx/KnwvepVetCUhtKja4A=
go.uber.org/goleak v1.2.1/go.mod h1:qlT2yGI9QafXHhZZLxlSuNsMw3FFLxBr+tBRlmO1xH4=
golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI=
golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo=
golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I=
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA=
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.34.0 h1:Mb7Mrk043xzHgnRM88suvJFwzVrRfHEHJEl5/71CKw0=
golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU=
golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
golang.org/x/time v0.10.0 h1:3usCWA8tQn0L8+hFJQNgzpWbd89begxN66o1Ojdn5L4=
golang.org/x/time v0.10.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/genproto/googleapis/api v0.0.0-20250115164207-1a7da9e5054f h1:gap6+3Gk41EItBuyi4XX/bp4oqJ3UwuIMl25yGinuAA=
google.golang.org/genproto/googleapis/api v0.0.0-20250115164207-1a7da9e5054f/go.mod h1:Ic02D47M+zbarjYYUlK57y316f2MoN0gjAwI3f2S95o=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250115164207-1a7da9e5054f h1:OxYkA3wjPsZyBylwymxSHa7ViiW1Sml4ToBrncvFehI=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250115164207-1a7da9e5054f/go.mod h1:+2Yz8+CLJbIfL9z73EW45avw8Lmge3xVElCP9zEKi50=
google.golang.org/grpc v1.69.4 h1:MF5TftSMkd8GLw/m0KM6V8CMOCY6NZ1NQDPGFgbTt4A=
google.golang.org/grpc v1.69.4/go.mod h1:vyjdE6jLBI76dgpDojsFGNaHlxdjXN9ghpnd2o7JGZ4=
google.golang.org/protobuf v1.36.3 h1:82DV7MYdb8anAVi3qge1wSnMDrnKK7ebr+I0hHRN1BU=
google.golang.org/protobuf v1.36.3/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools/v3 v3.5.2 h1:7koQfIKdy+I8UTetycgUqXWSDwpgv193Ka+qRsmBY8Q=
gotest.tools/v3 v3.5.2/go.mod h1:LtdLGcnqToBH83WByAAi/wiwSFCArdFIUV/xxN4pcjA=

View File

@@ -0,0 +1,45 @@
package auth
import (
"encoding/json"
"fmt"
"io"
"log/slog"
"nestri/maitred/internal/resource"
"net/http"
"net/url"
)
type UserCredentials struct {
AccessToken string `json:"access_token"`
RefreshToken string `json:"refresh_token"`
}
func FetchUserToken(machineID string, resource *resource.Resource) (*UserCredentials, error) {
data := url.Values{}
data.Set("grant_type", "client_credentials")
data.Set("client_id", "maitred")
data.Set("client_secret", resource.AuthFingerprintKey.Value)
data.Set("fingerprint", machineID)
data.Set("provider", "machine")
resp, err := http.PostForm(resource.Auth.Url+"/token", data)
if err != nil {
return nil, err
}
defer func(Body io.ReadCloser) {
err = Body.Close()
if err != nil {
slog.Error("Error closing body", "err", err)
}
}(resp.Body)
if resp.StatusCode != 200 {
body, _ := io.ReadAll(resp.Body)
return nil, fmt.Errorf("failed to auth: " + string(body))
}
credentials := UserCredentials{}
err = json.NewDecoder(resp.Body).Decode(&credentials)
if err != nil {
return nil, err
}
return &credentials, nil
}
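A minimal usage sketch, assuming resource values are already loaded from the environment (the machine fingerprint here is hypothetical, not from this PR):

```go
package main

import (
	"fmt"
	"log"

	"nestri/maitred/internal/auth"
	"nestri/maitred/internal/resource"
)

func main() {
	res, err := resource.NewResource() // reads SST_RESOURCE_* env vars
	if err != nil {
		log.Fatal(err)
	}
	// "mch_example" is a hypothetical machine fingerprint
	creds, err := auth.FetchUserToken("mch_example", res)
	if err != nil {
		log.Fatal(err)
	}
	// The access token is later used as the MQTT password
	fmt.Println("got access token of length", len(creds.AccessToken))
}
```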

View File

@@ -0,0 +1,38 @@
package containers
import (
"context"
"fmt"
)
// Container represents a container instance
type Container struct {
ID string
Name string
State string
Image string
}
// ContainerEngine defines the common interface for different container engines
type ContainerEngine interface {
Close() error
ListContainers(ctx context.Context) ([]Container, error)
ListContainersByImage(ctx context.Context, img string) ([]Container, error)
NewContainer(ctx context.Context, img string, envs []string) (string, error)
StartContainer(ctx context.Context, id string) error
StopContainer(ctx context.Context, id string) error
RemoveContainer(ctx context.Context, id string) error
InspectContainer(ctx context.Context, id string) (*Container, error)
PullImage(ctx context.Context, img string) error
Info(ctx context.Context) (string, error)
LogsContainer(ctx context.Context, id string) (string, error)
}
func NewContainerEngine() (ContainerEngine, error) {
dockerEngine, err := NewDockerEngine()
if err == nil {
return dockerEngine, nil
}
return nil, fmt.Errorf("failed to create container engine: %w", err)
}
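A hedged sketch of the lifecycle this interface implies; the image name and env var are illustrative, not from this PR:

```go
package main

import (
	"context"
	"fmt"
	"log"

	"nestri/maitred/internal/containers"
)

func main() {
	ctx := context.Background()
	engine, err := containers.NewContainerEngine()
	if err != nil {
		log.Fatal(err)
	}
	defer engine.Close()

	img := "ghcr.io/example/runner:latest" // hypothetical image
	if err := engine.PullImage(ctx, img); err != nil {
		log.Fatal(err)
	}
	id, err := engine.NewContainer(ctx, img, []string{"EXAMPLE_ENV=1"})
	if err != nil {
		log.Fatal(err)
	}
	if err := engine.StartContainer(ctx, id); err != nil {
		log.Fatal(err)
	}
	ctr, err := engine.InspectContainer(ctx, id)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("container", ctr.Name, "is", ctr.State)
}
```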

View File

@@ -0,0 +1,299 @@
package containers
import (
"context"
"encoding/json"
"fmt"
"github.com/docker/docker/api/types/container"
"github.com/docker/docker/api/types/image"
"github.com/docker/docker/client"
"io"
"log/slog"
"strings"
"time"
)
// DockerEngine implements the ContainerEngine interface for Docker and Docker-compatible engines
type DockerEngine struct {
cli *client.Client
}
func NewDockerEngine() (*DockerEngine, error) {
cli, err := client.NewClientWithOpts(client.FromEnv, client.WithAPIVersionNegotiation())
if err != nil {
return nil, fmt.Errorf("failed to create Docker client: %w", err)
}
return &DockerEngine{cli: cli}, nil
}
func (d *DockerEngine) Close() error {
return d.cli.Close()
}
func (d *DockerEngine) ListContainers(ctx context.Context) ([]Container, error) {
containerList, err := d.cli.ContainerList(ctx, container.ListOptions{})
if err != nil {
return nil, fmt.Errorf("failed to list containers: %w", err)
}
var result []Container
for _, c := range containerList {
result = append(result, Container{
ID: c.ID,
Name: strings.TrimPrefix(strings.Join(c.Names, ","), "/"),
State: c.State,
Image: c.Image,
})
}
return result, nil
}
func (d *DockerEngine) ListContainersByImage(ctx context.Context, img string) ([]Container, error) {
if len(img) <= 0 {
return nil, fmt.Errorf("image name cannot be empty")
}
containerList, err := d.cli.ContainerList(ctx, container.ListOptions{})
if err != nil {
return nil, fmt.Errorf("failed to list containers: %w", err)
}
var result []Container
for _, c := range containerList {
if c.Image == img {
result = append(result, Container{
ID: c.ID,
Name: strings.TrimPrefix(strings.Join(c.Names, ","), "/"),
State: c.State,
Image: c.Image,
})
}
}
return result, nil
}
func (d *DockerEngine) NewContainer(ctx context.Context, img string, envs []string) (string, error) {
// Create a new container with the given image and environment variables
resp, err := d.cli.ContainerCreate(ctx, &container.Config{
Image: img,
Env: envs,
}, &container.HostConfig{
NetworkMode: "host",
}, nil, nil, "")
if err != nil {
return "", fmt.Errorf("failed to create container: %w", err)
}
if len(resp.ID) <= 0 {
return "", fmt.Errorf("failed to create container, no ID returned")
}
return resp.ID, nil
}
func (d *DockerEngine) StartContainer(ctx context.Context, id string) error {
err := d.cli.ContainerStart(ctx, id, container.StartOptions{})
if err != nil {
return fmt.Errorf("failed to start container: %w", err)
}
// Wait for the container to start
if err = d.waitForContainer(ctx, id, "running"); err != nil {
return fmt.Errorf("container failed to reach running state: %w", err)
}
return nil
}
func (d *DockerEngine) StopContainer(ctx context.Context, id string) error {
// Set up a waiter for the container to stop
respChan, errChan := d.cli.ContainerWait(ctx, id, container.WaitConditionNotRunning)
// Stop the container
err := d.cli.ContainerStop(ctx, id, container.StopOptions{})
if err != nil {
return fmt.Errorf("failed to stop container: %w", err)
}
select {
case <-respChan:
// Container stopped successfully
break
case err = <-errChan:
if err != nil {
return fmt.Errorf("failed to wait for container to stop: %w", err)
}
case <-ctx.Done():
return fmt.Errorf("context canceled while waiting for container to stop")
}
return nil
}
func (d *DockerEngine) RemoveContainer(ctx context.Context, id string) error {
// Set up a waiter for the container to be removed
respChan, errChan := d.cli.ContainerWait(ctx, id, container.WaitConditionRemoved)
err := d.cli.ContainerRemove(ctx, id, container.RemoveOptions{})
if err != nil {
return fmt.Errorf("failed to remove container: %w", err)
}
select {
case <-respChan:
// Container removed successfully
break
case err = <-errChan:
if err != nil {
return fmt.Errorf("failed to wait for container to be removed: %w", err)
}
case <-ctx.Done():
return fmt.Errorf("context canceled while waiting for container to stop")
}
return nil
}
func (d *DockerEngine) InspectContainer(ctx context.Context, id string) (*Container, error) {
info, err := d.cli.ContainerInspect(ctx, id)
if err != nil {
return nil, fmt.Errorf("failed to inspect container: %w", err)
}
return &Container{
ID: info.ID,
Name: info.Name,
State: info.State.Status,
Image: info.Config.Image,
}, nil
}
func (d *DockerEngine) PullImage(ctx context.Context, img string) error {
if len(img) <= 0 {
return fmt.Errorf("image name cannot be empty")
}
slog.Info("Starting image pull", "image", img)
reader, err := d.cli.ImagePull(ctx, img, image.PullOptions{})
if err != nil {
return fmt.Errorf("failed to start image pull for %s: %w", img, err)
}
defer func(reader io.ReadCloser) {
err = reader.Close()
if err != nil {
slog.Warn("Failed to close reader", "err", err)
}
}(reader)
// Parse the JSON stream for progress
decoder := json.NewDecoder(reader)
lastDownloadPercent := 0
downloadTotals := make(map[string]int64)
downloadCurrents := make(map[string]int64)
var msg struct {
ID string `json:"id"`
Status string `json:"status"`
ProgressDetail struct {
Current int64 `json:"current"`
Total int64 `json:"total"`
} `json:"progressDetail"`
}
for {
err = decoder.Decode(&msg)
if err == io.EOF {
break // Pull completed
}
if err != nil {
return fmt.Errorf("error decoding pull response for %s: %w", img, err)
}
// Skip if no progress details or ID
if msg.ID == "" || msg.ProgressDetail.Total == 0 {
continue
}
if strings.Contains(strings.ToLower(msg.Status), "downloading") {
downloadTotals[msg.ID] = msg.ProgressDetail.Total
downloadCurrents[msg.ID] = msg.ProgressDetail.Current
var total, current int64
for _, t := range downloadTotals {
total += t
}
for _, c := range downloadCurrents {
current += c
}
percent := int((float64(current) / float64(total)) * 100)
if percent >= lastDownloadPercent+10 && percent <= 100 {
slog.Info("Download progress", "image", img, "percent", percent)
lastDownloadPercent = percent - (percent % 10)
}
}
}
slog.Info("Pulled image", "image", img)
return nil
}
func (d *DockerEngine) Info(ctx context.Context) (string, error) {
info, err := d.cli.Info(ctx)
if err != nil {
return "", fmt.Errorf("failed to get Docker info: %w", err)
}
return fmt.Sprintf("Docker Engine Version: %s", info.ServerVersion), nil
}
func (d *DockerEngine) LogsContainer(ctx context.Context, id string) (string, error) {
reader, err := d.cli.ContainerLogs(ctx, id, container.LogsOptions{ShowStdout: true, ShowStderr: true})
if err != nil {
return "", fmt.Errorf("failed to get container logs: %w", err)
}
defer func(reader io.ReadCloser) {
err = reader.Close()
if err != nil {
slog.Warn("Failed to close reader", "err", err)
}
}(reader)
logs, err := io.ReadAll(reader)
if err != nil {
return "", fmt.Errorf("failed to read container logs: %w", err)
}
return string(logs), nil
}
func (d *DockerEngine) waitForContainer(ctx context.Context, id, desiredState string) error {
ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
defer cancel()
for {
// Inspect the container to get its current state
inspection, err := d.cli.ContainerInspect(ctx, id)
if err != nil {
return fmt.Errorf("failed to inspect container: %w", err)
}
// Check the container's state
currentState := strings.ToLower(inspection.State.Status)
switch currentState {
case desiredState:
// Container is in the desired state (e.g., "running")
return nil
case "exited", "dead", "removing":
// Container failed or stopped unexpectedly, get logs and return error
logs, _ := d.LogsContainer(ctx, id)
return fmt.Errorf("container failed to reach %s state, logs: %s", desiredState, logs)
}
// Wait before polling again
select {
case <-ctx.Done():
return fmt.Errorf("timed out after 10s waiting for container to reach %s state", desiredState)
case <-time.After(1 * time.Second):
// Continue polling
}
}
}

View File

@@ -0,0 +1,70 @@
package internal
import (
"flag"
"log/slog"
"os"
"strconv"
)
var globalFlags *Flags
type Flags struct {
Verbose bool // Log everything to console
Debug bool // Enable debug mode, implies Verbose - disables SST and MQTT connections
NoMonitor bool // Disable system monitoring
}
func (flags *Flags) DebugLog() {
slog.Info("Maitred flags",
"verbose", flags.Verbose,
"debug", flags.Debug,
"no-monitor", flags.NoMonitor,
)
}
func getEnvAsInt(name string, defaultVal int) int {
valueStr := os.Getenv(name)
value, err := strconv.Atoi(valueStr)
if err != nil {
return defaultVal
}
return value
}
func getEnvAsBool(name string, defaultVal bool) bool {
valueStr := os.Getenv(name)
val, err := strconv.ParseBool(valueStr)
if err != nil {
return defaultVal
}
return val
}
func getEnvAsString(name string, defaultVal string) string {
valueStr := os.Getenv(name)
if len(valueStr) == 0 {
return defaultVal
}
return valueStr
}
func InitFlags() {
// Create Flags struct
globalFlags = &Flags{}
// Get flags
flag.BoolVar(&globalFlags.Verbose, "verbose", getEnvAsBool("VERBOSE", false), "Verbose mode")
flag.BoolVar(&globalFlags.Debug, "debug", getEnvAsBool("DEBUG", false), "Debug mode")
flag.BoolVar(&globalFlags.NoMonitor, "no-monitor", getEnvAsBool("NO_MONITOR", false), "Disable system monitoring")
// Parse flags
flag.Parse()
// If debug is enabled, verbose is also enabled
if globalFlags.Debug {
globalFlags.Verbose = true
}
}
func GetFlags() *Flags {
return globalFlags
}
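A brief usage sketch; flags mirror the corresponding environment variables (`-debug` / `DEBUG`, etc.):

```go
// Inside main(): parse flags once, then read them anywhere.
internal.InitFlags() // honors -verbose/-debug/-no-monitor and VERBOSE/DEBUG/NO_MONITOR
flags := internal.GetFlags()
if flags.Verbose {
	flags.DebugLog()
}
```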

View File

@@ -0,0 +1,48 @@
package internal
import (
"context"
"fmt"
"log/slog"
"os"
"strings"
)
type CustomHandler struct {
Handler slog.Handler
}
func (h *CustomHandler) Enabled(ctx context.Context, level slog.Level) bool {
return h.Handler.Enabled(ctx, level)
}
func (h *CustomHandler) Handle(_ context.Context, r slog.Record) error {
// Format the timestamp as "2006/01/02 15:04:05"
timestamp := r.Time.Format("2006/01/02 15:04:05")
// Convert level to uppercase string (e.g., "INFO")
level := strings.ToUpper(r.Level.String())
// Build the message
msg := fmt.Sprintf("%s %s %s", timestamp, level, r.Message)
// Handle additional attributes if they exist
var attrs []string
r.Attrs(func(a slog.Attr) bool {
attrs = append(attrs, fmt.Sprintf("%s=%v", a.Key, a.Value))
return true
})
if len(attrs) > 0 {
msg += " " + strings.Join(attrs, " ")
}
// Write the formatted message to stdout
_, err := fmt.Fprintln(os.Stdout, msg)
return err
}
func (h *CustomHandler) WithAttrs(attrs []slog.Attr) slog.Handler {
return &CustomHandler{Handler: h.Handler.WithAttrs(attrs)}
}
func (h *CustomHandler) WithGroup(name string) slog.Handler {
return &CustomHandler{Handler: h.Handler.WithGroup(name)}
}
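A sketch of wiring the handler up as the default logger; wrapping a TextHandler is one plausible choice, not necessarily what maitred's main does:

```go
package main

import (
	"log/slog"
	"os"

	"nestri/maitred/internal"
)

func main() {
	inner := slog.NewTextHandler(os.Stdout, nil)
	slog.SetDefault(slog.New(&internal.CustomHandler{Handler: inner}))
	slog.Info("hello", "key", "value") // prints "2006/01/02 15:04:05 INFO hello key=value"
}
```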

View File

@@ -0,0 +1,366 @@
package realtime
import (
"context"
"fmt"
"log/slog"
"nestri/maitred/internal"
"nestri/maitred/internal/containers"
"strings"
"sync"
"time"
)
var (
nestriRunnerImage = "ghcr.io/nestrilabs/nestri/runner:nightly"
nestriRelayImage = "ghcr.io/nestrilabs/nestri/relay:nightly"
)
type ManagedContainerType int
const (
// Runner is the nestri runner container
Runner ManagedContainerType = iota
// Relay is the nestri relay container
Relay
)
// ManagedContainer type with extra information fields
type ManagedContainer struct {
containers.Container
Type ManagedContainerType
}
// managedContainers is a map of containers that are managed by us (maitred)
var (
managedContainers = make(map[string]ManagedContainer)
managedContainersMutex sync.RWMutex
)
// InitializeManager handles the initialization of the managed containers and pulls their latest images
func InitializeManager(ctx context.Context, ctrEngine containers.ContainerEngine) error {
// If debug, override the images
if internal.GetFlags().Debug {
nestriRunnerImage = "ghcr.io/datcaptainhorse/nestri-cachyos:latest-v3"
nestriRelayImage = "ghcr.io/datcaptainhorse/nestri-relay:latest"
}
// Look for existing stopped runner containers and remove them
slog.Info("Checking and removing old runner containers")
oldRunners, err := ctrEngine.ListContainersByImage(ctx, nestriRunnerImage)
if err != nil {
return err
}
for _, c := range oldRunners {
// If running, stop first
if strings.Contains(strings.ToLower(c.State), "running") {
slog.Info("Stopping old runner container", "id", c.ID)
if err = ctrEngine.StopContainer(ctx, c.ID); err != nil {
return err
}
}
slog.Info("Removing old runner container", "id", c.ID)
if err = ctrEngine.RemoveContainer(ctx, c.ID); err != nil {
return err
}
}
// Pull the runner image if not in debug mode
if !internal.GetFlags().Debug {
slog.Info("Pulling runner image", "image", nestriRunnerImage)
if err := ctrEngine.PullImage(ctx, nestriRunnerImage); err != nil {
return fmt.Errorf("failed to pull runner image: %w", err)
}
}
// Look for existing stopped relay containers and remove them
slog.Info("Checking and removing old relay containers")
oldRelays, err := ctrEngine.ListContainersByImage(ctx, nestriRelayImage)
if err != nil {
return err
}
for _, c := range oldRelays {
// If running, stop first
if strings.Contains(strings.ToLower(c.State), "running") {
slog.Info("Stopping old relay container", "id", c.ID)
if err = ctrEngine.StopContainer(ctx, c.ID); err != nil {
return err
}
}
slog.Info("Removing old relay container", "id", c.ID)
if err = ctrEngine.RemoveContainer(ctx, c.ID); err != nil {
return err
}
}
// Pull the relay image if not in debug mode
if !internal.GetFlags().Debug {
slog.Info("Pulling relay image", "image", nestriRelayImage)
if err := ctrEngine.PullImage(ctx, nestriRelayImage); err != nil {
return fmt.Errorf("failed to pull relay image: %w", err)
}
}
return nil
}
// CreateRunner creates a new runner image container
func CreateRunner(ctx context.Context, ctrEngine containers.ContainerEngine) (string, error) {
// For safety, limit to 4 runners
if CountRunners() >= 4 {
return "", fmt.Errorf("maximum number of runners reached")
}
// Create the container
containerID, err := ctrEngine.NewContainer(ctx, nestriRunnerImage, nil)
if err != nil {
return "", err
}
// Add the container to the managed list
managedContainersMutex.Lock()
defer managedContainersMutex.Unlock()
managedContainers[containerID] = ManagedContainer{
Container: containers.Container{
ID: containerID,
},
Type: Runner,
}
return containerID, nil
}
// StartRunner starts a runner container, keeping track of its state
func StartRunner(ctx context.Context, ctrEngine containers.ContainerEngine, id string) error {
// Verify the container is part of the managed list
managedContainersMutex.RLock()
if _, ok := managedContainers[id]; !ok {
managedContainersMutex.RUnlock()
return fmt.Errorf("container %s is not managed", id)
}
managedContainersMutex.RUnlock()
// Start the container
if err := ctrEngine.StartContainer(ctx, id); err != nil {
return err
}
// Check container status in the background at 10-second intervals; if it exits, print its logs
go func() {
err := monitorContainer(ctx, ctrEngine, id)
if err != nil {
slog.Error("failure while monitoring runner container", "id", id, "err", err)
return
}
}()
return nil
}
// RemoveRunner removes a runner container
func RemoveRunner(ctx context.Context, ctrEngine containers.ContainerEngine, id string) error {
// Stop the container if it's running (read its state under the read lock)
managedContainersMutex.RLock()
state := managedContainers[id].State
managedContainersMutex.RUnlock()
if strings.Contains(strings.ToLower(state), "running") {
if err := ctrEngine.StopContainer(ctx, id); err != nil {
return err
}
}
// Remove the container
if err := ctrEngine.RemoveContainer(ctx, id); err != nil {
return err
}
// Remove the container from the managed list
managedContainersMutex.Lock()
defer managedContainersMutex.Unlock()
delete(managedContainers, id)
return nil
}
// ListRunners returns a list of all runner containers
func ListRunners() []ManagedContainer {
managedContainersMutex.RLock()
defer managedContainersMutex.RUnlock()
var runners []ManagedContainer
for _, v := range managedContainers {
if v.Type == Runner {
runners = append(runners, v)
}
}
return runners
}
// CountRunners returns the number of runner containers
func CountRunners() int {
return len(ListRunners())
}
// CreateRelay creates a new relay image container
func CreateRelay(ctx context.Context, ctrEngine containers.ContainerEngine) (string, error) {
// Limit to 1 relay
if CountRelays() >= 1 {
return "", fmt.Errorf("maximum number of relays reached")
}
// TODO: Placeholder for control secret, should be generated at runtime
secretEnv := fmt.Sprintf("CONTROL_SECRET=%s", "1234")
// Create the container
containerID, err := ctrEngine.NewContainer(ctx, nestriRelayImage, []string{secretEnv})
if err != nil {
return "", err
}
// Add the container to the managed list
managedContainersMutex.Lock()
defer managedContainersMutex.Unlock()
managedContainers[containerID] = ManagedContainer{
Container: containers.Container{
ID: containerID,
},
Type: Relay,
}
return containerID, nil
}
// StartRelay starts a relay container, keeping track of its state
func StartRelay(ctx context.Context, ctrEngine containers.ContainerEngine, id string) error {
// Verify the container is part of the managed list
managedContainersMutex.RLock()
if _, ok := managedContainers[id]; !ok {
managedContainersMutex.RUnlock()
return fmt.Errorf("container %s is not managed", id)
}
managedContainersMutex.RUnlock()
// Start the container
if err := ctrEngine.StartContainer(ctx, id); err != nil {
return err
}
// Check container status in the background at 10-second intervals; if it exits, print its logs
go func() {
err := monitorContainer(ctx, ctrEngine, id)
if err != nil {
slog.Error("failure while monitoring relay container", "id", id, "err", err)
return
}
}()
return nil
}
// RemoveRelay removes a relay container
func RemoveRelay(ctx context.Context, ctrEngine containers.ContainerEngine, id string) error {
// Stop the container if it's running (read its state under the read lock)
managedContainersMutex.RLock()
state := managedContainers[id].State
managedContainersMutex.RUnlock()
if strings.Contains(strings.ToLower(state), "running") {
if err := ctrEngine.StopContainer(ctx, id); err != nil {
return err
}
}
// Remove the container
if err := ctrEngine.RemoveContainer(ctx, id); err != nil {
return err
}
// Remove the container from the managed list
managedContainersMutex.Lock()
defer managedContainersMutex.Unlock()
delete(managedContainers, id)
return nil
}
// ListRelays returns a list of all relay containers
func ListRelays() []ManagedContainer {
managedContainersMutex.RLock()
defer managedContainersMutex.RUnlock()
var relays []ManagedContainer
for _, v := range managedContainers {
if v.Type == Relay {
relays = append(relays, v)
}
}
return relays
}
// CountRelays returns the number of relay containers
func CountRelays() int {
return len(ListRelays())
}
// CleanupManaged stops and removes all managed containers
func CleanupManaged(ctx context.Context, ctrEngine containers.ContainerEngine) error {
managedContainersMutex.Lock()
defer managedContainersMutex.Unlock()
if len(managedContainers) == 0 {
return nil
}
slog.Info("Cleaning up managed containers")
for id := range managedContainers {
// If running, stop first
if strings.Contains(strings.ToLower(managedContainers[id].State), "running") {
slog.Info("Stopping managed container", "id", id)
if err := ctrEngine.StopContainer(ctx, id); err != nil {
return err
}
}
// Remove the container
slog.Info("Removing managed container", "id", id)
if err := ctrEngine.RemoveContainer(ctx, id); err != nil {
return err
}
// Remove from the managed list
delete(managedContainers, id)
}
return nil
}
func monitorContainer(ctx context.Context, ctrEngine containers.ContainerEngine, id string) error {
for {
select {
case <-ctx.Done():
return nil
default:
// Check the container status
ctr, err := ctrEngine.InspectContainer(ctx, id)
if err != nil {
return fmt.Errorf("failed to inspect container: %w", err)
}
// Update the container state in the managed list, preserving its original type
managedContainersMutex.Lock()
existing := managedContainers[id]
managedContainers[id] = ManagedContainer{
Container: containers.Container{
ID: ctr.ID,
Name: ctr.Name,
State: ctr.State,
Image: ctr.Image,
},
Type: existing.Type,
}
managedContainersMutex.Unlock()
if !strings.Contains(strings.ToLower(ctr.State), "running") {
// Container is not running, print logs
logs, err := ctrEngine.LogsContainer(ctx, id)
if err != nil {
return fmt.Errorf("failed to get container logs: %w", err)
}
return fmt.Errorf("container %s stopped running: %s", id, logs)
}
}
// Sleep for 10 seconds
select {
case <-ctx.Done():
return nil
case <-time.After(10 * time.Second):
}
}
}
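For context, a hedged sketch of how these pieces compose (assumes `ctx` and a `ContainerEngine` named `engine` are set up as elsewhere in this PR):

```go
// Inside maitred's startup path (names here are illustrative):
if err := realtime.InitializeManager(ctx, engine); err != nil {
	log.Fatal(err)
}
id, err := realtime.CreateRunner(ctx, engine) // capped at 4 runners
if err != nil {
	log.Fatal(err)
}
if err := realtime.StartRunner(ctx, engine, id); err != nil { // monitored in the background
	log.Fatal(err)
}
defer func() { _ = realtime.CleanupManaged(ctx, engine) }() // stop and remove everything on shutdown
```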

View File

@@ -0,0 +1,52 @@
package realtime
import (
"encoding/json"
)
// BaseMessage is the generic top-level message structure
type BaseMessage struct {
Type string `json:"type"`
Payload json.RawMessage `json:"payload"`
}
type CreatePayload struct{}
type StartPayload struct {
ContainerID string `json:"container_id"`
}
type StopPayload struct {
ContainerID string `json:"container_id"`
}
// ParseMessage parses a BaseMessage and returns the specific payload
func ParseMessage(data []byte) (BaseMessage, interface{}, error) {
var base BaseMessage
if err := json.Unmarshal(data, &base); err != nil {
return base, nil, err
}
switch base.Type {
case "create":
var payload CreatePayload
if err := json.Unmarshal(base.Payload, &payload); err != nil {
return base, nil, err
}
return base, payload, nil
case "start":
var payload StartPayload
if err := json.Unmarshal(base.Payload, &payload); err != nil {
return base, nil, err
}
return base, payload, nil
case "stop":
var payload StopPayload
if err := json.Unmarshal(base.Payload, &payload); err != nil {
return base, nil, err
}
return base, payload, nil
default:
return base, base.Payload, nil
}
}
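A usage sketch of the dispatch this enables; the payload bytes are hypothetical:

```go
package main

import (
	"fmt"
	"log"

	"nestri/maitred/internal/realtime"
)

func main() {
	data := []byte(`{"type":"start","payload":{"container_id":"abc123"}}`) // hypothetical message
	base, payload, err := realtime.ParseMessage(data)
	if err != nil {
		log.Fatal(err)
	}
	switch p := payload.(type) {
	case realtime.StartPayload:
		fmt.Println("start runner", p.ContainerID)
	case realtime.StopPayload:
		fmt.Println("stop runner", p.ContainerID)
	case realtime.CreatePayload:
		fmt.Println("create a new runner")
	default:
		fmt.Println("unhandled message type:", base.Type)
	}
}
```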

View File

@@ -0,0 +1,182 @@
package realtime
import (
"context"
"fmt"
"github.com/eclipse/paho.golang/autopaho"
"github.com/eclipse/paho.golang/paho"
"log/slog"
"nestri/maitred/internal/auth"
"nestri/maitred/internal/containers"
"nestri/maitred/internal/resource"
"net/url"
"os"
"time"
)
func Run(ctx context.Context, machineID string, containerEngine containers.ContainerEngine, resource *resource.Resource) error {
var clientID = generateClientID()
var topic = fmt.Sprintf("%s/%s/%s", resource.App.Name, resource.App.Stage, machineID)
var serverURL = fmt.Sprintf("wss://%s/mqtt?x-amz-customauthorizer-name=%s", resource.Realtime.Endpoint, resource.Realtime.Authorizer)
slog.Info("Realtime", "topic", topic)
userTokens, err := auth.FetchUserToken(machineID, resource)
if err != nil {
return err
}
slog.Info("Realtime", "token", userTokens.AccessToken)
u, err := url.Parse(serverURL)
if err != nil {
return err
}
router := paho.NewStandardRouter()
router.DefaultHandler(func(p *paho.Publish) {
slog.Debug("DefaultHandler", "topic", p.Topic, "message", fmt.Sprintf("default handler received message: %s - with topic: %s", p.Payload, p.Topic))
})
createTopic := fmt.Sprintf("%s/create", topic)
slog.Debug("Registering handler", "topic", createTopic)
router.RegisterHandler(createTopic, func(p *paho.Publish) {
slog.Debug("Router", "message", "received create message with payload", fmt.Sprintf("%s", p.Payload))
base, _, err := ParseMessage(p.Payload)
if err != nil {
slog.Error("Router", "err", fmt.Sprintf("failed to parse message: %s", err))
return
}
if base.Type != "create" {
slog.Error("Router", "err", "unexpected message type")
return
}
// Create runner container
containerID, err := CreateRunner(ctx, containerEngine)
if err != nil {
slog.Error("Router", "err", fmt.Sprintf("failed to create runner container: %s", err))
return
}
slog.Info("Router", "info", fmt.Sprintf("created runner container: %s", containerID))
})
startTopic := fmt.Sprintf("%s/start", topic)
slog.Debug("Registering handler", "topic", startTopic)
router.RegisterHandler(startTopic, func(p *paho.Publish) {
slog.Debug("Router", "message", "received start message with payload", fmt.Sprintf("%s", p.Payload))
base, payload, err := ParseMessage(p.Payload)
if err != nil {
slog.Error("Router", "err", fmt.Sprintf("failed to parse message: %s", err))
return
}
if base.Type != "start" {
slog.Error("Router", "err", "unexpected message type")
return
}
// Get container ID
startPayload, ok := payload.(StartPayload)
if !ok {
slog.Error("Router", "err", "failed to get payload")
return
}
// Start runner container
if err = containerEngine.StartContainer(ctx, startPayload.ContainerID); err != nil {
slog.Error("Router", "err", fmt.Sprintf("failed to start runner container: %s", err))
return
}
slog.Info("Router", "info", fmt.Sprintf("started runner container: %s", startPayload.ContainerID))
})
stopTopic := fmt.Sprintf("%s/stop", topic)
slog.Debug("Registering handler", "topic", stopTopic)
router.RegisterHandler(stopTopic, func(p *paho.Publish) {
slog.Debug("Router", "message", "received stop message with payload", fmt.Sprintf("%s", p.Payload))
base, payload, err := ParseMessage(p.Payload)
if err != nil {
slog.Error("Router", "err", fmt.Sprintf("failed to parse message: %s", err))
return
}
if base.Type != "stop" {
slog.Error("Router", "err", "unexpected message type")
return
}
// Get container ID
stopPayload, ok := payload.(StopPayload)
if !ok {
slog.Error("Router", "err", "failed to get payload")
return
}
// Stop runner container
if err = containerEngine.StopContainer(ctx, stopPayload.ContainerID); err != nil {
slog.Error("Router", "err", fmt.Sprintf("failed to stop runner container: %s", err))
return
}
slog.Info("Router", "info", fmt.Sprintf("stopped runner container: %s", stopPayload.ContainerID))
})
legacyLogger := slog.NewLogLogger(slog.NewTextHandler(os.Stdout, nil), slog.LevelError)
cliCfg := autopaho.ClientConfig{
ServerUrls: []*url.URL{u},
ConnectUsername: "",
ConnectPassword: []byte(userTokens.AccessToken),
KeepAlive: 20,
CleanStartOnInitialConnection: true,
SessionExpiryInterval: 60,
ReconnectBackoff: autopaho.NewConstantBackoff(time.Second),
OnConnectionUp: func(cm *autopaho.ConnectionManager, connAck *paho.Connack) {
slog.Info("Router", "info", "MQTT connection is up and running")
if _, err := cm.Subscribe(context.Background(), &paho.Subscribe{
Subscriptions: []paho.SubscribeOptions{
{Topic: fmt.Sprintf("%s/#", topic), QoS: 1},
},
}); err != nil {
slog.Error("Router", "err", fmt.Sprint("failed to subscribe, likely no messages will be received: ", err))
}
},
Errors: legacyLogger,
OnConnectError: func(err error) {
slog.Error("Router", "err", fmt.Sprintf("error whilst attempting connection: %s", err))
},
ClientConfig: paho.ClientConfig{
ClientID: clientID,
OnPublishReceived: []func(paho.PublishReceived) (bool, error){
func(pr paho.PublishReceived) (bool, error) {
router.Route(pr.Packet.Packet())
return true, nil
}},
OnClientError: func(err error) { slog.Error("Router", "err", fmt.Sprintf("client error: %s", err)) },
OnServerDisconnect: func(d *paho.Disconnect) {
if d.Properties != nil {
slog.Info("Router", "info", fmt.Sprintf("server requested disconnect: %s", d.Properties.ReasonString))
} else {
slog.Info("Router", "info", fmt.Sprintf("server requested disconnect; reason code: %d", d.ReasonCode))
}
},
},
}
c, err := autopaho.NewConnection(ctx, cliCfg)
if err != nil {
return err
}
if err = c.AwaitConnection(ctx); err != nil {
return err
}
return nil
}

View File

@@ -0,0 +1,17 @@
package realtime
import (
"crypto/rand"
"fmt"
"github.com/oklog/ulid/v2"
"time"
)
func generateClientID() string {
// Create a source of entropy (cryptographically secure)
entropy := ulid.Monotonic(rand.Reader, 0)
// Generate a new ULID
id := ulid.MustNew(ulid.Timestamp(time.Now()), entropy)
// Create the client ID string
return fmt.Sprintf("mch_%s", id.String())
}

View File

@@ -7,7 +7,7 @@ import (
"reflect"
)
type resource struct {
type Resource struct {
Api struct {
Url string `json:"url"`
}
@@ -17,7 +17,7 @@ type resource struct {
AuthFingerprintKey struct {
Value string `json:"value"`
}
Party struct {
Realtime struct {
Endpoint string `json:"endpoint"`
Authorizer string `json:"authorizer"`
}
@@ -27,20 +27,20 @@ type resource struct {
}
}
var Resource resource
func init() {
val := reflect.ValueOf(&Resource).Elem()
func NewResource() (*Resource, error) {
resource := Resource{}
val := reflect.ValueOf(&resource).Elem()
for i := 0; i < val.NumField(); i++ {
field := val.Field(i)
typeField := val.Type().Field(i)
envVarName := fmt.Sprintf("SST_RESOURCE_%s", typeField.Name)
envValue, exists := os.LookupEnv(envVarName)
if !exists {
panic(fmt.Sprintf("Environment variable %s is required", envVarName))
return nil, fmt.Errorf("missing environment variable %s", envVarName)
}
if err := json.Unmarshal([]byte(envValue), field.Addr().Interface()); err != nil {
panic(err)
return nil, fmt.Errorf("error unmarshalling %s: %w", envVarName, err)
}
}
return &resource, nil
}
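Each exported field of `Resource` is populated from an `SST_RESOURCE_<Field>` environment variable holding JSON. A hedged sketch with placeholder values (every exported field of the struct must be set, or `NewResource` errors):

```go
package main

import (
	"fmt"
	"log"
	"os"

	"nestri/maitred/internal/resource"
)

func main() {
	// Hypothetical values; each field is unmarshalled from its own variable.
	_ = os.Setenv("SST_RESOURCE_Api", `{"url":"https://api.example.com"}`)
	_ = os.Setenv("SST_RESOURCE_AuthFingerprintKey", `{"value":"example-secret"}`)
	// ... set the remaining fields the same way ...

	res, err := resource.NewResource()
	if err != nil {
		log.Fatal(err) // reports the first missing or malformed variable
	}
	fmt.Println(res.Api.Url)
}
```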

View File

@@ -0,0 +1,184 @@
package system
import (
"bytes"
"errors"
"fmt"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
)
const (
pciClassVGA = 0x0300 // VGA compatible controller
pciClass3D = 0x0302 // 3D controller
pciClassDisplay = 0x0380 // Display controller
pciClassCoProcessor = 0x0b40 // Co-processor (e.g., NVIDIA Tesla)
)
type infoPair struct {
Name string
ID int
}
type PCIInfo struct {
Slot string
Class infoPair
Vendor infoPair
Device infoPair
SVendor infoPair
SDevice infoPair
Rev string
ProgIf string
Driver string
Modules []string
IOMMUGroup string
}
const (
VendorIntel = 0x8086
VendorNVIDIA = 0x10de
VendorAMD = 0x1002
)
func GetAllGPUInfo() ([]PCIInfo, error) {
var gpus []PCIInfo
cmd := exec.Command("lspci", "-mmvvvnnkD")
output, err := cmd.Output()
if err != nil {
return nil, err
}
sections := bytes.Split(output, []byte("\n\n"))
for _, section := range sections {
var info PCIInfo
lines := bytes.Split(section, []byte("\n"))
for _, line := range lines {
parts := bytes.SplitN(line, []byte(":"), 2)
if len(parts) < 2 {
continue
}
key := strings.TrimSpace(string(parts[0]))
value := strings.TrimSpace(string(parts[1]))
switch key {
case "Slot":
info.Slot = value
case "Class":
info.Class, err = parseInfoPair(value)
case "Vendor":
info.Vendor, err = parseInfoPair(value)
case "Device":
info.Device, err = parseInfoPair(value)
case "SVendor":
info.SVendor, err = parseInfoPair(value)
case "SDevice":
info.SDevice, err = parseInfoPair(value)
case "Rev":
info.Rev = value
case "ProgIf":
info.ProgIf = value
case "Driver":
info.Driver = value
case "Module":
info.Modules = append(info.Modules, value)
case "IOMMUGroup":
info.IOMMUGroup = value
}
if err != nil {
return nil, err
}
}
// Check if this is a GPU device
if isGPUClass(info.Class.ID) {
gpus = append(gpus, info)
}
}
return gpus, nil
}
// gets infoPair from "SomeName [SomeID]"
// example: "DG2 [Arc A770] [56a0]" -> Name: "DG2 [Arc A770]", ID: "56a0"
func parseInfoPair(pair string) (infoPair, error) {
parts := strings.Split(pair, "[")
if len(parts) < 2 {
return infoPair{}, errors.New("invalid info pair")
}
id := strings.TrimSuffix(parts[len(parts)-1], "]")
id = strings.TrimSpace(id)
// Remove the ID, including its square brackets, from the name
name := strings.TrimSpace(strings.ReplaceAll(pair, "["+id+"]", ""))
idHex, err := parseHexID(id)
if err != nil {
return infoPair{}, err
}
return infoPair{
Name: name,
ID: idHex,
}, nil
}
func parseHexID(id string) (int, error) {
if strings.HasPrefix(id, "0x") {
id = id[2:]
}
parsed, err := strconv.ParseInt(id, 16, 32)
if err != nil {
return 0, err
}
return int(parsed), nil
}
func isGPUClass(class int) bool {
return class == pciClassVGA || class == pciClass3D || class == pciClassDisplay || class == pciClassCoProcessor
}
// GetCardDevices returns the /dev/dri/cardX and /dev/dri/renderDXXX device paths
func (info PCIInfo) GetCardDevices() (cardPath, renderPath string, err error) {
busID := strings.ToLower(info.Slot)
if !strings.HasPrefix(busID, "0000:") || len(busID) != 12 || busID[4] != ':' || busID[7] != ':' || busID[10] != '.' {
return "", "", fmt.Errorf("invalid PCI Bus ID format: %s (expected 0000:XX:YY.Z)", busID)
}
byPathDir := "/dev/dri/by-path/"
entries, err := os.ReadDir(byPathDir)
if err != nil {
return "", "", fmt.Errorf("failed to read %s: %v", byPathDir, err)
}
for _, entry := range entries {
name := entry.Name()
if strings.HasPrefix(name, "pci-"+busID+"-card") {
cardPath, err = filepath.EvalSymlinks(filepath.Join(byPathDir, name))
if err != nil {
return "", "", fmt.Errorf("failed to resolve card symlink %s: %v", name, err)
}
}
if strings.HasPrefix(name, "pci-"+busID+"-render") {
renderPath, err = filepath.EvalSymlinks(filepath.Join(byPathDir, name))
if err != nil {
return "", "", fmt.Errorf("failed to resolve render symlink %s: %v", name, err)
}
}
}
if cardPath == "" && renderPath == "" {
return "", "", fmt.Errorf("no DRM devices found for PCI Bus ID: %s", busID)
}
return cardPath, renderPath, nil
}
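A sketch of enumerating GPUs and resolving their DRM nodes with these helpers:

```go
package main

import (
	"fmt"
	"log"

	"nestri/maitred/internal/system"
)

func main() {
	gpus, err := system.GetAllGPUInfo() // shells out to lspci
	if err != nil {
		log.Fatal(err)
	}
	for _, gpu := range gpus {
		card, render, err := gpu.GetCardDevices()
		if err != nil {
			log.Printf("no DRM nodes for %s: %v", gpu.Slot, err)
			continue
		}
		fmt.Printf("%s (%s): card=%s render=%s\n", gpu.Device.Name, gpu.Slot, card, render)
	}
}
```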

View File

@@ -0,0 +1,290 @@
package system
import (
"bufio"
"fmt"
"log/slog"
"os"
"path/filepath"
"strconv"
"strings"
"syscall"
"time"
"unsafe"
)
// FDInfo holds parsed fdinfo data
type FDInfo struct {
ClientID string
EngineTime uint64 // i915: "drm-engine-render" in ns
Cycles uint64 // Xe: "drm-cycles-rcs"
TotalCycles uint64 // Xe: "drm-total-cycles-rcs"
MemoryVRAM uint64 // i915: "drm-memory-vram", Xe: "drm-total-vram0" in bytes
}
// findCardX maps PCI slot to /dev/dri/cardX
func findCardX(pciSlot string) (string, error) {
driPath := "/sys/class/drm"
entries, err := os.ReadDir(driPath)
if err != nil {
return "", fmt.Errorf("failed to read /sys/class/drm: %v", err)
}
for _, entry := range entries {
if strings.HasPrefix(entry.Name(), "card") {
deviceLink := filepath.Join(driPath, entry.Name(), "device")
target, err := os.Readlink(deviceLink)
if err != nil {
continue
}
if strings.Contains(target, pciSlot) {
return entry.Name(), nil
}
}
}
return "", fmt.Errorf("no cardX found for PCI slot %s", pciSlot)
}
// getDriver retrieves the driver name
func getDriver(cardX string) (string, error) {
driverLink := filepath.Join("/sys/class/drm", cardX, "device", "driver")
target, err := os.Readlink(driverLink)
if err != nil {
return "", fmt.Errorf("failed to read driver link for %s: %v", cardX, err)
}
return filepath.Base(target), nil
}
// collectFDInfo gathers fdinfo data
func collectFDInfo(cardX string) ([]FDInfo, error) {
var fdInfos []FDInfo
clientIDs := make(map[string]struct{})
procDirs, err := os.ReadDir("/proc")
if err != nil {
return nil, fmt.Errorf("failed to read /proc: %v", err)
}
for _, procDir := range procDirs {
if !procDir.IsDir() {
continue
}
pid := procDir.Name()
if _, err := strconv.Atoi(pid); err != nil {
continue
}
fdDir := filepath.Join("/proc", pid, "fd")
fdEntries, err := os.ReadDir(fdDir)
if err != nil {
continue
}
for _, fdEntry := range fdEntries {
fdPath := filepath.Join(fdDir, fdEntry.Name())
target, err := os.Readlink(fdPath)
if err != nil {
continue
}
if target == "/dev/dri/"+cardX {
fdinfoPath := filepath.Join("/proc", pid, "fdinfo", fdEntry.Name())
file, err := os.Open(fdinfoPath)
if err != nil {
continue
}
scanner := bufio.NewScanner(file)
var clientID, engineTime, cycles, totalCycles, memoryVRAM string
for scanner.Scan() {
line := scanner.Text()
parts := strings.SplitN(line, ":", 2)
if len(parts) < 2 {
continue
}
key := strings.TrimSpace(parts[0])
value := strings.TrimSpace(parts[1])
switch key {
case "drm-client-id":
clientID = value
case "drm-engine-render":
engineTime = value
case "drm-cycles-rcs":
cycles = value
case "drm-total-cycles-rcs":
totalCycles = value
case "drm-memory-vram", "drm-total-vram0": // i915 and Xe keys
memoryVRAM = value
}
}
if clientID == "" || clientID == "0" {
continue
}
if _, exists := clientIDs[clientID]; exists {
continue
}
clientIDs[clientID] = struct{}{}
fdInfo := FDInfo{ClientID: clientID}
if engineTime != "" {
fdInfo.EngineTime, _ = strconv.ParseUint(engineTime, 10, 64)
}
if cycles != "" {
fdInfo.Cycles, _ = strconv.ParseUint(cycles, 10, 64)
}
if totalCycles != "" {
fdInfo.TotalCycles, _ = strconv.ParseUint(totalCycles, 10, 64)
}
if memoryVRAM != "" {
if strings.HasSuffix(memoryVRAM, " kB") || strings.HasSuffix(memoryVRAM, " KiB") {
memKB := strings.TrimSuffix(strings.TrimSuffix(memoryVRAM, " kB"), " KiB")
if mem, err := strconv.ParseUint(memKB, 10, 64); err == nil {
fdInfo.MemoryVRAM = mem * 1024 // Convert kB to bytes
}
} else {
fdInfo.MemoryVRAM, _ = strconv.ParseUint(memoryVRAM, 10, 64) // Assume bytes if no unit
}
}
fdInfos = append(fdInfos, fdInfo)
_ = file.Close()
}
}
}
return fdInfos, nil
}
// drmIoctl wraps the syscall.Syscall for ioctl
func drmIoctl(fd int, request uintptr, data unsafe.Pointer) error {
_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), request, uintptr(data))
if errno != 0 {
return fmt.Errorf("ioctl failed: %v", errno)
}
return nil
}
func monitorIntelGPU(device PCIInfo) GPUUsage {
// Map PCI slot to cardX
cardX, err := findCardX(device.Slot)
if err != nil {
slog.Warn("failed to find cardX for Intel GPU", "slot", device.Slot, "error", err)
return GPUUsage{}
}
// Determine driver
driver, err := getDriver(cardX)
if err != nil {
slog.Warn("failed to get driver", "card", cardX, "error", err)
return GPUUsage{}
}
if driver != "i915" && driver != "xe" {
slog.Warn("unsupported Intel driver", "driver", driver, "card", cardX)
return GPUUsage{}
}
// PCIInfo also carries a driver name; warn if it disagrees with sysfs
if device.Driver != driver {
slog.Warn("driver mismatch", "card", cardX, "lspci driver", device.Driver, "sysfs driver", driver)
}
// Open DRM device
cardPath := "/dev/dri/" + cardX
fd, err := syscall.Open(cardPath, syscall.O_RDWR, 0)
if err != nil {
slog.Error("failed to open DRM device", "path", cardPath, "error", err)
return GPUUsage{}
}
defer func(fd int) {
_ = syscall.Close(fd)
}(fd)
// Get total and used VRAM via ioctl
var totalVRAM, usedVRAMFromIOCTL uint64
if driver == "i915" {
totalVRAM, usedVRAMFromIOCTL, err = getMemoryRegionsI915(fd)
} else { // xe
totalVRAM, usedVRAMFromIOCTL, err = queryMemoryRegionsXE(fd)
}
if err != nil {
// Proceed with totalVRAM = 0 if the ioctl query fails
slog.Debug("failed to get memory regions", "card", cardX, "error", err)
}
// Collect samples for usage percentage
firstFDInfos, err := collectFDInfo(cardX)
if err != nil {
slog.Warn("failed to collect first FDInfo", "card", cardX, "error", err)
return GPUUsage{}
}
time.Sleep(1 * time.Second)
secondFDInfos, err := collectFDInfo(cardX)
if err != nil {
slog.Warn("failed to collect second FDInfo", "card", cardX, "error", err)
return GPUUsage{}
}
// Calculate usage percentage
var usagePercent float64
if driver == "i915" {
var totalDeltaTime uint64
for _, second := range secondFDInfos {
for _, first := range firstFDInfos {
if second.ClientID == first.ClientID {
totalDeltaTime += second.EngineTime - first.EngineTime
break
}
}
}
if totalDeltaTime > 0 {
usagePercent = float64(totalDeltaTime) / 1e9 * 100 // busy ns across the 1 s sample window -> percent
}
} else { // xe
var totalDeltaCycles, deltaTotalCycles uint64
for i, second := range secondFDInfos {
for _, first := range firstFDInfos {
if second.ClientID == first.ClientID {
deltaCycles := second.Cycles - first.Cycles
totalDeltaCycles += deltaCycles
if i == 0 {
deltaTotalCycles = second.TotalCycles - first.TotalCycles
}
break
}
}
}
if deltaTotalCycles > 0 {
usagePercent = float64(totalDeltaCycles) / float64(deltaTotalCycles) * 100
}
}
if usagePercent > 100 {
usagePercent = 100
}
// Sum per-process VRAM usage as fallback
var usedVRAM uint64
for _, fdInfo := range secondFDInfos {
usedVRAM += fdInfo.MemoryVRAM
}
// Prefer ioctl used VRAM if available and non-zero
if usedVRAMFromIOCTL != 0 {
usedVRAM = usedVRAMFromIOCTL
}
// Compute VRAM metrics
var freeVRAM uint64
var usedPercent float64
if totalVRAM > 0 {
if usedVRAM > totalVRAM {
usedVRAM = totalVRAM
}
freeVRAM = totalVRAM - usedVRAM
usedPercent = float64(usedVRAM) / float64(totalVRAM) * 100
}
return GPUUsage{
Info: device,
UsagePercent: usagePercent,
VRAM: VRAMUsage{
Total: totalVRAM,
Used: usedVRAM,
Free: freeVRAM,
UsedPercent: usedPercent,
},
}
}
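A worked example of the two-sample usage math above, with made-up counter values:

// Xe:   deltaCycles = 150000 - 100000 = 50000, deltaTotalCycles = 1000000 - 900000 = 100000
//       usagePercent = 50000 / 100000 * 100 = 50%
// i915: 400 ms of render time accumulated over the 1 s window
//       usagePercent = 4e8 / 1e9 * 100 = 40%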

View File

@@ -0,0 +1,86 @@
package system
import (
"fmt"
"unsafe"
)
// Constants for i915
const (
DRM_COMMAND_BASE = 0x40
DRM_I915_QUERY = 0x39
DRM_IOCTL_I915_QUERY = 0xC0106479 // _IOWR('d', DRM_COMMAND_BASE+DRM_I915_QUERY, 16)
DRM_I915_QUERY_MEMORY_REGIONS = 4
I915_MEMORY_CLASS_DEVICE = 1
)
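// The query request value above is the precomputed _IOWR() encoding
// (asm-generic layout: dir<<30 | size<<16 | type<<8 | nr). As a sanity
// check, a hypothetical helper (not part of this package) that
// reproduces it:
//
//	func ioctlIOWR(typ byte, nr uint8, size uintptr) uintptr {
//		const iocWrite, iocRead = 1, 2 // _IOC_WRITE, _IOC_READ
//		return uintptr(iocRead|iocWrite)<<30 | size<<16 | uintptr(typ)<<8 | uintptr(nr)
//	}
//
// ioctlIOWR('d', 0x40+0x39, 16) == 0xC0106479 (this file)
// ioctlIOWR('d', 0x40+0x00, 40) == 0xC0286440 (the Xe variant)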
// drmI915QueryItem mirrors struct drm_i915_query_item
type drmI915QueryItem struct {
QueryID uintptr
Length int32
Flags uint32
DataPtr uintptr
}
// drmI915Query mirrors struct drm_i915_query
type drmI915Query struct {
NumItems uint32
Flags uint32
ItemsPtr uintptr
}
// drmI915MemoryRegionInfo mirrors struct drm_i915_memory_region_info
type drmI915MemoryRegionInfo struct {
Region struct {
MemoryClass uint16
MemoryInstance uint16
}
Rsvd0 uint32
ProbedSize uint64
UnallocatedSize uint64
Rsvd1 [8]uint64
}
func getMemoryRegionsI915(fd int) (totalVRAM, usedVRAM uint64, err error) {
// Step 1: Get the required buffer size
item := drmI915QueryItem{
QueryID: DRM_I915_QUERY_MEMORY_REGIONS,
Length: 0,
}
query := drmI915Query{
NumItems: 1,
ItemsPtr: uintptr(unsafe.Pointer(&item)),
}
if err = drmIoctl(fd, DRM_IOCTL_I915_QUERY, unsafe.Pointer(&query)); err != nil {
return 0, 0, fmt.Errorf("initial i915 query failed: %v", err)
}
if item.Length <= 0 {
return 0, 0, fmt.Errorf("i915 query returned invalid length: %d", item.Length)
}
// Step 2: Allocate buffer and perform the query
data := make([]byte, item.Length)
item.DataPtr = uintptr(unsafe.Pointer(&data[0]))
if err = drmIoctl(fd, DRM_IOCTL_I915_QUERY, unsafe.Pointer(&query)); err != nil {
return 0, 0, fmt.Errorf("second i915 query failed: %v", err)
}
// Step 3: Parse the memory regions
numRegions := *(*uint32)(unsafe.Pointer(&data[0]))
headerSize := uint32(16) // num_regions (4) + rsvd[3] (12) = 16 bytes
regionSize := uint32(88) // Size of drm_i915_memory_region_info (calculated: 4+4+8+8+64)
for i := uint32(0); i < numRegions; i++ {
offset := headerSize + i*regionSize
if offset+regionSize > uint32(len(data)) {
return 0, 0, fmt.Errorf("data buffer too small for i915 region %d", i)
}
mr := (*drmI915MemoryRegionInfo)(unsafe.Pointer(&data[offset]))
if mr.Region.MemoryClass == I915_MEMORY_CLASS_DEVICE {
totalVRAM += mr.ProbedSize
usedVRAM += mr.ProbedSize - mr.UnallocatedSize
}
}
return totalVRAM, usedVRAM, nil
}

View File

@@ -0,0 +1,84 @@
package system
import (
"fmt"
"unsafe"
)
// Constants from xe_drm.h
const (
DRM_XE_DEVICE_QUERY_MEM_REGIONS = 1
DRM_XE_MEM_REGION_CLASS_VRAM = 1
DRM_XE_DEVICE_QUERY = 0x00
DRM_IOCTL_XE_DEVICE_QUERY uintptr = 0xC0286440 // _IOWR('d', DRM_COMMAND_BASE+DRM_XE_DEVICE_QUERY, 40)
)
// drmXEDeviceQuery mirrors struct drm_xe_device_query
type drmXEDeviceQuery struct {
Extensions uint64
Query uint32
Size uint32
Data uint64
Reserved [2]uint64
}
// drmXEQueryMemRegions mirrors struct drm_xe_query_mem_regions header
type drmXEQueryMemRegions struct {
NumMemRegions uint32
Pad uint32
// mem_regions[] follows
}
// drmXEMemRegion mirrors struct drm_xe_mem_region
type drmXEMemRegion struct {
MemClass uint16
Instance uint16
MinPageSize uint32
TotalSize uint64
Used uint64
CPUVisibleSize uint64
CPUVisibleUsed uint64
Reserved [6]uint64
}
func queryMemoryRegionsXE(fd int) (totalVRAM, usedVRAM uint64, err error) {
// Step 1: Get the required size
query := drmXEDeviceQuery{
Query: DRM_XE_DEVICE_QUERY_MEM_REGIONS,
Size: 0,
}
if err = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, unsafe.Pointer(&query)); err != nil {
return 0, 0, fmt.Errorf("initial xe query failed: %v", err)
}
if query.Size == 0 {
return 0, 0, fmt.Errorf("xe query returned zero size")
}
// Step 2: Allocate buffer and perform the query
data := make([]byte, query.Size)
query.Data = uint64(uintptr(unsafe.Pointer(&data[0])))
query.Size = uint32(len(data))
if err = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, unsafe.Pointer(&query)); err != nil {
return 0, 0, fmt.Errorf("second xe query failed: %v", err)
}
// Step 3: Parse the memory regions
header := (*drmXEQueryMemRegions)(unsafe.Pointer(&data[0]))
numRegions := header.NumMemRegions
headerSize := unsafe.Sizeof(drmXEQueryMemRegions{})
regionSize := unsafe.Sizeof(drmXEMemRegion{})
for i := uint32(0); i < numRegions; i++ {
offset := headerSize + uintptr(i)*regionSize
if offset+regionSize > uintptr(len(data)) {
return 0, 0, fmt.Errorf("data buffer too small for xe region %d", i)
}
mr := (*drmXEMemRegion)(unsafe.Pointer(&data[offset]))
if mr.MemClass == DRM_XE_MEM_REGION_CLASS_VRAM {
totalVRAM += mr.TotalSize
usedVRAM += mr.Used
}
}
return totalVRAM, usedVRAM, nil
}
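Given the struct definitions above, the buffer size returned by the first query is easy to sanity-check: the header is 8 bytes and each region entry is 88 bytes.

// e.g. a device exposing 3 regions (1 system + 2 VRAM) reports
// query.Size = 8 + 3*88 = 272, and the loop reads regions at
// offsets 8, 96 and 184.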

View File

@@ -0,0 +1,57 @@
package system
import (
"log/slog"
"os/exec"
"strconv"
"strings"
)
// monitorNVIDIAGPU monitors an NVIDIA GPU using nvidia-smi
func monitorNVIDIAGPU(device PCIInfo) GPUUsage {
// Query nvidia-smi for GPU metrics
cmd := exec.Command("nvidia-smi", "--query-gpu=pci.bus_id,utilization.gpu,memory.total,memory.used,memory.free", "--format=csv,noheader,nounits")
output, err := cmd.Output()
if err != nil {
slog.Warn("failed to run nvidia-smi", "error", err)
return GPUUsage{}
}
// Parse output and find matching GPU
lines := strings.Split(strings.TrimSpace(string(output)), "\n")
for _, line := range lines {
fields := strings.Split(line, ", ")
if len(fields) != 5 {
continue
}
busID := fields[0] // e.g., "0000:01:00.0"
if strings.Contains(busID, device.Slot) || strings.Contains(device.Slot, busID) {
usagePercent, _ := strconv.ParseFloat(fields[1], 64)
totalMiB, _ := strconv.ParseUint(fields[2], 10, 64)
usedMiB, _ := strconv.ParseUint(fields[3], 10, 64)
freeMiB, _ := strconv.ParseUint(fields[4], 10, 64)
// Convert MiB to bytes
total := totalMiB * 1024 * 1024
used := usedMiB * 1024 * 1024
free := freeMiB * 1024 * 1024
usedPercent := float64(0)
if total > 0 {
usedPercent = float64(used) / float64(total) * 100
}
return GPUUsage{
Info: device,
UsagePercent: usagePercent,
VRAM: VRAMUsage{
Total: total,
Used: used,
Free: free,
UsedPercent: usedPercent,
},
}
}
}
slog.Warn("No NVIDIA GPU found matching PCI slot", "slot", device.Slot)
return GPUUsage{}
}
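For reference, an illustrative line of the CSV the nvidia-smi query above emits (values made up; with nounits the memory columns are in MiB). The two-way Contains match tolerates the domain-width difference between nvidia-smi bus IDs (00000000:01:00.0) and typical lspci slots (01:00.0):

// 00000000:01:00.0, 23, 24576, 2048, 22528
//   bus ID, utilization.gpu %, memory.total, memory.used, memory.free (MiB)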

View File

@@ -0,0 +1,24 @@
package system
import (
"os"
"strings"
)
const (
dbusPath = "/var/lib/dbus/machine-id"
dbusPathEtc = "/etc/machine-id"
)
// GetID returns the machine ID read from `/var/lib/dbus/machine-id`, falling back to `/etc/machine-id`.
// If neither file can be read, an empty string and the read error are returned.
func GetID() (string, error) {
id, err := os.ReadFile(dbusPath)
if err != nil {
id, err = os.ReadFile(dbusPathEtc)
}
if err != nil {
return "", err
}
return strings.Trim(string(id), " \n"), nil
}
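A short usage sketch; per machine-id(5) the file contains a 32-character lowercase hex string followed by a newline, hence the Trim:

// id, err := system.GetID()
// // id == "3f1c9a7be2d449c6a1b2c3d4e5f60718" (made-up example)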

View File

@@ -0,0 +1,405 @@
package system
import (
"bufio"
"bytes"
"context"
"fmt"
"log/slog"
"os"
"os/exec"
"strconv"
"strings"
"sync"
"time"
)
// CPUInfo contains CPU model information
type CPUInfo struct {
Vendor string `json:"vendor"` // CPU vendor (e.g., "AMD", "Intel")
Model string `json:"model"` // CPU model name
}
// CPUUsage contains CPU usage metrics
type CPUUsage struct {
Info CPUInfo `json:"info"` // CPU vendor and model information
Total float64 `json:"total"` // Total CPU usage in percentage (0-100)
PerCore []float64 `json:"per_core"` // CPU usage per core in percentage (0-100)
}
// MemoryUsage contains memory usage metrics
type MemoryUsage struct {
Total uint64 `json:"total"` // Total memory in bytes
Used uint64 `json:"used"` // Used memory in bytes
Available uint64 `json:"available"` // Available memory in bytes
Free uint64 `json:"free"` // Free memory in bytes
UsedPercent float64 `json:"used_percent"` // Used memory in percentage (0-100)
}
// FilesystemUsage contains usage metrics for a filesystem path
type FilesystemUsage struct {
Path string `json:"path"` // Filesystem path
Total uint64 `json:"total"` // Total disk space in bytes
Used uint64 `json:"used"` // Used disk space in bytes
Free uint64 `json:"free"` // Free disk space in bytes
UsedPercent float64 `json:"used_percent"` // Used disk space in percentage (0-100)
}
// GPUUsage contains GPU usage metrics
type GPUUsage struct {
Info PCIInfo `json:"pci_info"` // GPU PCI information
UsagePercent float64 `json:"usage_percent"` // GPU usage in percentage (0-100)
VRAM VRAMUsage `json:"vram"` // GPU memory usage metrics
}
// VRAMUsage contains GPU memory usage metrics
type VRAMUsage struct {
Total uint64 `json:"total"` // Total VRAM in bytes
Used uint64 `json:"used"` // Used VRAM in bytes
Free uint64 `json:"free"` // Free VRAM in bytes
UsedPercent float64 `json:"used_percent"` // Used VRAM in percentage (0-100)
}
// ResourceUsage contains resource usage metrics
type ResourceUsage struct {
CPU CPUUsage `json:"cpu"` // CPU usage metrics
Memory MemoryUsage `json:"memory"` // Memory usage metrics
Disk FilesystemUsage `json:"disk"` // Disk usage metrics
GPUs []GPUUsage `json:"gpus"` // Per-GPU usage metrics
}
var (
lastUsage ResourceUsage
lastUsageMutex sync.RWMutex
)
// GetSystemUsage returns last known system resource usage metrics
func GetSystemUsage() ResourceUsage {
lastUsageMutex.RLock()
defer lastUsageMutex.RUnlock()
return lastUsage
}
// StartMonitoring begins periodic system usage monitoring with the given interval
func StartMonitoring(ctx context.Context, interval time.Duration) {
slog.Info("Starting system monitoring")
go func() {
// Initial sample immediately
updateUsage()
// Ticker for periodic updates
ticker := time.NewTicker(interval)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
slog.Info("Stopping system monitoring")
return
case <-ticker.C:
updateUsage()
}
}
}()
}
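// Usage sketch (assumes a caller-owned context, mirroring what main does):
//
//	ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt)
//	defer stop()
//	StartMonitoring(ctx, 5*time.Second)
//	// ...later, from any goroutine:
//	fmt.Println(GetSystemUsage().PrettyString())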
// updateUsage collects and updates the lastUsage variable
func updateUsage() {
// Collect CPU usage
cpu := GetCPUUsage()
// Collect memory usage
memory := GetMemoryUsage()
// Collect root filesystem usage
rootfs, err := GetFilesystemUsage("/")
if err != nil {
slog.Warn("Failed to get root filesystem usage", "error", err)
}
// Collect GPU usage
gpus := GetGPUUsage()
// Update shared variable safely
lastUsageMutex.Lock()
lastUsage = ResourceUsage{
CPU: cpu,
Memory: memory,
Disk: rootfs,
GPUs: gpus,
}
lastUsageMutex.Unlock()
}
// PrettyString returns resource usage metrics in a human-readable format string
func (r ResourceUsage) PrettyString() string {
res := "Resource Usage:\n"
res += fmt.Sprintf(" CPU:\n")
res += fmt.Sprintf(" Vendor: %s\n", r.CPU.Info.Vendor)
res += fmt.Sprintf(" Model: %s\n", r.CPU.Info.Model)
res += fmt.Sprintf(" Total Usage: %.2f%%\n", r.CPU.Total)
res += fmt.Sprintf(" Per-Core Usage:\n")
res += fmt.Sprintf(" [")
for i, coreUsage := range r.CPU.PerCore {
res += fmt.Sprintf("%.2f%%", coreUsage)
if i < len(r.CPU.PerCore)-1 {
res += ", "
}
}
res += "]\n"
res += fmt.Sprintf(" Memory:\n")
res += fmt.Sprintf(" Total: %d bytes\n", r.Memory.Total)
res += fmt.Sprintf(" Used: %d bytes\n", r.Memory.Used)
res += fmt.Sprintf(" Available: %d bytes\n", r.Memory.Available)
res += fmt.Sprintf(" Free: %d bytes\n", r.Memory.Free)
res += fmt.Sprintf(" Used Percent: %.2f%%\n", r.Memory.UsedPercent)
res += fmt.Sprintf(" Filesystem:\n")
res += fmt.Sprintf(" Path: %s\n", r.Disk.Path)
res += fmt.Sprintf(" Total: %d bytes\n", r.Disk.Total)
res += fmt.Sprintf(" Used: %d bytes\n", r.Disk.Used)
res += fmt.Sprintf(" Free: %d bytes\n", r.Disk.Free)
res += fmt.Sprintf(" Used Percent: %.2f%%\n", r.Disk.UsedPercent)
res += fmt.Sprintf(" GPUs:\n")
for i, gpu := range r.GPUs {
cardDev, renderDev, err := gpu.Info.GetCardDevices()
if err != nil {
slog.Warn("Failed to get card and render devices", "error", err)
}
res += fmt.Sprintf(" GPU %d:\n", i)
res += fmt.Sprintf(" Vendor: %s\n", gpu.Info.Vendor.Name)
res += fmt.Sprintf(" Model: %s\n", gpu.Info.Device.Name)
res += fmt.Sprintf(" Driver: %s\n", gpu.Info.Driver)
res += fmt.Sprintf(" Card Device: %s\n", cardDev)
res += fmt.Sprintf(" Render Device: %s\n", renderDev)
res += fmt.Sprintf(" Usage Percent: %.2f%%\n", gpu.UsagePercent)
res += fmt.Sprintf(" VRAM:\n")
res += fmt.Sprintf(" Total: %d bytes\n", gpu.VRAM.Total)
res += fmt.Sprintf(" Used: %d bytes\n", gpu.VRAM.Used)
res += fmt.Sprintf(" Free: %d bytes\n", gpu.VRAM.Free)
res += fmt.Sprintf(" Used Percent: %.2f%%\n", gpu.VRAM.UsedPercent)
}
return res
}
// GetCPUUsage gathers CPU usage
func GetCPUUsage() CPUUsage {
// Helper to read /proc/stat
readStat := func() (uint64, uint64, []uint64, []uint64) {
statBytes, err := os.ReadFile("/proc/stat")
if err != nil {
slog.Warn("Failed to read /proc/stat", "error", err)
return 0, 0, nil, nil
}
statScanner := bufio.NewScanner(bytes.NewReader(statBytes))
statScanner.Scan() // Total CPU line
fields := strings.Fields(statScanner.Text())[1:]
var total, idle uint64
for i, field := range fields {
val, _ := strconv.ParseUint(field, 10, 64)
total += val
if i == 3 { // Idle time
idle = val
}
}
var perCoreTotals, perCoreIdles []uint64
for statScanner.Scan() {
line := statScanner.Text()
if !strings.HasPrefix(line, "cpu") {
break
}
coreFields := strings.Fields(line)[1:]
var coreTotal, coreIdle uint64
for i, field := range coreFields {
val, _ := strconv.ParseUint(field, 10, 64)
coreTotal += val
if i == 3 { // Idle time
coreIdle = val
}
}
perCoreTotals = append(perCoreTotals, coreTotal)
perCoreIdles = append(perCoreIdles, coreIdle)
}
return total, idle, perCoreTotals, perCoreIdles
}
// First sample
prevTotal, prevIdle, prevPerCoreTotals, prevPerCoreIdles := readStat()
time.Sleep(1 * time.Second) // Delay for accurate delta
// Second sample
currTotal, currIdle, currPerCoreTotals, currPerCoreIdles := readStat()
// Calculate total CPU usage
totalDiff := float64(currTotal - prevTotal)
idleDiff := float64(currIdle - prevIdle)
var totalUsage float64
if totalDiff > 0 {
totalUsage = ((totalDiff - idleDiff) / totalDiff) * 100
}
// Calculate per-core usage
var perCore []float64
for i := range currPerCoreTotals {
coreTotalDiff := float64(currPerCoreTotals[i] - prevPerCoreTotals[i])
coreIdleDiff := float64(currPerCoreIdles[i] - prevPerCoreIdles[i])
if coreTotalDiff > 0 {
perCoreUsage := ((coreTotalDiff - coreIdleDiff) / coreTotalDiff) * 100
perCore = append(perCore, perCoreUsage)
} else {
perCore = append(perCore, 0)
}
}
// Get CPU info
cpuInfoBytes, err := os.ReadFile("/proc/cpuinfo")
if err != nil {
slog.Warn("Failed to read /proc/cpuinfo", "error", err)
return CPUUsage{}
}
cpuInfo := string(cpuInfoBytes)
scanner := bufio.NewScanner(strings.NewReader(cpuInfo))
var vendor, model string
for scanner.Scan() {
line := scanner.Text()
if strings.HasPrefix(line, "vendor_id") {
vendor = strings.TrimSpace(strings.Split(line, ":")[1])
} else if strings.HasPrefix(line, "model name") {
model = strings.TrimSpace(strings.Split(line, ":")[1])
}
if vendor != "" && model != "" {
break
}
}
return CPUUsage{
Info: CPUInfo{
Vendor: vendor,
Model: model,
},
Total: totalUsage,
PerCore: perCore,
}
}
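// Worked example of the delta formula above (made-up jiffy counts):
//
//	prevTotal=10000, currTotal=10400 -> totalDiff = 400
//	prevIdle=8000,   currIdle=8300   -> idleDiff  = 300
//	totalUsage = (400 - 300) / 400 * 100 = 25%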
// GetMemoryUsage gathers memory usage from /proc/meminfo
func GetMemoryUsage() MemoryUsage {
data, err := os.ReadFile("/proc/meminfo")
if err != nil {
slog.Warn("Failed to read /proc/meminfo", "error", err)
return MemoryUsage{}
}
scanner := bufio.NewScanner(bytes.NewReader(data))
var total, free, available uint64
for scanner.Scan() {
line := scanner.Text()
if strings.HasPrefix(line, "MemTotal:") {
total = parseMemInfoLine(line)
} else if strings.HasPrefix(line, "MemFree:") {
free = parseMemInfoLine(line)
} else if strings.HasPrefix(line, "MemAvailable:") {
available = parseMemInfoLine(line)
}
}
used := total - available
var usedPercent float64
if total > 0 {
usedPercent = (float64(used) / float64(total)) * 100
}
return MemoryUsage{
Total: total * 1024, // Convert from KB to bytes
Used: used * 1024,
Available: available * 1024,
Free: free * 1024,
UsedPercent: usedPercent,
}
}
// parseMemInfoLine parses a line from /proc/meminfo
func parseMemInfoLine(line string) uint64 {
fields := strings.Fields(line)
val, _ := strconv.ParseUint(fields[1], 10, 64)
return val
}
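// Illustrative /proc/meminfo lines the parser above consumes (values in
// kB, made up); used is derived as MemTotal - MemAvailable and everything
// is scaled by 1024 into bytes:
//
//	MemTotal:       32658584 kB
//	MemFree:         1234567 kB
//	MemAvailable:   24680135 kB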
// GetFilesystemUsage gathers usage statistics for the specified path
func GetFilesystemUsage(path string) (FilesystemUsage, error) {
cmd := exec.Command("df", path)
output, err := cmd.Output()
if err != nil {
return FilesystemUsage{}, err
}
lines := strings.Split(string(output), "\n")
if len(lines) < 2 {
return FilesystemUsage{}, fmt.Errorf("unexpected `df` output format for path: %s", path)
}
fields := strings.Fields(lines[1])
if len(fields) < 5 {
return FilesystemUsage{}, fmt.Errorf("insufficient fields in `df` output for path: %s", path)
}
total, err := strconv.ParseUint(fields[1], 10, 64)
if err != nil {
return FilesystemUsage{}, fmt.Errorf("failed to parse total space: %v", err)
}
used, err := strconv.ParseUint(fields[2], 10, 64)
if err != nil {
return FilesystemUsage{}, fmt.Errorf("failed to parse used space: %v", err)
}
free, err := strconv.ParseUint(fields[3], 10, 64)
if err != nil {
return FilesystemUsage{}, fmt.Errorf("failed to parse free space: %v", err)
}
usedPercent, err := strconv.ParseFloat(strings.TrimSuffix(fields[4], "%"), 64)
if err != nil {
return FilesystemUsage{}, fmt.Errorf("failed to parse used percentage: %v", err)
}
return FilesystemUsage{
Path: path,
Total: total * 1024,
Used: used * 1024,
Free: free * 1024,
UsedPercent: usedPercent,
}, nil
}
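// Illustrative `df /` output this parser expects (1K-blocks, made up):
//
//	Filesystem     1K-blocks     Used Available Use% Mounted on
//	/dev/nvme0n1p2 487652328 98123456 364700000  22% /
//
// fields[1..4] of the second line map to total, used, free (df's
// Available column) and used percent.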
// GetGPUUsage gathers GPU usage for all detected GPUs
func GetGPUUsage() []GPUUsage {
var gpus []GPUUsage
// Detect all GPUs
pciInfos, err := GetAllGPUInfo()
if err != nil {
slog.Warn("Failed to get GPU info", "error", err)
return nil
}
// Monitor each GPU
for _, gpu := range pciInfos {
var gpuUsage GPUUsage
switch gpu.Vendor.ID {
case VendorIntel:
gpuUsage = monitorIntelGPU(gpu)
case VendorNVIDIA:
gpuUsage = monitorNVIDIAGPU(gpu)
case VendorAMD:
// TODO: Implement if needed
continue
default:
continue
}
gpus = append(gpus, gpuUsage)
}
return gpus
}

View File

@@ -1,21 +1,126 @@
package main
import (
"context"
"log/slog"
"nestri/maitred/internal"
"nestri/maitred/internal/containers"
"nestri/maitred/internal/realtime"
"nestri/maitred/internal/resource"
"nestri/maitred/internal/system"
"os"
"os/signal"
"syscall"
"time"
)
func main() {
// Setup main context and stopper
mainCtx, mainStop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
// Get flags and log them
internal.InitFlags()
internal.GetFlags().DebugLog()
logLevel := slog.LevelInfo
if internal.GetFlags().Verbose {
logLevel = slog.LevelDebug
}
// Create the base handler at the selected log level
baseHandler := slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{
Level: logLevel,
})
customHandler := &internal.CustomHandler{Handler: baseHandler}
logger := slog.New(customHandler)
slog.SetDefault(logger)
if !internal.GetFlags().NoMonitor {
// Start system monitoring, fetch every 5 seconds
system.StartMonitoring(mainCtx, 5*time.Second)
}
// Get machine ID
machineID, err := system.GetID()
if err != nil {
slog.Error("failed getting machine id", "err", machineID)
}
slog.Info("Machine ID", "id", machineID)
// Initialize container engine
ctrEngine, err := containers.NewContainerEngine()
if err != nil {
slog.Error("failed initializing container engine", "err", err)
mainStop()
return
}
defer func(ctrEngine containers.ContainerEngine) {
// Stop our managed containers first, with a 30 second timeout
cleanupCtx, cleanupCancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cleanupCancel()
err = realtime.CleanupManaged(cleanupCtx, ctrEngine)
if err != nil {
slog.Error("failed cleaning up managed containers", "err", err)
}
err = ctrEngine.Close()
if err != nil {
slog.Error("failed closing container engine", "err", err)
}
}(ctrEngine)
// Print engine info
info, err := ctrEngine.Info(mainCtx)
if err != nil {
slog.Error("failed getting engine info", "err", err)
mainStop()
return
}
slog.Info("Container engine", "info", info)
if err = realtime.InitializeManager(mainCtx, ctrEngine); err != nil {
slog.Error("failed initializing container manager", "err", err)
mainStop()
return
}
// If in debug mode, skip running SST - MQTT connections
if !internal.GetFlags().Debug {
// Initialize SST resource
res, err := resource.NewResource()
if err != nil {
slog.Error("failed getting resource", "err", err)
mainStop()
return
}
// Run realtime
err = realtime.Run(mainCtx, machineID, ctrEngine, res)
if err != nil {
slog.Error("failed running realtime", "err", err)
mainStop()
return
}
}
// Create relay container
slog.Info("Creating default relay container")
relayID, err := realtime.CreateRelay(mainCtx, ctrEngine)
if err != nil {
slog.Error("failed creating relay container", "err", err)
mainStop()
return
}
// Start relay container
slog.Info("Starting default relay container", "id", relayID)
if err = realtime.StartRelay(mainCtx, ctrEngine, relayID); err != nil {
slog.Error("failed starting relay container", "err", err)
mainStop()
return
}
// Wait for signal
<-mainCtx.Done()
slog.Info("Shutting down gracefully by signal..")
}

View File

@@ -0,0 +1,11 @@
{
"name": "@nestri/maitred",
"version": "0.1.0",
"type": "module",
"sideEffects": false,
"scripts": {
"dev": "sst shell go run main.go"
},
"devDependencies": {},
"dependencies": {}
}

View File

@@ -1,58 +0,0 @@
package auth
import (
"encoding/json"
"fmt"
"io"
"nestri/maitred/pkg/resource"
"net/http"
"net/url"
"os"
"os/exec"
"github.com/charmbracelet/log"
)
type UserCredentials struct {
AccessToken string `json:"access_token"`
RefreshToken string `json:"refresh_token"`
}
func FetchUserToken(teamSlug string) (*UserCredentials, error) {
hostname, err := os.Hostname()
if err != nil {
log.Fatal("Could not get the hostname")
}
data := url.Values{}
data.Set("grant_type", "client_credentials")
data.Set("client_id", "device")
data.Set("client_secret", resource.Resource.AuthFingerprintKey.Value)
data.Set("team", teamSlug)
data.Set("hostname", hostname)
data.Set("provider", "device")
resp, err := http.PostForm(resource.Resource.Auth.Url+"/token", data)
if err != nil {
return nil, err
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
body, _ := io.ReadAll(resp.Body)
fmt.Println(string(body))
return nil, fmt.Errorf("failed to auth: " + string(body))
}
credentials := UserCredentials{}
err = json.NewDecoder(resp.Body).Decode(&credentials)
if err != nil {
return nil, err
}
return &credentials, nil
}
func GetHostname() string {
cmd, err := exec.Command("cat", "/etc/hostname").Output()
if err != nil {
log.Error("error getting container hostname", "err", err)
}
output := string(cmd)
return output
}

View File

@@ -1,27 +0,0 @@
package party
import (
"github.com/charmbracelet/log"
)
// logger implements the paho.Logger interface
type logger struct {
prefix string
}
// Println is the library provided NOOPLogger's
// implementation of the required interface function()
func (l logger) Println(v ...interface{}) {
// fmt.Println(append([]interface{}{l.prefix + ":"}, v...)...)
log.Info(l.prefix, "info", v)
}
// Printf is the library provided NOOPLogger's
// implementation of the required interface function(){}
func (l logger) Printf(format string, v ...interface{}) {
// if len(format) > 0 && format[len(format)-1] != '\n' {
// format = format + "\n" // some log calls in paho do not add \n
// }
// fmt.Printf(l.prefix+":"+format, v...)
log.Info(l.prefix, "info", v)
}

View File

@@ -1,129 +0,0 @@
package party
import (
"context"
"fmt"
"nestri/maitred/pkg/auth"
"nestri/maitred/pkg/resource"
"net/url"
"os"
"os/signal"
"syscall"
"time"
"github.com/charmbracelet/log"
"github.com/eclipse/paho.golang/autopaho"
"github.com/eclipse/paho.golang/paho"
)
func Run(teamSlug string) {
var topic = fmt.Sprintf("%s/%s/%s", resource.Resource.App.Name, resource.Resource.App.Stage, teamSlug)
var serverURL = fmt.Sprintf("wss://%s/mqtt?x-amz-customauthorizer-name=%s", resource.Resource.Party.Endpoint, resource.Resource.Party.Authorizer)
var clientID = generateClientID()
hostname, err := os.Hostname()
if err != nil {
log.Fatal(" Could not get the hostname")
}
// App will run until cancelled by user (e.g. ctrl-c)
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
defer stop()
userTokens, err := auth.FetchUserToken(teamSlug)
if err != nil {
log.Error("Error trying to request for credentials", "err", err)
stop()
}
// We will connect to the Eclipse test server (note that you may see messages that other users publish)
u, err := url.Parse(serverURL)
if err != nil {
panic(err)
}
router := paho.NewStandardRouter()
router.DefaultHandler(func(p *paho.Publish) {
infoLogger.Info("Router", "info", fmt.Sprintf("default handler received message with topic: %s\n", p.Topic))
})
cliCfg := autopaho.ClientConfig{
ServerUrls: []*url.URL{u},
ConnectUsername: "", // Must be empty for the authorizer
ConnectPassword: []byte(userTokens.AccessToken),
KeepAlive: 20, // Keepalive message should be sent every 20 seconds
// We don't want the broker to delete any session info when we disconnect
CleanStartOnInitialConnection: true,
SessionExpiryInterval: 60, // Session remains live 60 seconds after disconnect
ReconnectBackoff: autopaho.NewConstantBackoff(time.Second),
OnConnectionUp: func(cm *autopaho.ConnectionManager, connAck *paho.Connack) {
infoLogger.Info("Router", "info", "MQTT connection is up and running")
if _, err := cm.Subscribe(context.Background(), &paho.Subscribe{
Subscriptions: []paho.SubscribeOptions{
{Topic: fmt.Sprintf("%s/#", topic), QoS: 1}, //Listen to all messages from this team
},
}); err != nil {
panic(fmt.Sprintf("failed to subscribe (%s). This is likely to mean no messages will be received.", err))
}
},
Errors: logger{prefix: "subscribe"},
OnConnectError: func(err error) {
infoLogger.Error("Router", "err", fmt.Sprintf("error whilst attempting connection: %s\n", err))
},
// eclipse/paho.golang/paho provides base mqtt functionality, the below config will be passed in for each connection
ClientConfig: paho.ClientConfig{
// If you are using QOS 1/2, then it's important to specify a client id (which must be unique)
ClientID: clientID,
// OnPublishReceived is a slice of functions that will be called when a message is received.
// You can write the function(s) yourself or use the supplied Router
OnPublishReceived: []func(paho.PublishReceived) (bool, error){
func(pr paho.PublishReceived) (bool, error) {
router.Route(pr.Packet.Packet())
return true, nil // we assume that the router handles all messages (todo: amend router API)
}},
OnClientError: func(err error) { infoLogger.Error("Router", "err", fmt.Sprintf("client error: %s\n", err)) },
OnServerDisconnect: func(d *paho.Disconnect) {
if d.Properties != nil {
infoLogger.Info("Router", "info", fmt.Sprintf("server requested disconnect: %s\n", d.Properties.ReasonString))
} else {
infoLogger.Info("Router", "info", fmt.Sprintf("server requested disconnect; reason code: %d\n", d.ReasonCode))
}
},
},
}
c, err := autopaho.NewConnection(ctx, cliCfg) // starts process; will reconnect until context cancelled
if err != nil {
panic(err)
}
if err = c.AwaitConnection(ctx); err != nil {
panic(err)
}
// Handlers can be registered/deregistered at any time. It's important to note that you need to subscribe AND create
// a handler
//TODO: Have different routes for different things, like starting a session, stopping a session, and stopping the container altogether
//TODO: Listen on team-slug/container-hostname topic only
router.RegisterHandler(fmt.Sprintf("%s/%s/start", topic, hostname), func(p *paho.Publish) {
infoLogger.Info("Router", "info", fmt.Sprintf("start a game: %s\n", p.Topic))
})
router.RegisterHandler(fmt.Sprintf("%s/%s/stop", topic, hostname), func(p *paho.Publish) { fmt.Printf("stop the game that is running: %s\n", p.Topic) })
router.RegisterHandler(fmt.Sprintf("%s/%s/download", topic, hostname), func(p *paho.Publish) { fmt.Printf("download a game: %s\n", p.Topic) })
router.RegisterHandler(fmt.Sprintf("%s/%s/quit", topic, hostname), func(p *paho.Publish) { stop() }) // Stop and quit this running container
// We publish three messages to test out the various route handlers
// topics := []string{"test/test", "test/test/foo", "test/xxNoMatch", "test/quit"}
// for _, t := range topics {
// if _, err := c.Publish(ctx, &paho.Publish{
// QoS: 1,
// Topic: fmt.Sprintf("%s/%s", topic, t),
// Payload: []byte("TestMessage on topic: " + t),
// }); err != nil {
// if ctx.Err() == nil {
// panic(err) // Publish will exit when context cancelled or if something went wrong
// }
// }
// }
<-c.Done() // Wait for clean shutdown (cancelling the context triggered the shutdown)
}

View File

@@ -1,31 +0,0 @@
package party
import (
"fmt"
"os"
"time"
"math/rand"
"github.com/charmbracelet/log"
"github.com/oklog/ulid/v2"
)
var (
infoLogger = log.NewWithOptions(os.Stderr, log.Options{
ReportTimestamp: true,
TimeFormat: time.Kitchen,
// Prefix: "Realtime",
})
)
func generateClientID() string {
// Create a source of entropy (use cryptographically secure randomness in production)
entropy := rand.New(rand.NewSource(time.Now().UnixNano()))
// Generate a new ULID
id := ulid.MustNew(ulid.Timestamp(time.Now()), entropy)
// Create the client ID string
return fmt.Sprintf("client_%s", id.String())
}

packages/maitred/sst-env.d.ts
View File

@@ -0,0 +1,9 @@
/* This file is auto-generated by SST. Do not edit. */
/* tslint:disable */
/* eslint-disable */
/* deno-fmt-ignore-file */
/// <reference path="../../sst-env.d.ts" />
import "sst"
export {}