Files
netris-nestri/packages/server/src/gpu.rs
Kristian Ollikainen 41dca22d9d feat(runner): More runner improvements (#294)
## Description
Whew..

- Steam can now run without namespaces using live-patcher (because
Docker..)
- Improved NVIDIA GPU selection and handling
- Pipeline tests for GPU picking logic
- Optimizations and cleanup all around
- SSH (by default disabled) for easier instance debugging.
- CachyOS' Proton, because it works without namespaces (couldn't figure
out how to enable it automatically in Steam yet..)
- Package updates and partial removal of futures (libp2p is hopefully going
to switch to Tokio in the next release)



<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

- **New Features**
- SSH server can now be enabled within the container for remote access
when configured.
- Added persistent live patching for Steam runtime entrypoints to
improve compatibility with namespace-less applications.
- Enhanced GPU selection with multi-GPU support and PCI bus ID matching
for improved hardware compatibility.
- Improved encoder selection by runtime testing of video encoders for
better reliability.
  - Added WebSocket transport support in peer-to-peer networking.
- Added flexible compositor and application launching with configurable
commands and improved socket handling.

- **Bug Fixes**
- Addressed NVIDIA-specific GStreamer issues by setting new environment
variables.
  - Improved error handling and logging for GPU and encoder selection.
- Fixed process monitoring to handle patcher restarts and added cleanup
logic.
- Added GStreamer cache clearing workaround for Wayland socket failures.

- **Improvements**
- Real-time logging of container processes to standard output and error
for easier monitoring.
- Enhanced process management and reduced CPU usage in protocol handling
loops.
- Updated dependency versions for greater stability and feature support.
  - Improved audio capture defaults and expanded audio pipeline support.
- Enhanced video pipeline setup with conditional handling for different
encoder APIs and DMA-BUF support.
- Refined concurrency and lifecycle management in protocol messaging for
increased robustness.
- Consistent namespace usage and updated crate references across the
codebase.
- Enhanced SSH configuration with key management, port customization,
and startup verification.
  - Improved GPU and video encoder integration in pipeline construction.
- Simplified error handling and consolidated write operations in
protocol streams.
- Removed Ludusavi installation from container image and updated package
installations.

- **Other**
- Minor formatting and style changes for better code readability and
maintainability.
- Docker build context now ignores `.idea` directory to streamline
builds.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Co-authored-by: DatCaptainHorse <DatCaptainHorse@users.noreply.github.com>
2025-07-07 09:06:48 +03:00

211 lines
6.0 KiB
Rust

use regex::Regex;
use std::fs;
use std::process::Command;
use std::str;
/// GPU manufacturer, as recognised from a PCI vendor ID.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum GPUVendor {
    /// Any vendor ID that is not one of the recognised ones.
    UNKNOWN,
    /// Intel (PCI vendor ID `8086`).
    INTEL,
    /// NVIDIA (PCI vendor ID `10de`).
    NVIDIA,
    /// AMD (PCI vendor ID `1002`).
    AMD,
}
/// Description of a single GPU: vendor, DRM device nodes, name and PCI address.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct GPUInfo {
    /// Vendor resolved from the PCI vendor ID.
    vendor: GPUVendor,
    /// Path of the DRM card node (`/dev/dri/card*`).
    card_path: String,
    /// Path of the DRM render node (`/dev/dri/renderD*`); empty when none was found.
    render_path: String,
    /// Device name as reported by `lspci`, without the trailing device ID.
    device_name: String,
    /// PCI address of the device as printed by `lspci`.
    pci_bus_id: String,
}
impl GPUInfo {
    /// Vendor of this GPU.
    pub fn vendor(&self) -> &GPUVendor {
        let Self { vendor, .. } = self;
        vendor
    }

    /// Human-readable vendor name (`"Intel"`, `"NVIDIA"`, `"AMD"` or `"Unknown"`).
    pub fn vendor_string(&self) -> &str {
        match &self.vendor {
            GPUVendor::UNKNOWN => "Unknown",
            GPUVendor::AMD => "AMD",
            GPUVendor::NVIDIA => "NVIDIA",
            GPUVendor::INTEL => "Intel",
        }
    }

    /// Path of the DRM card node.
    pub fn card_path(&self) -> &str {
        self.card_path.as_str()
    }

    /// Path of the DRM render node; may be an empty string.
    pub fn render_path(&self) -> &str {
        self.render_path.as_str()
    }

    /// Device name with the trailing device ID removed.
    pub fn device_name(&self) -> &str {
        self.device_name.as_str()
    }

    /// PCI address of the device.
    pub fn pci_bus_id(&self) -> &str {
        self.pci_bus_id.as_str()
    }
}
/// Maps a lowercase hexadecimal PCI vendor ID to a [`GPUVendor`].
///
/// Any ID that is not listed in the table resolves to `GPUVendor::UNKNOWN`.
fn get_gpu_vendor(vendor_id: &str) -> GPUVendor {
    // Lookup table of the vendor IDs this module recognises.
    const KNOWN_VENDORS: [(&str, GPUVendor); 3] = [
        ("8086", GPUVendor::INTEL),
        ("10de", GPUVendor::NVIDIA),
        ("1002", GPUVendor::AMD),
    ];

    KNOWN_VENDORS
        .iter()
        .find(|(id, _)| *id == vendor_id)
        .map_or(GPUVendor::UNKNOWN, |(_, vendor)| vendor.clone())
}
/// Retrieves a list of GPUs available on the system.
/// # Returns
/// * `Vec<GPUInfo>` - A vector containing information about each GPU.
pub fn get_gpus() -> Vec<GPUInfo> {
let output = Command::new("lspci")
.args(["-mm", "-nn"])
.output()
.expect("Failed to execute lspci");
str::from_utf8(&output.stdout)
.unwrap()
.lines()
.filter_map(|line| parse_pci_device(line))
.filter(|(class_id, _, _, _)| matches!(class_id.as_str(), "0300" | "0302" | "0380"))
.filter_map(|(_, vendor_id, device_name, pci_addr)| {
get_dri_device_path(&pci_addr)
.map(|(card, render)| (vendor_id, card, render, device_name, pci_addr))
})
.map(
|(vid, card_path, render_path, device_name, pci_bus_id)| GPUInfo {
vendor: get_gpu_vendor(&vid),
card_path,
render_path,
device_name,
pci_bus_id,
},
)
.collect()
}
fn parse_pci_device(line: &str) -> Option<(String, String, String, String)> {
let re = Regex::new(
r#"^(?P<pci_addr>\S+)\s+"[^\[]*\[(?P<class_id>[0-9a-f]{4})\].*?"\s+"[^"]*?\[(?P<vendor_id>[0-9a-f]{4})\][^"]*?"\s+"(?P<device_name>[^"]+?)""#,
).unwrap();
let caps = re.captures(line)?;
// Clean device name by removing only the trailing device ID
let device_name = caps.name("device_name")?.as_str().trim();
let clean_re = Regex::new(r"\s+\[[0-9a-f]{4}\]$").unwrap();
let cleaned_name = clean_re.replace(device_name, "").trim().to_string();
Some((
caps.name("class_id")?.as_str().to_lowercase(),
caps.name("vendor_id")?.as_str().to_lowercase(),
cleaned_name,
caps.name("pci_addr")?.as_str().to_string(),
))
}
/// Resolves the DRM device nodes that belong to a PCI device.
///
/// # Arguments
/// * `pci_addr` - PCI address either in short form (`bus:device.function`,
///   e.g. `01:00.0`) or already domain-qualified (e.g. `0000:01:00.0`).
///
/// # Returns
/// `Some((card, render))` with `/dev/dri/...` paths when the device exposes a
/// `card*` entry in its sysfs `drm` directory; `render` is an empty string
/// when no `renderD*` entry exists. `None` when the device or its `drm`
/// directory cannot be read, or when it has no `card*` entry.
fn get_dri_device_path(pci_addr: &str) -> Option<(String, String)> {
    // sysfs entries are always named with the domain. Short addresses get the
    // default domain prepended; an address that already has two colons is
    // domain-qualified and must be used unchanged.
    let sysfs_name = if pci_addr.matches(':').count() >= 2 {
        pci_addr.to_string()
    } else {
        format!("0000:{}", pci_addr)
    };

    // Look the device up by its exact name rather than scanning every PCI
    // device for a substring match.
    let drm_dir = format!("/sys/bus/pci/devices/{}/drm", sysfs_name);

    let mut card: Option<String> = None;
    let mut render: Option<String> = None;
    for drm_entry in fs::read_dir(drm_dir).ok()?.flatten() {
        let name = drm_entry.file_name().to_string_lossy().into_owned();
        if name.starts_with("card") {
            card = Some(format!("/dev/dri/{}", name));
        } else if name.starts_with("renderD") {
            render = Some(format!("/dev/dri/{}", name));
        }
        if card.is_some() && render.is_some() {
            break;
        }
    }

    // A card node is required; the render node is optional and reported as "".
    card.map(|card| (card, render.unwrap_or_default()))
}
/// Returns clones of all GPUs whose vendor name equals `vendor`, compared
/// case-insensitively against the value of `GPUInfo::vendor_string`.
pub fn get_gpus_by_vendor(gpus: &[GPUInfo], vendor: &str) -> Vec<GPUInfo> {
    let wanted = vendor.to_lowercase();
    let mut selected = Vec::new();
    for gpu in gpus {
        if gpu.vendor_string().to_lowercase() == wanted {
            selected.push(gpu.clone());
        }
    }
    selected
}
/// Returns clones of all GPUs whose device name contains `substring`,
/// compared case-insensitively.
pub fn get_gpus_by_device_name(gpus: &[GPUInfo], substring: &str) -> Vec<GPUInfo> {
    let needle = substring.to_lowercase();
    let mut selected = Vec::new();
    for gpu in gpus {
        let haystack = gpu.device_name.to_lowercase();
        if haystack.contains(&needle) {
            selected.push(gpu.clone());
        }
    }
    selected
}
/// Returns a clone of the first GPU whose card path or render path equals
/// `path`, ignoring ASCII case. Returns `None` when no GPU matches.
pub fn get_gpu_by_card_path(gpus: &[GPUInfo], path: &str) -> Option<GPUInfo> {
    for gpu in gpus {
        let is_card = gpu.card_path.eq_ignore_ascii_case(path);
        if is_card || gpu.render_path.eq_ignore_ascii_case(path) {
            return Some(gpu.clone());
        }
    }
    None
}
/// Finds the NVIDIA GPU in `gpus` that corresponds to a CUDA device index.
///
/// Queries `nvidia-smi` for the PCI bus ID of device `cuda_device_id` and
/// matches it against the PCI addresses stored in `gpus`. Addresses are
/// compared by numeric domain plus `bus:device.function`, so differing domain
/// widths (`00000000:05:00.0`, `0000:05:00.0`) and an omitted domain
/// (`05:00.0`) all refer to the same device.
///
/// # Arguments
/// * `gpus` - Candidate GPUs to search.
/// * `cuda_device_id` - Device index passed to `nvidia-smi -i`.
///
/// # Returns
/// A clone of the matching GPU, or `None` when `nvidia-smi` cannot be run
/// (a warning is logged), exits unsuccessfully, prints non-UTF-8 output, or
/// reports an address that matches no NVIDIA GPU in `gpus`.
pub fn get_nvidia_gpu_by_cuda_id(gpus: &[GPUInfo], cuda_device_id: usize) -> Option<GPUInfo> {
    let device_index = cuda_device_id.to_string();

    // Run nvidia-smi to get information about the CUDA device. A failure to
    // spawn the process means the tool is not available, so no separate
    // availability probe is needed.
    let query = Command::new("nvidia-smi")
        .args([
            "--query-gpu=pci.bus_id",
            "--format=csv,noheader",
            "-i",
            device_index.as_str(),
        ])
        .output();
    let output = match query {
        Ok(output) => output,
        Err(_) => {
            tracing::warn!("nvidia-smi is not available");
            return None;
        }
    };
    if !output.status.success() {
        return None;
    }

    // Parse the output to get the PCI bus ID
    let reported = str::from_utf8(&output.stdout).ok()?.trim();
    let (cuda_domain, cuda_bdf) = split_pci_domain(reported);

    // Find the GPU with the matching PCI bus ID
    gpus.iter()
        .find(|gpu| {
            if gpu.vendor != GPUVendor::NVIDIA {
                return false;
            }
            let (domain, bdf) = split_pci_domain(&gpu.pci_bus_id);
            // nvidia-smi prints uppercase hex, lspci lowercase.
            domain == cuda_domain && bdf.eq_ignore_ascii_case(cuda_bdf)
        })
        .cloned()
}

/// Splits a PCI address into its numeric domain and the `bus:device.function` part.
///
/// An address with fewer than two colons, or whose domain is not valid
/// hexadecimal, is returned unchanged with domain `0`.
fn split_pci_domain(addr: &str) -> (u32, &str) {
    if addr.matches(':').count() >= 2 {
        if let Some((domain, rest)) = addr.split_once(':') {
            if let Ok(domain) = u32::from_str_radix(domain, 16) {
                return (domain, rest);
            }
        }
    }
    (0, addr)
}