mirror of
https://github.com/nestriness/nestri.git
synced 2025-12-12 08:45:38 +02:00
⭐ feat: Migrate from WebSocket to libp2p for peer-to-peer connectivity (#286)
## Description Whew, some stuff is still not re-implemented, but it's working! Rabbit's gonna explode with the amount of changes I reckon 😅 <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit - **New Features** - Introduced a peer-to-peer relay system using libp2p with enhanced stream forwarding, room state synchronization, and mDNS peer discovery. - Added decentralized room and participant management, metrics publishing, and safe, size-limited, concurrent message streaming with robust framing and callback dispatching. - Implemented asynchronous, callback-driven message handling over custom libp2p streams replacing WebSocket signaling. - **Improvements** - Migrated signaling and stream protocols from WebSocket to libp2p, improving reliability and scalability. - Simplified configuration and environment variables, removing deprecated flags and adding persistent data support. - Enhanced logging, error handling, and connection management for better observability and robustness. - Refined RTP header extension registration and NAT IP handling for improved WebRTC performance. - **Bug Fixes** - Improved ICE candidate buffering and SDP negotiation in WebRTC connections. - Fixed NAT IP and UDP port range configuration issues. - **Refactor** - Modularized codebase, reorganized relay and server logic, and removed deprecated WebSocket-based components. - Streamlined message structures, removed obsolete enums and message types, and simplified SafeMap concurrency. - Replaced WebSocket signaling with libp2p stream protocols in server and relay components. - **Chores** - Updated and cleaned dependencies across Go, Rust, and JavaScript packages. - Added `.gitignore` for persistent data directory in relay package. <!-- end of auto-generated comment: release notes by coderabbit.ai --> --------- Co-authored-by: DatCaptainHorse <DatCaptainHorse@users.noreply.github.com> Co-authored-by: Philipp Neumann <3daquawolf@gmail.com>
This commit is contained in:
committed by
GitHub
parent
e67a8d2b32
commit
6e82eff9e2
128
packages/relay/internal/core/metrics.go
Normal file
128
packages/relay/internal/core/metrics.go
Normal file
@@ -0,0 +1,128 @@
|
||||
package core
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/libp2p/go-libp2p/core/peer"
|
||||
)
|
||||
|
||||
// --- Metrics Collection and Publishing ---
|
||||
|
||||
// periodicMetricsPublisher periodically gathers local metrics and publishes them.
|
||||
func (r *Relay) periodicMetricsPublisher(ctx context.Context) {
|
||||
ticker := time.NewTicker(metricsPublishInterval)
|
||||
defer ticker.Stop()
|
||||
|
||||
// Publish immediately on start
|
||||
if err := r.publishRelayMetrics(ctx); err != nil {
|
||||
slog.Error("Failed to publish initial relay metrics", "err", err)
|
||||
}
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
slog.Info("Stopping metrics publisher")
|
||||
return
|
||||
case <-ticker.C:
|
||||
if err := r.publishRelayMetrics(ctx); err != nil {
|
||||
slog.Error("Failed to publish relay metrics", "err", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// publishRelayMetrics sends the current relay status to the mesh.
|
||||
func (r *Relay) publishRelayMetrics(ctx context.Context) error {
|
||||
if r.pubTopicRelayMetrics == nil {
|
||||
slog.Warn("Cannot publish relay metrics: topic is nil")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check all peer latencies
|
||||
r.checkAllPeerLatencies(ctx)
|
||||
|
||||
data, err := json.Marshal(r.RelayInfo)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal relay status: %w", err)
|
||||
}
|
||||
|
||||
if pubErr := r.pubTopicRelayMetrics.Publish(ctx, data); pubErr != nil {
|
||||
// Don't return error on publish failure, just log
|
||||
slog.Error("Failed to publish relay metrics message", "err", pubErr)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// checkAllPeerLatencies measures latency to all currently connected peers.
|
||||
func (r *Relay) checkAllPeerLatencies(ctx context.Context) {
|
||||
var wg sync.WaitGroup
|
||||
for _, p := range r.Host.Network().Peers() {
|
||||
if p == r.ID {
|
||||
continue // Skip self
|
||||
}
|
||||
wg.Add(1)
|
||||
// Run checks concurrently
|
||||
go func(peerID peer.ID) {
|
||||
defer wg.Done()
|
||||
go r.measureLatencyToPeer(ctx, peerID)
|
||||
}(p)
|
||||
}
|
||||
wg.Wait() // Wait for all latency checks to complete
|
||||
}
|
||||
|
||||
// measureLatencyToPeer pings a specific peer and updates the local latency map.
|
||||
func (r *Relay) measureLatencyToPeer(ctx context.Context, peerID peer.ID) {
|
||||
// Check peer status first
|
||||
if !r.hasConnectedPeer(peerID) {
|
||||
return
|
||||
}
|
||||
|
||||
// Create a context for the ping operation
|
||||
pingCtx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
// Use the PingService instance stored in the Relay struct
|
||||
if r.PingService == nil {
|
||||
slog.Error("PingService is nil, cannot measure latency", "peer", peerID)
|
||||
return
|
||||
}
|
||||
resultsCh := r.PingService.Ping(pingCtx, peerID)
|
||||
|
||||
// Wait for the result (or timeout)
|
||||
select {
|
||||
case <-pingCtx.Done():
|
||||
// Ping timed out
|
||||
slog.Warn("Latency check canceled", "peer", peerID, "err", pingCtx.Err())
|
||||
case result, ok := <-resultsCh:
|
||||
if !ok {
|
||||
// Channel closed unexpectedly
|
||||
slog.Warn("Ping service channel closed unexpectedly", "peer", peerID)
|
||||
return
|
||||
}
|
||||
|
||||
// Received ping result
|
||||
if result.Error != nil {
|
||||
slog.Warn("Latency check failed, removing peer from local peers map", "peer", peerID, "err", result.Error)
|
||||
// Remove from MeshPeers if ping failed
|
||||
if r.LocalMeshPeers.Has(peerID) {
|
||||
r.LocalMeshPeers.Delete(peerID)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Ping successful, update latency
|
||||
latency := result.RTT
|
||||
// Ensure latency is not zero if successful, assign a minimal value if so.
|
||||
// Sometimes RTT can be reported as 0 for very fast local connections.
|
||||
if latency <= 0 {
|
||||
latency = 1 * time.Microsecond
|
||||
}
|
||||
|
||||
r.RelayInfo.MeshLatencies.Set(peerID.String(), latency)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user