netris-nestri/packages/relay/internal/core/metrics.go
Kristian Ollikainen 6e82eff9e2 feat: Migrate from WebSocket to libp2p for peer-to-peer connectivity (#286)
## Description
Whew, some stuff is still not re-implemented, but it's working!

Rabbit's gonna explode with the amount of changes I reckon 😅



<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

- **New Features**
  - Introduced a peer-to-peer relay system using libp2p with enhanced stream forwarding, room state synchronization, and mDNS peer discovery.
  - Added decentralized room and participant management, metrics publishing, and safe, size-limited, concurrent message streaming with robust framing and callback dispatching (see the framing sketch after these notes).
  - Implemented asynchronous, callback-driven message handling over custom libp2p streams, replacing WebSocket signaling.
- **Improvements**
  - Migrated signaling and stream protocols from WebSocket to libp2p, improving reliability and scalability.
  - Simplified configuration and environment variables, removing deprecated flags and adding persistent data support.
  - Enhanced logging, error handling, and connection management for better observability and robustness.
  - Refined RTP header extension registration and NAT IP handling for improved WebRTC performance.
- **Bug Fixes**
  - Improved ICE candidate buffering and SDP negotiation in WebRTC connections.
  - Fixed NAT IP and UDP port range configuration issues.
- **Refactor**
  - Modularized the codebase, reorganized relay and server logic, and removed deprecated WebSocket-based components.
  - Streamlined message structures, removed obsolete enums and message types, and simplified SafeMap concurrency.
  - Replaced WebSocket signaling with libp2p stream protocols in server and relay components.
- **Chores**
  - Updated and cleaned dependencies across Go, Rust, and JavaScript packages.
  - Added `.gitignore` for persistent data directory in relay package.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
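
For illustration, a minimal sketch of the kind of size-limited, length-prefixed framing the notes above describe. Everything here is an assumption made for illustration only — the 4-byte big-endian prefix, the 1 MiB cap, and the function names are not necessarily the relay's actual wire format — but it shows the pattern over a plain `io.Reader`/`io.Writer`, which a libp2p stream satisfies:

```go
package framing

import (
	"encoding/binary"
	"fmt"
	"io"
)

// maxFrameSize caps a single message; 1 MiB is an assumed limit.
const maxFrameSize = 1 << 20

// readFrame reads one length-prefixed message, rejecting oversized frames
// before allocating, so a misbehaving peer cannot force huge allocations.
func readFrame(r io.Reader) ([]byte, error) {
	var hdr [4]byte
	if _, err := io.ReadFull(r, hdr[:]); err != nil {
		return nil, err
	}
	n := binary.BigEndian.Uint32(hdr[:])
	if n > maxFrameSize {
		return nil, fmt.Errorf("frame of %d bytes exceeds %d byte limit", n, maxFrameSize)
	}
	buf := make([]byte, n)
	if _, err := io.ReadFull(r, buf); err != nil {
		return nil, err
	}
	return buf, nil
}

// writeFrame writes one length-prefixed message.
func writeFrame(w io.Writer, msg []byte) error {
	if len(msg) > maxFrameSize {
		return fmt.Errorf("message of %d bytes exceeds %d byte limit", len(msg), maxFrameSize)
	}
	var hdr [4]byte
	binary.BigEndian.PutUint32(hdr[:], uint32(len(msg)))
	if _, err := w.Write(hdr[:]); err != nil {
		return err
	}
	_, err := w.Write(msg)
	return err
}
```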

---------

Co-authored-by: DatCaptainHorse <DatCaptainHorse@users.noreply.github.com>
Co-authored-by: Philipp Neumann <3daquawolf@gmail.com>
2025-06-06 16:48:49 +03:00

129 lines
3.3 KiB
Go

package core

import (
	"context"
	"encoding/json"
	"fmt"
	"log/slog"
	"sync"
	"time"

	"github.com/libp2p/go-libp2p/core/peer"
)
// --- Metrics Collection and Publishing ---

// periodicMetricsPublisher periodically gathers local metrics and publishes them.
func (r *Relay) periodicMetricsPublisher(ctx context.Context) {
	ticker := time.NewTicker(metricsPublishInterval)
	defer ticker.Stop()

	// Publish immediately on start
	if err := r.publishRelayMetrics(ctx); err != nil {
		slog.Error("Failed to publish initial relay metrics", "err", err)
	}

	for {
		select {
		case <-ctx.Done():
			slog.Info("Stopping metrics publisher")
			return
		case <-ticker.C:
			if err := r.publishRelayMetrics(ctx); err != nil {
				slog.Error("Failed to publish relay metrics", "err", err)
			}
		}
	}
}
// publishRelayMetrics sends the current relay status to the mesh.
func (r *Relay) publishRelayMetrics(ctx context.Context) error {
	if r.pubTopicRelayMetrics == nil {
		slog.Warn("Cannot publish relay metrics: topic is nil")
		return nil
	}

	// Check all peer latencies
	r.checkAllPeerLatencies(ctx)

	data, err := json.Marshal(r.RelayInfo)
	if err != nil {
		return fmt.Errorf("failed to marshal relay status: %w", err)
	}

	if pubErr := r.pubTopicRelayMetrics.Publish(ctx, data); pubErr != nil {
		// Don't return an error on publish failure, just log it
		slog.Error("Failed to publish relay metrics message", "err", pubErr)
	}
	return nil
}
// checkAllPeerLatencies measures latency to all currently connected peers.
func (r *Relay) checkAllPeerLatencies(ctx context.Context) {
	var wg sync.WaitGroup
	for _, p := range r.Host.Network().Peers() {
		if p == r.ID {
			continue // Skip self
		}
		wg.Add(1)
		// Run checks concurrently
		go func(peerID peer.ID) {
			defer wg.Done()
			// Call the measurement directly so wg actually waits for it;
			// spawning yet another goroutine here would let wg.Done() fire
			// before the measurement finished.
			r.measureLatencyToPeer(ctx, peerID)
		}(p)
	}
	wg.Wait() // Wait for all latency checks to complete
}
// measureLatencyToPeer pings a specific peer and updates the local latency map.
func (r *Relay) measureLatencyToPeer(ctx context.Context, peerID peer.ID) {
	// Check peer status first
	if !r.hasConnectedPeer(peerID) {
		return
	}

	// Use the PingService instance stored in the Relay struct
	if r.PingService == nil {
		slog.Error("PingService is nil, cannot measure latency", "peer", peerID)
		return
	}

	// Bound the ping so an unresponsive peer cannot stall this check until
	// the parent context ends (the 5s bound is an assumed value).
	pingCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
	defer cancel()

	resultsCh := r.PingService.Ping(pingCtx, peerID)

	// Wait for the result, the timeout, or parent cancellation
	select {
	case <-pingCtx.Done():
		slog.Warn("Latency check canceled or timed out", "peer", peerID, "err", pingCtx.Err())
	case result, ok := <-resultsCh:
		if !ok {
			// Channel closed unexpectedly
			slog.Warn("Ping service channel closed unexpectedly", "peer", peerID)
			return
		}
		if result.Error != nil {
			slog.Warn("Latency check failed, removing peer from local peers map", "peer", peerID, "err", result.Error)
			// Remove from MeshPeers if the ping failed
			if r.LocalMeshPeers.Has(peerID) {
				r.LocalMeshPeers.Delete(peerID)
			}
			return
		}

		// Ping succeeded; update the latency map. RTT can be reported as 0
		// for very fast local connections, so clamp it to a minimal value.
		latency := result.RTT
		if latency <= 0 {
			latency = 1 * time.Microsecond
		}
		r.RelayInfo.MeshLatencies.Set(peerID.String(), latency)
	}
}
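
// Hypothetical usage sketch (illustration only, not part of this file): the
// publisher loop above is presumably launched once during relay startup and
// runs until the context is canceled. NewRelay and this wiring are assumed
// names for illustration.
//
//	ctx, cancel := context.WithCancel(context.Background())
//	defer cancel()
//	relay, err := NewRelay(ctx) // assumed constructor
//	if err != nil {
//		return err
//	}
//	go relay.periodicMetricsPublisher(ctx)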