feat: Add streaming support (#125)

This adds:
- [x] Keyboard and mouse handling on the frontend
- [x] Video and audio streaming from the backend to the frontend
- [x] Input server that works over WebSockets

Update - 17/11
- [ ] Master docker container to run this
- [ ] Steam runtime
- [ ] Entrypoint.sh

---------

Co-authored-by: Kristian Ollikainen <14197772+DatCaptainHorse@users.noreply.github.com>
Co-authored-by: Kristian Ollikainen <DatCaptainHorse@users.noreply.github.com>
This commit is contained in:
Wanjohi
2024-12-08 14:54:56 +03:00
committed by GitHub
parent 5eb21eeadb
commit 379db1c87b
137 changed files with 12737 additions and 5234 deletions

View File

@@ -1,15 +1,67 @@
import { Deferred } from "../common/async"
import type { Frame } from "../karp/frame"
import type { Group, Track } from "../transfork"
import { Closed } from "../transfork/error"
// Audio codec identifiers this module lists as supported.
// Only "Opus" is enabled; "mp4a" (AAC) stays commented out until it is supported.
// NOTE(review): the code that consumes this list is not visible here — confirm how
// entries are matched against codec strings before adding new ones.
const SUPPORTED = [
// TODO support AAC
// "mp4a"
"Opus",
]
/**
 * Reads raw audio from a media track, runs it through the given encoder and
 * writes the encoded frames into groups appended to the output track.
 */
export class Packer {
	#source: MediaStreamTrackProcessor<AudioData>
	#encoder: Encoder
	#data: Track
	#current?: Group

	/**
	 * @param track - Audio track to read raw samples from.
	 * @param encoder - Encoder whose `frames` transform the samples are piped through.
	 * @param data - Output track that receives the groups of frames.
	 */
	constructor(track: MediaStreamAudioTrack, encoder: Encoder, data: Track) {
		this.#source = new MediaStreamTrackProcessor({ track })
		this.#encoder = encoder
		this.#data = data
	}

	/**
	 * Starts the pipeline: source -> encoder -> this packer.
	 *
	 * @returns The promise produced by `pipeTo`, settling when the pipe ends.
	 */
	async run() {
		const sink = new WritableStream({
			write: (frame) => this.#write(frame),
			close: () => this.#close(),
			abort: (reason) => this.#close(reason),
		})

		const encoded = this.#source.readable.pipeThrough(this.#encoder.frames)
		return encoded.pipeTo(sink)
	}

	// Writes one frame into the current group, starting a new group first
	// when there is none yet or when the frame is a keyframe.
	#write(frame: Frame) {
		// TODO use a fixed interval instead of keyframes (audio)
		// TODO actually just align with video
		let group = this.#current

		if (group === undefined || frame.type === "key") {
			// Finish the previous group (if any) before opening the next one.
			group?.close()
			group = this.#data.appendGroup()
			this.#current = group
		}

		group.writeFrame(frame.data)
	}

	// Closes the open group (if any) and then the track, forwarding the
	// close reason derived from `err`.
	#close(err?: unknown) {
		const reason = Closed.from(err)
		this.#current?.close(reason)
		this.#data.close(reason)
	}
}
export class Encoder {
#encoder!: AudioEncoder
#encoderConfig: AudioEncoderConfig
#decoderConfig?: AudioDecoderConfig
#decoderConfig = new Deferred<AudioDecoderConfig>()
frames: TransformStream<AudioData, AudioDecoderConfig | EncodedAudioChunk>
frames: TransformStream<AudioData, EncodedAudioChunk>
constructor(config: AudioEncoderConfig) {
this.#encoderConfig = config
@@ -21,7 +73,7 @@ export class Encoder {
})
}
#start(controller: TransformStreamDefaultController<AudioDecoderConfig | EncodedAudioChunk>) {
#start(controller: TransformStreamDefaultController<EncodedAudioChunk>) {
this.#encoder = new AudioEncoder({
output: (frame, metadata) => {
this.#enqueue(controller, frame, metadata)
@@ -40,17 +92,16 @@ export class Encoder {
}
#enqueue(
controller: TransformStreamDefaultController<AudioDecoderConfig | EncodedAudioChunk>,
controller: TransformStreamDefaultController<EncodedAudioChunk>,
frame: EncodedAudioChunk,
metadata?: EncodedAudioChunkMetadata,
) {
const config = metadata?.decoderConfig
if (config && !this.#decoderConfig) {
if (config && !this.#decoderConfig.pending) {
const config = metadata.decoderConfig
if (!config) throw new Error("missing decoder config")
controller.enqueue(config)
this.#decoderConfig = config
this.#decoderConfig.resolve(config)
}
controller.enqueue(frame)
@@ -72,4 +123,8 @@ export class Encoder {
get config() {
return this.#encoderConfig
}
async decoderConfig(): Promise<AudioDecoderConfig> {
return await this.#decoderConfig.promise
}
}

View File

@@ -1,15 +1,14 @@
import { Connection, SubscribeRecv } from "../transport"
import { asError } from "../common/error"
import { Segment } from "./segment"
import { Track } from "./track"
import * as Catalog from "../media/catalog"
import * as Catalog from "../karp/catalog"
import * as Transfork from "../transfork"
import * as Audio from "./audio"
import * as Video from "./video"
import { isAudioTrackSettings, isVideoTrackSettings } from "../common/settings"
export interface BroadcastConfig {
namespace: string
connection: Connection
path: string[]
media: MediaStream
id?: number
audio?: AudioEncoderConfig
video?: VideoEncoderConfig
@@ -21,221 +20,89 @@ export interface BroadcastConfigTrack {
}
export class Broadcast {
#tracks = new Map<string, Track>()
readonly config: BroadcastConfig
readonly catalog: Catalog.Root
readonly connection: Connection
readonly namespace: string
#running: Promise<void>
#config: BroadcastConfig
#path: string[]
constructor(config: BroadcastConfig) {
this.connection = config.connection
this.config = config
this.namespace = config.namespace
const id = config.id || new Date().getTime() / 1000
const tracks: Catalog.Track[] = []
this.#config = config
this.#path = config.path.concat(id.toString())
}
for (const media of this.config.media.getTracks()) {
const track = new Track(media, config)
this.#tracks.set(track.name, track)
async publish(connection: Transfork.Connection) {
const broadcast: Catalog.Broadcast = {
path: this.#config.path,
audio: [],
video: [],
}
for (const media of this.#config.media.getTracks()) {
const settings = media.getSettings()
const info = {
name: media.id, // TODO way too verbose
priority: media.kind === "video" ? 1 : 2,
}
const track = new Transfork.Track(this.#config.path.concat(info.name), info.priority)
if (isVideoTrackSettings(settings)) {
if (!config.video) {
if (!this.#config.video) {
throw new Error("no video configuration provided")
}
const video: Catalog.VideoTrack = {
namespace: this.namespace,
name: `${track.name}.m4s`,
initTrack: `${track.name}.mp4`,
selectionParams: {
mimeType: "video/mp4",
codec: config.video.codec,
width: settings.width,
height: settings.height,
framerate: settings.frameRate,
bitrate: config.video.bitrate,
},
const encoder = new Video.Encoder(this.#config.video)
const packer = new Video.Packer(media as MediaStreamVideoTrack, encoder, track)
// TODO handle error
packer.run().catch((err) => console.error("failed to run video packer: ", err))
const decoder = await encoder.decoderConfig()
const description = decoder.description ? new Uint8Array(decoder.description as ArrayBuffer) : undefined
const video: Catalog.Video = {
track: info,
codec: decoder.codec,
description: description,
resolution: { width: settings.width, height: settings.height },
frame_rate: settings.frameRate,
bitrate: this.#config.video.bitrate,
}
tracks.push(video)
broadcast.video.push(video)
} else if (isAudioTrackSettings(settings)) {
if (!config.audio) {
if (!this.#config.audio) {
throw new Error("no audio configuration provided")
}
const audio: Catalog.AudioTrack = {
namespace: this.namespace,
name: `${track.name}.m4s`,
initTrack: `${track.name}.mp4`,
selectionParams: {
mimeType: "audio/ogg",
codec: config.audio.codec,
samplerate: settings.sampleRate,
//sampleSize: settings.sampleSize,
channelConfig: `${settings.channelCount}`,
bitrate: config.audio.bitrate,
},
const encoder = new Audio.Encoder(this.#config.audio)
const packer = new Audio.Packer(media as MediaStreamAudioTrack, encoder, track)
packer.run().catch((err) => console.error("failed to run audio packer: ", err)) // TODO handle error
const decoder = await encoder.decoderConfig()
const audio: Catalog.Audio = {
track: info,
codec: decoder.codec,
sample_rate: settings.sampleRate,
channel_count: settings.channelCount,
bitrate: this.#config.audio.bitrate,
}
tracks.push(audio)
broadcast.audio.push(audio)
} else {
throw new Error(`unknown track type: ${media.kind}`)
}
connection.publish(track.reader())
}
this.catalog = {
version: 1,
streamingFormat: 1,
streamingFormatVersion: "0.2",
supportsDeltaUpdates: false,
commonTrackFields: {
packaging: "cmaf",
renderGroup: 1,
},
tracks,
}
const track = new Transfork.Track(this.#config.path.concat("catalog.json"), 0)
track.appendGroup().writeFrames(Catalog.encode(broadcast))
this.#running = this.#run()
connection.publish(track.reader())
}
async #run() {
await this.connection.announce(this.namespace)
for (;;) {
const subscriber = await this.connection.subscribed()
if (!subscriber) break
// Run an async task to serve each subscription.
this.#serveSubscribe(subscriber).catch((e) => {
const err = asError(e)
console.warn("failed to serve subscribe", err)
})
}
}
async #serveSubscribe(subscriber: SubscribeRecv) {
try {
const [base, ext] = splitExt(subscriber.track)
if (ext === "catalog") {
await this.#serveCatalog(subscriber, base)
} else if (ext === "mp4") {
await this.#serveInit(subscriber, base)
} else if (ext === "m4s") {
await this.#serveTrack(subscriber, base)
} else {
throw new Error(`unknown subscription: ${subscriber.track}`)
}
} catch (e) {
const err = asError(e)
await subscriber.close(1n, `failed to process subscribe: ${err.message}`)
} finally {
// TODO we can't close subscribers because there's no support for clean termination
// await subscriber.close()
}
}
async #serveCatalog(subscriber: SubscribeRecv, name: string) {
// We only support ".catalog"
if (name !== "") throw new Error(`unknown catalog: ${name}`)
const bytes = Catalog.encode(this.catalog)
// Send a SUBSCRIBE_OK
await subscriber.ack()
const stream = await subscriber.group({ group: 0 })
await stream.write({ object: 0, payload: bytes })
await stream.close()
}
async #serveInit(subscriber: SubscribeRecv, name: string) {
const track = this.#tracks.get(name)
if (!track) throw new Error(`no track with name ${subscriber.track}`)
// Send a SUBSCRIBE_OK
await subscriber.ack()
const init = await track.init()
const stream = await subscriber.group({ group: 0 })
await stream.write({ object: 0, payload: init })
await stream.close()
}
async #serveTrack(subscriber: SubscribeRecv, name: string) {
const track = this.#tracks.get(name)
if (!track) throw new Error(`no track with name ${subscriber.track}`)
// Send a SUBSCRIBE_OK
await subscriber.ack()
const segments = track.segments().getReader()
for (;;) {
const { value: segment, done } = await segments.read()
if (done) break
// Serve the segment and log any errors that occur.
this.#serveSegment(subscriber, segment).catch((e) => {
const err = asError(e)
console.warn("failed to serve segment", err)
})
}
}
async #serveSegment(subscriber: SubscribeRecv, segment: Segment) {
// Create a new stream for each segment.
const stream = await subscriber.group({
group: segment.id,
priority: 0, // TODO
})
let object = 0
// Pipe the segment to the stream.
const chunks = segment.chunks().getReader()
for (;;) {
const { value, done } = await chunks.read()
if (done) break
await stream.write({
object,
payload: value,
})
object += 1
}
await stream.close()
}
// Attach the captured video stream to the given video element.
attach(video: HTMLVideoElement) {
video.srcObject = this.config.media
}
close() {
// TODO implement publish close
}
// Returns the error message when the connection is closed
async closed(): Promise<Error> {
try {
await this.#running
return new Error("closed") // clean termination
} catch (e) {
return asError(e)
}
}
}
function splitExt(s: string): [string, string] {
const i = s.lastIndexOf(".")
if (i < 0) throw new Error(`no extension found`)
return [s.substring(0, i), s.substring(i + 1)]
close() {}
}

View File

@@ -1,7 +0,0 @@
// Extends EncodedVideoChunk, allowing a new "init" type
/**
 * A unit of packaged media data.
 *
 * `type` is "key" or "delta" like an encoded chunk, plus the additional value
 * "init". `timestamp` and `duration` are expressed in microseconds and `data`
 * holds the raw bytes of the chunk.
 */
export interface Chunk {
type: "init" | "key" | "delta"
timestamp: number // microseconds
duration: number // microseconds
data: Uint8Array
}

View File

@@ -1,165 +0,0 @@
import * as MP4 from "../media/mp4"
import { Chunk } from "./chunk"
type DecoderConfig = AudioDecoderConfig | VideoDecoderConfig
type EncodedChunk = EncodedAudioChunk | EncodedVideoChunk
/**
 * Packages encoder output into fragmented-MP4 style chunks.
 *
 * The public `encode` transform accepts two kinds of input:
 *  - a decoder config, which creates the MP4 track and emits one "init" chunk
 *    containing the initialization segment;
 *  - an encoded audio/video chunk, which is added as a sample and emitted as
 *    the bytes of the resulting moof/mdat pair(s).
 *
 * Encoded chunks are delayed by one frame: a chunk is only written once the
 * next chunk arrives, so the final buffered chunk is never emitted (see the
 * commented-out flush TODO at the bottom of the class).
 */
export class Container {
// The mp4box file object that tracks and samples are added to.
#mp4: MP4.ISOFile
#frame?: EncodedAudioChunk | EncodedVideoChunk // 1 frame buffer
// Track id returned by addTrack; unset until a decoder config has been processed.
#track?: number
// Count of keyframes seen so far; 0 means no keyframe has arrived yet.
#segment = 0
// Transform from decoder configs / encoded chunks to packaged chunks.
encode: TransformStream<DecoderConfig | EncodedChunk, Chunk>
constructor() {
this.#mp4 = new MP4.ISOFile()
this.#mp4.init()
this.encode = new TransformStream({
transform: (frame, controller) => {
// Decoder configs and encoded chunks share one stream; dispatch on which it is.
if (isDecoderConfig(frame)) {
return this.#init(frame, controller)
} else {
return this.#enqueue(frame, controller)
}
},
})
}
// Creates the MP4 track from a decoder config and emits the initialization
// segment as an "init" chunk. Throws on a second config, on a missing
// description, on an unsupported codec, or when addTrack yields no track id.
#init(frame: DecoderConfig, controller: TransformStreamDefaultController<Chunk>) {
// NOTE(review): this truthiness check would not detect a track id of 0 — confirm ids start at 1.
if (this.#track) throw new Error("duplicate decoder config")
// Only the first four characters of the codec string select the track type.
let codec = frame.codec.substring(0, 4)
if (codec == "opus") {
codec = "Opus"
}
const options: MP4.TrackOptions = {
type: codec,
// presumably chosen so sample times can be given in microseconds — TODO confirm
timescale: 1_000_000,
}
if (isVideoConfig(frame)) {
options.width = frame.codedWidth
options.height = frame.codedHeight
} else {
options.channel_count = frame.numberOfChannels
options.samplerate = frame.sampleRate
}
if (!frame.description) throw new Error("missing frame description")
const desc = frame.description as ArrayBufferLike
// The description is attached differently depending on the codec.
if (codec === "avc1") {
options.avcDecoderConfigRecord = desc
} else if (codec === "hev1") {
options.hevcDecoderConfigRecord = desc
} else if (codec === "Opus") {
// description is an identification header: https://datatracker.ietf.org/doc/html/rfc7845#section-5.1
// The first 8 bytes are the magic string "OpusHead", followed by what we actually want.
const dops = new MP4.BoxParser.dOpsBox(undefined)
// Annoyingly, the header is little endian while MP4 is big endian, so we have to parse.
const data = new MP4.Stream(desc, 8, MP4.Stream.LITTLE_ENDIAN)
dops.parse(data)
dops.Version = 0
options.description = dops
options.hdlr = "soun"
} else {
throw new Error(`unsupported codec: ${codec}`)
}
this.#track = this.#mp4.addTrack(options)
if (!this.#track) throw new Error("failed to initialize MP4 track")
// Serialize ftyp + moov and hand it downstream as the "init" chunk.
const buffer = MP4.ISOFile.writeInitializationSegment(this.#mp4.ftyp!, this.#mp4.moov!, 0, 0)
const data = new Uint8Array(buffer)
controller.enqueue({
type: "init",
timestamp: 0,
duration: 0,
data,
})
}
// Buffers `frame` and emits the previously buffered frame (if any) as a chunk.
// Throws if the first frame is not a keyframe or if no decoder config was seen.
#enqueue(frame: EncodedChunk, controller: TransformStreamDefaultController<Chunk>) {
// Check if we should create a new segment
if (frame.type == "key") {
this.#segment += 1
} else if (this.#segment == 0) {
throw new Error("must start with keyframe")
}
// We need a one frame buffer to compute the duration
if (!this.#frame) {
this.#frame = frame
return
}
// Sample duration is the gap between the buffered frame and the incoming one.
const duration = frame.timestamp - this.#frame.timestamp
// TODO avoid this extra copy by writing to the mdat directly
// ...which means changing mp4box.js to take an offset instead of ArrayBuffer
const buffer = new Uint8Array(this.#frame.byteLength)
this.#frame.copyTo(buffer)
if (!this.#track) throw new Error("missing decoder config")
// Add the sample to the container
this.#mp4.addSample(this.#track, buffer, {
duration,
dts: this.#frame.timestamp,
cts: this.#frame.timestamp,
is_sync: this.#frame.type == "key",
})
const stream = new MP4.Stream(undefined, 0, MP4.Stream.BIG_ENDIAN)
// Moof and mdat atoms are written in pairs.
// TODO remove the moof/mdat from the Box to reclaim memory once everything works
for (;;) {
const moof = this.#mp4.moofs.shift()
const mdat = this.#mp4.mdats.shift()
// Both queues empty: done. Exactly one empty: the pairing is broken.
if (!moof && !mdat) break
if (!moof) throw new Error("moof missing")
if (!mdat) throw new Error("mdat missing")
moof.write(stream)
mdat.write(stream)
}
// TODO avoid this extra copy by writing to the buffer provided in copyTo
const data = new Uint8Array(stream.buffer)
// NOTE(review): the emitted duration is the buffered frame's own `duration`
// (0 when absent), not the `duration` computed above for addSample.
controller.enqueue({
type: this.#frame.type,
timestamp: this.#frame.timestamp,
duration: this.#frame.duration ?? 0,
data,
})
// The incoming frame becomes the buffered frame for the next call.
this.#frame = frame
}
/* TODO flush the last frame
#flush(controller: TransformStreamDefaultController<Chunk>) {
if (this.#frame) {
// TODO guess the duration
this.#enqueue(this.#frame, 0, controller)
}
}
*/
}
/**
 * Type guard that tells decoder configs apart from encoded chunks.
 *
 * @param frame - Either a decoder config or an encoded chunk.
 * @returns `true` when `frame` has a `codec` property that is not `undefined`.
 */
function isDecoderConfig(frame: DecoderConfig | EncodedChunk): frame is DecoderConfig {
	const { codec } = frame as DecoderConfig
	// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
	return codec !== undefined
}
/**
 * Type guard that tells video decoder configs apart from audio ones.
 *
 * @param frame - A decoder config of either kind.
 * @returns `true` when `frame` has a `codedWidth` property that is not `undefined`.
 */
function isVideoConfig(frame: DecoderConfig): frame is VideoDecoderConfig {
	const { codedWidth } = frame as VideoDecoderConfig
	return codedWidth !== undefined
}

View File

@@ -1,10 +1,10 @@
import { Chunk } from "./chunk"
import type { Frame } from "../karp/frame"
export class Segment {
id: number
// Take in a stream of chunks
input: WritableStream<Chunk>
// Take in a stream of frames
input: WritableStream<Frame>
// Output a stream of bytes, which we fork for each new subscriber.
#cache: ReadableStream<Uint8Array>
@@ -16,16 +16,18 @@ export class Segment {
// Set a max size for each segment, dropping the tail if it gets too long.
// We tee the reader, so this limit applies to the FASTEST reader.
const backpressure = new ByteLengthQueuingStrategy({ highWaterMark: 8_000_000 })
const backpressure = new ByteLengthQueuingStrategy({
highWaterMark: 8_000_000,
})
const transport = new TransformStream<Chunk, Uint8Array>(
const transport = new TransformStream<Frame, Uint8Array>(
{
transform: (chunk: Chunk, controller) => {
transform: (frame: Frame, controller) => {
// Compute the max timestamp of the segment
this.timestamp = Math.max(chunk.timestamp + chunk.duration)
this.timestamp = Math.max(this.timestamp, frame.timestamp)
// Push the chunk to any listeners.
controller.enqueue(chunk.data)
controller.enqueue(frame.data)
},
},
undefined,

View File

@@ -1,9 +1,8 @@
import { Segment } from "./segment"
import { Notify } from "../common/async"
import { Chunk } from "./chunk"
import { Container } from "./container"
import { BroadcastConfig } from "./broadcast"
import type { BroadcastConfig } from "./broadcast"
import { Segment } from "./segment"
import type { Frame } from "../karp/frame"
import * as Audio from "./audio"
import * as Video from "./video"
@@ -36,7 +35,6 @@ export class Track {
async #runAudio(track: MediaStreamAudioTrack, config: AudioEncoderConfig) {
const source = new MediaStreamTrackProcessor({ track })
const encoder = new Audio.Encoder(config)
const container = new Container()
// Split the container at keyframe boundaries
const segments = new WritableStream({
@@ -45,13 +43,12 @@ export class Track {
abort: (e) => this.#close(e),
})
return source.readable.pipeThrough(encoder.frames).pipeThrough(container.encode).pipeTo(segments)
return source.readable.pipeThrough(encoder.frames).pipeTo(segments)
}
async #runVideo(track: MediaStreamVideoTrack, config: VideoEncoderConfig) {
const source = new MediaStreamTrackProcessor({ track })
const encoder = new Video.Encoder(config)
const container = new Container()
// Split the container at keyframe boundaries
const segments = new WritableStream({
@@ -60,18 +57,12 @@ export class Track {
abort: (e) => this.#close(e),
})
return source.readable.pipeThrough(encoder.frames).pipeThrough(container.encode).pipeTo(segments)
return source.readable.pipeThrough(encoder.frames).pipeTo(segments)
}
async #write(chunk: Chunk) {
if (chunk.type === "init") {
this.#init = chunk.data
this.#notify.wake()
return
}
async #write(frame: Frame) {
let current = this.#segments.at(-1)
if (!current || chunk.type === "key") {
if (!current || frame.type === "key") {
if (current) {
await current.input.close()
}
@@ -88,7 +79,7 @@ export class Track {
const first = this.#segments[0]
// Expire after 10s
if (chunk.timestamp - first.timestamp < 10_000_000) break
if (frame.timestamp - first.timestamp < 10_000_000) break
this.#segments.shift()
this.#offset += 1
@@ -99,7 +90,7 @@ export class Track {
const writer = current.input.getWriter()
if ((writer.desiredSize || 0) > 0) {
await writer.write(chunk)
await writer.write(frame)
} else {
console.warn("dropping chunk", writer.desiredSize)
}
@@ -147,7 +138,8 @@ export class Track {
if (this.#error) {
controller.error(this.#error)
return
} else if (this.#closed) {
}
if (this.#closed) {
controller.close()
return
}

View File

@@ -9,10 +9,10 @@
"path": "../common"
},
{
"path": "../transport"
"path": "../transfork"
},
{
"path": "../media"
"path": "../karp"
}
]
}

View File

@@ -1,3 +1,8 @@
import { Deferred } from "../common/async"
import type { Frame } from "../karp/frame"
import type { Group, Track } from "../transfork"
import { Closed } from "../transfork/error"
const SUPPORTED = [
"avc1", // H.264
"hev1", // HEVC (aka h.265)
@@ -8,10 +13,55 @@ export interface EncoderSupported {
codecs: string[]
}
/**
 * Reads raw video frames from a media track, runs them through the given
 * encoder and encodes the resulting frames into groups appended to the
 * output track. A new group is started on every keyframe.
 */
export class Packer {
	#source: MediaStreamTrackProcessor<VideoFrame>
	#encoder: Encoder
	#data: Track
	#current?: Group

	/**
	 * @param track - Video track to read raw frames from.
	 * @param encoder - Encoder whose `frames` transform the raw frames are piped through.
	 * @param data - Output track that receives the groups of frames.
	 */
	constructor(track: MediaStreamVideoTrack, encoder: Encoder, data: Track) {
		this.#source = new MediaStreamTrackProcessor({ track })
		this.#encoder = encoder
		this.#data = data
	}

	/**
	 * Starts the pipeline: source -> encoder -> this packer.
	 *
	 * @returns The promise produced by `pipeTo`, settling when the pipe ends.
	 */
	async run() {
		const sink = new WritableStream({
			write: (frame) => this.#write(frame),
			close: () => this.#close(),
			abort: (reason) => this.#close(reason),
		})

		const encoded = this.#source.readable.pipeThrough(this.#encoder.frames)
		return encoded.pipeTo(sink)
	}

	// Encodes one frame into the current group, starting a new group first
	// when there is none yet or when the frame is a keyframe.
	#write(frame: Frame) {
		let group = this.#current

		if (group === undefined || frame.type === "key") {
			// Finish the previous group (if any) before opening the next one.
			group?.close()
			group = this.#data.appendGroup()
			this.#current = group
		}

		frame.encode(group)
	}

	// Closes the open group (if any) and then the track, forwarding the
	// close reason derived from `err`.
	#close(err?: unknown) {
		const reason = Closed.from(err)
		this.#current?.close(reason)
		this.#data.close(reason)
	}
}
export class Encoder {
#encoder!: VideoEncoder
#encoderConfig: VideoEncoderConfig
#decoderConfig?: VideoDecoderConfig
#decoderConfig = new Deferred<VideoDecoderConfig>()
// true if we should insert a keyframe, undefined when the encoder should decide
#keyframeNext: true | undefined = true
@@ -20,7 +70,7 @@ export class Encoder {
#keyframeCounter = 0
// Converts raw frames to encoded frames.
frames: TransformStream<VideoFrame, VideoDecoderConfig | EncodedVideoChunk>
frames: TransformStream<VideoFrame, EncodedVideoChunk>
constructor(config: VideoEncoderConfig) {
config.bitrateMode ??= "constant"
@@ -53,12 +103,17 @@ export class Encoder {
return !!res.supported
}
async decoderConfig(): Promise<VideoDecoderConfig> {
return await this.#decoderConfig.promise
}
#start(controller: TransformStreamDefaultController<EncodedVideoChunk>) {
this.#encoder = new VideoEncoder({
output: (frame, metadata) => {
this.#enqueue(controller, frame, metadata)
},
error: (err) => {
this.#decoderConfig.reject(err)
throw err
},
})
@@ -77,23 +132,22 @@ export class Encoder {
}
#enqueue(
controller: TransformStreamDefaultController<VideoDecoderConfig | EncodedVideoChunk>,
controller: TransformStreamDefaultController<EncodedVideoChunk>,
frame: EncodedVideoChunk,
metadata?: EncodedVideoChunkMetadata,
) {
if (!this.#decoderConfig) {
if (this.#decoderConfig.pending) {
const config = metadata?.decoderConfig
if (!config) throw new Error("missing decoder config")
controller.enqueue(config)
this.#decoderConfig = config
this.#decoderConfig.resolve(config)
}
if (frame.type === "key") {
this.#keyframeCounter = 0
} else {
this.#keyframeCounter += 1
if (this.#keyframeCounter + this.#encoder.encodeQueueSize >= 2 * this.#encoderConfig.framerate!) {
const framesPerGop = this.#encoderConfig.framerate ? 2 * this.#encoderConfig.framerate : 60
if (this.#keyframeCounter + this.#encoder.encodeQueueSize >= framesPerGop) {
this.#keyframeNext = true
}
}