mirror of
https://github.com/TauricResearch/TradingAgents.git
synced 2026-05-01 14:33:10 +03:00
feat: add LangGraph checkpoint resume for crash recovery (#594)
Long analyses can take many minutes; a crash or interruption forced users to re-run from scratch and re-pay for every LLM call. This adds an opt-in checkpoint layer backed by per-ticker SQLite databases so the graph resumes from the last successful node.

How to use:
- CLI: tradingagents analyze --checkpoint
- CLI: tradingagents analyze --clear-checkpoints
- Python: config["checkpoint_enabled"] = True

Lifecycle:
- propagate() recompiles the graph with a SqliteSaver when enabled and injects a deterministic thread_id derived from ticker+date, so the same ticker+date resumes while a different date starts fresh.
- On successful completion the per-thread checkpoint rows are cleared.
- The context manager is closed in a try/finally so a crash never leaks the SQLite connection or leaves the graph in checkpoint mode.

Storage: ~/.tradingagents/cache/checkpoints/<TICKER>.db (override via TRADINGAGENTS_CACHE_DIR).

The checkpointer module is new (tradingagents/graph/checkpointer.py), and GraphSetup now returns the uncompiled workflow so it can be recompiled with a saver when needed. Adds the langgraph-checkpoint-sqlite>=2.0.0 dependency. Three new tests verify the crash/resume cycle and that a different date starts fresh.
This commit is contained in:
22
cli/main.py
22
cli/main.py
@@ -926,7 +926,7 @@ def format_tool_args(args, max_length=80) -> str:
|
||||
return result[:max_length - 3] + "..."
|
||||
return result
|
||||
|
||||
def run_analysis():
|
||||
def run_analysis(checkpoint: bool = False):
|
||||
# First get all user selections
|
||||
selections = get_user_selections()
|
||||
|
||||
@@ -943,6 +943,7 @@ def run_analysis():
|
||||
config["openai_reasoning_effort"] = selections.get("openai_reasoning_effort")
|
||||
config["anthropic_effort"] = selections.get("anthropic_effort")
|
||||
config["output_language"] = selections.get("output_language", "English")
|
||||
config["checkpoint_enabled"] = checkpoint
|
||||
|
||||
# Create stats callback handler for tracking LLM/tool calls
|
||||
stats_handler = StatsCallbackHandler()
|
||||
@@ -1197,8 +1198,23 @@ def run_analysis():
|
||||
|
||||
|
||||
@app.command()
|
||||
def analyze():
|
||||
run_analysis()
|
||||
def analyze(
|
||||
checkpoint: bool = typer.Option(
|
||||
False,
|
||||
"--checkpoint",
|
||||
help="Enable checkpoint/resume: save state after each node so a crashed run can resume.",
|
||||
),
|
||||
clear_checkpoints: bool = typer.Option(
|
||||
False,
|
||||
"--clear-checkpoints",
|
||||
help="Delete all saved checkpoints before running (force fresh start).",
|
||||
),
|
||||
):
|
||||
if clear_checkpoints:
|
||||
from tradingagents.graph.checkpointer import clear_all_checkpoints
|
||||
n = clear_all_checkpoints(DEFAULT_CONFIG["data_cache_dir"])
|
||||
console.print(f"[yellow]Cleared {n} checkpoint(s).[/yellow]")
|
||||
run_analysis(checkpoint=checkpoint)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user