From baa43608b28877dfa9f7763a2bee008ecc3764f1 Mon Sep 17 00:00:00 2001 From: Davidson Gomes Date: Wed, 15 Apr 2026 07:08:08 -0300 Subject: [PATCH 01/11] fix: add PID file lock to prevent multiple scheduler instances When systemd restarted the service, concurrent scheduler processes would pile up causing routines (e.g. morning briefing) to fire multiple times. acquire_lock() checks for a live PID on startup; release_lock() cleans up the PID file on SIGINT/SIGTERM. Co-Authored-By: Claude Sonnet 4.6 --- scheduler.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/scheduler.py b/scheduler.py index e5233b5c..b9109c28 100644 --- a/scheduler.py +++ b/scheduler.py @@ -16,6 +16,29 @@ WORKSPACE = Path(__file__).parent PYTHON = "uv run python" if os.system("command -v uv > /dev/null 2>&1") == 0 else "python3" ROUTINES_DIR = WORKSPACE / "ADWs" / "routines" +PID_FILE = WORKSPACE / "ADWs" / "logs" / "scheduler.pid" + + +def acquire_lock() -> bool: + """Ensure only one scheduler instance runs. Returns False if another is alive.""" + if PID_FILE.exists(): + try: + existing_pid = int(PID_FILE.read_text().strip()) + # Check if that process is still alive + os.kill(existing_pid, 0) + print(f" Scheduler already running (PID {existing_pid}). 
Exiting.") + return False + except (ProcessLookupError, ValueError): + # Stale PID file — previous instance is dead + PID_FILE.unlink(missing_ok=True) + + PID_FILE.write_text(str(os.getpid())) + return True + + +def release_lock(): + """Remove PID file on clean shutdown.""" + PID_FILE.unlink(missing_ok=True) def run_adw(name: str, script: str, args: str = ""): @@ -115,6 +138,9 @@ def main(): """Entry point — standalone scheduler.""" import schedule + if not acquire_lock(): + sys.exit(1) + print("EvoNexus Scheduler") setup_schedule() total = len(schedule.get_jobs()) @@ -122,6 +148,7 @@ def main(): print(f" Press Ctrl+C to stop\n") def shutdown(sig, frame): + release_lock() print("\n Scheduler stopped") sys.exit(0) From 0b051af1a711bfe06218274e699e50ae866d4706 Mon Sep 17 00:00:00 2001 From: Davidson Gomes Date: Thu, 16 Apr 2026 06:54:32 -0300 Subject: [PATCH 02/11] fix(scheduler): atomic PID lock to prevent duplicate instances Replace TOCTOU-prone check-then-create with O_CREAT|O_EXCL atomic open. Prevents multiple schedulers from starting simultaneously during rapid restarts, which caused routines (review-todoist, git-sync) to fire multiple times and send duplicate Telegram messages. Co-Authored-By: Claude Sonnet 4.6 --- scheduler.py | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/scheduler.py b/scheduler.py index b9109c28..b3ee922a 100644 --- a/scheduler.py +++ b/scheduler.py @@ -20,20 +20,36 @@ def acquire_lock() -> bool: - """Ensure only one scheduler instance runs. Returns False if another is alive.""" - if PID_FILE.exists(): + """Ensure only one scheduler instance runs. Returns False if another is alive. + + Uses O_CREAT|O_EXCL for atomic creation, then validates the PID inside. + Avoids the TOCTOU race where two processes both see a stale PID file and + both proceed to start. 
+ """ + import fcntl + try: + fd = os.open(str(PID_FILE), os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o644) + os.write(fd, str(os.getpid()).encode()) + os.close(fd) + return True + except FileExistsError: + # File exists — check if the owner is still alive try: existing_pid = int(PID_FILE.read_text().strip()) - # Check if that process is still alive os.kill(existing_pid, 0) print(f" Scheduler already running (PID {existing_pid}). Exiting.") return False except (ProcessLookupError, ValueError): - # Stale PID file — previous instance is dead + # Stale lock — remove and retry once PID_FILE.unlink(missing_ok=True) - - PID_FILE.write_text(str(os.getpid())) - return True + try: + fd = os.open(str(PID_FILE), os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o644) + os.write(fd, str(os.getpid()).encode()) + os.close(fd) + return True + except FileExistsError: + print(" Scheduler lock contention — another instance just started. Exiting.") + return False def release_lock(): From b7bfab50c9ada812e5bc22a828206cfcec7f1d39 Mon Sep 17 00:00:00 2001 From: Davidson Gomes Date: Thu, 16 Apr 2026 06:58:00 -0300 Subject: [PATCH 03/11] fix(dashboard): restart-all kills processes directly instead of systemctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit systemctl restart on Type=oneshot + KillMode=none doesn't reliably kill child processes. New approach: pkill processes directly then re-run start-services.sh — works without sudo and actually restarts everything. 
Co-Authored-By: Claude Sonnet 4.6 --- dashboard/backend/routes/services.py | 40 +++++++++++++++++----------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/dashboard/backend/routes/services.py b/dashboard/backend/routes/services.py index 7af8d773..5a6ee801 100644 --- a/dashboard/backend/routes/services.py +++ b/dashboard/backend/routes/services.py @@ -122,28 +122,38 @@ def run_routine(routine_id): @bp.route("/api/services/restart-all", methods=["POST"]) def restart_all_services(): - """Restart the EvoNexus systemd service (dashboard + scheduler + terminal-server). - Spawns the restart with a delay so the HTTP response can be sent first.""" - import shutil - if not shutil.which("systemctl"): - return jsonify({"error": "systemctl not available (not running as systemd service)"}), 400 + """Restart all EvoNexus services (dashboard + scheduler + terminal-server). - # Check if the service exists - result = subprocess.run( - ["systemctl", "is-enabled", "evo-nexus"], - capture_output=True, text=True + Kills processes directly and re-runs start-services.sh, bypassing + 'systemctl restart' which doesn't reliably kill children on Type=oneshot + services with KillMode=none. + """ + import shutil + import os + workspace = str(WORKSPACE) + start_script = os.path.join(workspace, "start-services.sh") + + if not os.path.exists(start_script): + return jsonify({"error": "start-services.sh not found"}), 400 + + # Kill existing processes then re-run start-services.sh. + # sleep 2 gives Flask time to send this response before app.py dies. + cmd = ( + "sleep 2 && " + "pkill -f 'terminal-server/bin/server.js' 2>/dev/null; " + "pkill -f 'python.*scheduler.py' 2>/dev/null; " + "pkill -f 'python.*app.py' 2>/dev/null; " + "sleep 1 && " + f"bash {start_script}" ) - if result.returncode != 0: - return jsonify({"error": "evo-nexus service not found. 
Run: sudo bash install-service.sh"}), 400 - - # Spawn restart with delay so this response can be sent subprocess.Popen( - ["bash", "-c", "sleep 2 && systemctl restart evo-nexus"], + ["bash", "-c", cmd], start_new_session=True, + cwd=workspace, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, ) - return jsonify({"status": "restarting", "message": "Service will restart in ~2 seconds"}) + return jsonify({"status": "restarting", "message": "Services will restart in ~3 seconds"}) TELEGRAM_LOG = f"{WORKSPACE_STR}/ADWs/logs/telegram.log" From 77826155d722b8847c093386cc728acf20495581 Mon Sep 17 00:00:00 2001 From: Davidson Gomes Date: Thu, 16 Apr 2026 09:18:08 -0300 Subject: [PATCH 04/11] fix(heartbeat): pass prompt as positional arg instead of -p flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Claude CLI does not have a -p flag — prompt must be passed as a positional argument. The old -p flag caused the YAML frontmatter (---) to be interpreted as an unknown CLI option, failing all heartbeats with: unknown option '---\nname: "zara-cs"'. 
Co-Authored-By: Claude Sonnet 4.6 --- dashboard/backend/heartbeat_runner.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dashboard/backend/heartbeat_runner.py b/dashboard/backend/heartbeat_runner.py index ef2cdcea..d247911f 100644 --- a/dashboard/backend/heartbeat_runner.py +++ b/dashboard/backend/heartbeat_runner.py @@ -239,7 +239,8 @@ def step7_invoke_claude( "--print", "--max-turns", str(max_turns), "--dangerously-skip-permissions", - "-p", prompt, + "--output-format", "json", + prompt, # positional argument — Claude CLI does not have a -p flag ] start_time = time.time() From e2ae123c583627f55f17a8d3a65cdc0bab63cdb8 Mon Sep 17 00:00:00 2001 From: Davidson Gomes Date: Fri, 17 Apr 2026 13:20:53 -0300 Subject: [PATCH 05/11] fix(scheduler): remove duplicate scheduler thread from app.py The dashboard was running scheduler.py both as a standalone process (via start-services.sh) and as an embedded thread (app.py), causing every routine to fire 2-3x per trigger. Removed the _run_scheduler thread; kept a lightweight _poll_scheduled_tasks thread for one-off ScheduledTask DB entries only. feat(fin): add Evo Academy as revenue source in financial skills fin-daily-pulse, fin-weekly-report and fin-monthly-close-kickoff now pull data from the Evo Academy Analytics API (summary, orders, subs) and consolidate it alongside Stripe and Omie. MRR, daily revenue and P&L now reflect courses, subscriptions and Summit tickets. 
Co-Authored-By: Claude Sonnet 4.6 --- .claude/skills/fin-daily-pulse/SKILL.md | 41 ++++++++++++-- .../skills/fin-monthly-close-kickoff/SKILL.md | 44 ++++++++++----- .claude/skills/fin-weekly-report/SKILL.md | 34 ++++++++++-- dashboard/backend/app.py | 53 ++++++------------- 4 files changed, 116 insertions(+), 56 deletions(-) diff --git a/.claude/skills/fin-daily-pulse/SKILL.md b/.claude/skills/fin-daily-pulse/SKILL.md index 9a51c28f..20a32dac 100644 --- a/.claude/skills/fin-daily-pulse/SKILL.md +++ b/.claude/skills/fin-daily-pulse/SKILL.md @@ -1,11 +1,11 @@ --- name: fin-daily-pulse -description: "Daily financial pulse — queries Stripe (MRR, charges, churn, failures) and Omie (accounts payable/receivable, invoices) to generate an HTML snapshot of the company's financial health. Trigger when user says 'financial pulse', 'financial snapshot', or 'financial metrics'." +description: "Daily financial pulse — queries Stripe (MRR, charges, churn, failures), Omie (accounts payable/receivable, invoices) and Evo Academy (courses, subscriptions, Summit tickets) to generate an HTML snapshot of the company's financial health. Trigger when user says 'financial pulse', 'financial snapshot', or 'financial metrics'." --- # Financial Pulse — Daily Financial Snapshot -Daily routine that pulls data from Stripe and Omie to generate an HTML snapshot of financial health. +Daily routine that pulls data from Stripe, Omie and Evo Academy to generate an HTML snapshot of financial health. **Always respond in English.** @@ -47,15 +47,49 @@ Use the `/int-omie` skill to fetch: - Fetch invoices pending issuance - Count invoices issued in the current month + +## Step 2.5 — Collect Evo Academy data (silently) + +Call the Evo Academy Analytics API directly: +- **Base URL:** `$EVO_ACADEMY_BASE_URL` (env var) +- **Auth:** `Authorization: Bearer $EVO_ACADEMY_API_KEY` + +### 2.5a. 
Daily summary +``` +GET /api/v1/analytics/summary?period=today +``` +Capture: `revenue.total`, `orders.completed`, `orders.pending`, `orders.failed`, `subscriptions.active`, `students.new_in_period` + +### 2.5b. Orders completed today +``` +GET /api/v1/analytics/orders?status=completed&created_after=YYYY-MM-DD&per_page=100 +``` +(today in BRT; convert to UTC → `created_after = date.today().isoformat()`) +- Iterate through cursor pagination until `meta.has_more = false` +- Sum `amount` across all orders → Evo Academy gross revenue for the day +- Split by type: renewals (`is_renewal=true`) vs new (`is_renewal=false`) +- Group by product: courses, subscriptions, tickets, other + +### 2.5c. MRR from active subscriptions (Evo Academy) +``` +GET /api/v1/analytics/subscriptions?status=active&per_page=100 +``` +- Iterate until `meta.has_more = false` +- Sum `plan.price` of each active subscription → Evo Academy MRR + ## Step 3 — Day's transactions Consolidate all financial transactions for the day: - Stripe charges (revenue) +- Evo Academy orders (revenue — courses / subscriptions / tickets) - Payments recorded in Omie (expenses) - Refunds Format each transaction with: type (Revenue/Expense/Refund), description, amount, status. +**Total revenue = Stripe today + Evo Academy today** +**Total MRR = Stripe MRR + Evo Academy MRR** + ## Step 4 — Classify financial health Define the health badge (CSS class): @@ -105,7 +139,8 @@ Create the directory `workspace/finance/reports/daily/` if it does not exist. 
## Financial Pulse generated **File:** workspace/finance/reports/daily/[C] YYYY-MM-DD-financial-pulse.html -**MRR:** R$ X,XXX | **Subscriptions:** N | **Churn:** X% +**Total MRR:** R$ X,XXX (Stripe: R$ X,XXX | Evo Academy: R$ X,XXX) +**Revenue today:** R$ X,XXX | **Subscriptions:** N | **Churn:** X% **Alerts:** {N} attention points ``` diff --git a/.claude/skills/fin-monthly-close-kickoff/SKILL.md b/.claude/skills/fin-monthly-close-kickoff/SKILL.md index 64ffad92..a62548f3 100644 --- a/.claude/skills/fin-monthly-close-kickoff/SKILL.md +++ b/.claude/skills/fin-monthly-close-kickoff/SKILL.md @@ -1,6 +1,6 @@ --- name: fin-monthly-close-kickoff -description: "Monthly close kickoff — initiates the month-end closing process with a checklist, simplified P&L, pending reconciliations, receivables, payables, and action items for the finance team. Trigger when user says 'monthly close', 'start closing', 'closing kickoff', or on the 1st of each month." +description: "Monthly close kickoff — initiates the month-end closing process with a checklist, simplified P&L (Stripe + Omie + Evo Academy), pending reconciliations, receivables, payables, and action items for the finance team. Trigger when user says 'monthly close', 'start closing', 'closing kickoff', or on the 1st of each month." --- # Monthly Close Kickoff @@ -33,14 +33,30 @@ Use `/int-omie`: - Invoices issued during the month - Invoices that should have been issued but were not -### 2c. Outstanding receivables + +### 2c. 
Revenue (Evo Academy) +Call `GET /api/v1/analytics/summary?period=30d` (env: `$EVO_ACADEMY_BASE_URL`, auth: `Bearer $EVO_ACADEMY_API_KEY`): +- `revenue.total` → gross revenue for the month +- `orders.completed / pending / refunded` → count by status +- `subscriptions.active / cancelled` → subscriber base and churn for the month + +Fetch all orders for the month (replace `31` with the month's actual last day): `GET /api/v1/analytics/orders?status=completed&created_after=YYYY-MM-01&created_before=YYYY-MM-31&per_page=100` +- Iterate by cursor until `has_more=false` +- Sum `amount` → total revenue for the month +- Split by product: Evo Academy (R$950/month), Evolution Builder (R$970/month), Curso Agentic Engineer (R$2k/month), Beta Access (R$370/month), one-time (Blueprint Pack, Fast Start Pro), Evo Setup (R$5/month) +- Identify renewals (`is_renewal=true`) vs new customers + +Fetch active subscriptions at month end: `GET /api/v1/analytics/subscriptions?status=active&per_page=100` +- Evo Academy MRR = sum of `plan.price` across active subscriptions + +### 2d. Outstanding receivables - List all open receivables (from the month or earlier) - Highlight overdue items -### 2d. Next month's payables +### 2e. Next month's payables - List payables due in the current month (the upcoming month) -### 2e. Previous month (for comparison) +### 2f. Previous month (for comparison) - Read the previous month's financial report from `workspace/finance/reports/monthly/` if it exists - Or use data from the last monthly close @@ -51,6 +67,7 @@ Structure the income statement with: | Account | Actual | Prior Month | Variance | |---------|--------|-------------|----------| | Gross Revenue (Stripe) | | | | +| Gross Revenue (Evo Academy) | | | | | Gross Revenue (Omie/Services) | | | | | (-) Taxes | | | | | **Net Revenue** | | | | @@ -68,14 +85,15 @@ Structure the income statement with: Generate a checklist with initial status for each item: 1. **Reconcile Stripe** — verify all charges match received payments -2. **Reconcile Omie** — verify entries and exits in the ERP are correct -3. 
**Issue pending invoices** — list invoices that need to be issued (finance team) -4. **Collect overdue accounts** — list clients with late payments -5. **Categorize expenses** — verify all expenses are categorized -6. **Review entries** — verify manual or atypical entries -7. **Calculate taxes** — verify month's tax obligations -8. **Generate final income statement** — after reconciliations, generate the definitive P&L -9. **Approve close** — the responsible person reviews and approves +2. **Reconcile Evo Academy** — verify orders and subscriptions match expected MRR +3. **Reconcile Omie** — verify entries and exits in the ERP are correct +4. **Issue pending invoices** — list invoices that need to be issued (finance team) +5. **Collect overdue accounts** — list clients with late payments +6. **Categorize expenses** — verify all expenses are categorized +7. **Review entries** — verify manual or atypical entries +8. **Calculate taxes** — verify month's tax obligations +9. **Generate final income statement** — after reconciliations, generate the definitive P&L +10. **Approve close** — the responsible person reviews and approves Possible statuses: - `done` (checkmark) — already completed automatically @@ -161,7 +179,7 @@ Create the directory `workspace/finance/reports/monthly/` if it does not exist. 
**File:** workspace/finance/reports/monthly/[C] YYYY-MM-monthly-close.html **Month:** {reference month} **Revenue:** R$ X,XXX | **Expenses:** R$ X,XXX | **Result:** R$ X,XXX -**Checklist:** X/9 completed +**Checklist:** X/10 completed **Finance team pending items:** {N} items ``` diff --git a/.claude/skills/fin-weekly-report/SKILL.md b/.claude/skills/fin-weekly-report/SKILL.md index b89291cd..ef3c85f2 100644 --- a/.claude/skills/fin-weekly-report/SKILL.md +++ b/.claude/skills/fin-weekly-report/SKILL.md @@ -1,11 +1,11 @@ --- name: fin-weekly-report -description: "Weekly financial report — consolidates Stripe and Omie data for the week: revenue, expenses, cash flow projection, overdue accounts, and variance analysis. Trigger when user says 'financial weekly', 'weekly financial report', or 'financial summary of the week'." +description: "Weekly financial report — consolidates Stripe, Omie and Evo Academy data for the week: revenue (courses, subscriptions, tickets), expenses, cash flow projection, overdue accounts, and variance analysis. Trigger when user says 'financial weekly', 'weekly financial report', or 'financial summary of the week'." --- # Financial Weekly — Weekly Financial Report -Weekly routine that consolidates the week's financial data: revenue, expenses, Stripe, Omie, projected cash flow, and analysis. +Weekly routine that consolidates the week's financial data: revenue, expenses, Stripe, Omie, Evo Academy, projected cash flow, and analysis. **Always respond in English.** @@ -24,8 +24,25 @@ Use `/int-omie` to fetch: - Confirmed receipts for the week - Invoices issued during the week + +### 1c. 
Evo Academy — revenue +Call `GET /api/v1/analytics/summary?period=7d` (env: `$EVO_ACADEMY_BASE_URL`, auth: `Bearer $EVO_ACADEMY_API_KEY`): +- `revenue.total` → gross revenue for the week +- `orders.completed` → number of sales +- `subscriptions.active` / `subscriptions.cancelled` → net change + +Fetch the week's orders: `GET /api/v1/analytics/orders?status=completed&created_after=YYYY-MM-DD&per_page=100` +- Iterate by cursor until `has_more=false` +- Sum `amount` → total Evo Academy revenue for the week +- Split: renewals vs new, one-time vs subscription + +Fetch subscriptions created this week: `GET /api/v1/analytics/subscriptions?status=active&created_after=YYYY-MM-DD&per_page=100` +- MRR added = sum of `plan.price` for subscriptions created during the week + Group revenue by category: - Stripe Subscriptions +- Evo Academy — Courses & Subscriptions +- Evo Academy — One-time (tickets, packs) - Services / Consulting - Partnerships - Other @@ -59,10 +76,19 @@ Consolidate the week's Omie metrics: - Invoices issued during the week - Confirmed receipts +## Step 4.5 — Detailed Evo Academy metrics + +Consolidate Evo Academy's week metrics: +- MRR (sum of all active subscription `plan.price`) and variance vs prior week +- New subscriptions vs cancellations +- One-time revenue (tickets, packs, live events) +- Top-selling products of the week +- Students enrolled (`students.new_in_period`) + ## Step 5 — Cash flow projection (4 weeks) Based on collected data, project: -- Expected inflows (Stripe recurring + receivables) +- Expected inflows (Stripe recurring + Evo Academy subscriptions + receivables) - Expected outflows (payables + recurring expenses) - Balance and cumulative by week @@ -135,7 +161,7 @@ Create the directory `workspace/finance/reports/weekly/` if it does not exist. 
**File:** workspace/finance/reports/weekly/[C] YYYY-WXX-financial-weekly.html **Revenue:** R$ X,XXX ({var}%) | **Expenses:** R$ X,XXX ({var}%) -**MRR:** R$ X,XXX | **Projected 30d balance:** R$ XX,XXX +**MRR total:** R$ X,XXX (Stripe: R$ X,XXX | Evo Academy: R$ X,XXX) | **Projected 30d balance:** R$ XX,XXX **Alerts:** {N} overdue accounts | {N} pending invoices ``` diff --git a/dashboard/backend/app.py b/dashboard/backend/app.py index 278a5b32..62aa0312 100644 --- a/dashboard/backend/app.py +++ b/dashboard/backend/app.py @@ -637,13 +637,16 @@ def serve_frontend(path): port = int(cfg["port"]) except Exception: pass - # Start scheduler in background thread + # Scheduler runs as a standalone process (scheduler.py) started by start-services.sh. + # A thread here would create a duplicate instance — all routines would fire 2-3x. + # One-off scheduled tasks (ScheduledTask model) are checked by the standalone scheduler + # via _run_pending_tasks, which is called from its own loop. import threading + def _run_pending_tasks(): """Check for pending scheduled tasks and execute them.""" from datetime import datetime as _dt, timezone as _tz from models import ScheduledTask - from routes.tasks import _execute_task try: now = _dt.now(_tz.utc) @@ -659,45 +662,23 @@ def _run_pending_tasks(): t = threading.Thread(target=_execute_task_with_context, args=(task.id,), daemon=True) t.start() - except Exception as e: - pass # Don't crash scheduler loop on task errors + except Exception: + pass def _execute_task_with_context(task_id): with app.app_context(): from routes.tasks import _execute_task _execute_task(task_id) - def _run_scheduler(): - log_path = WORKSPACE / "ADWs" / "logs" / "scheduler.log" - log_path.parent.mkdir(parents=True, exist_ok=True) - try: - import importlib.util - spec = importlib.util.spec_from_file_location("scheduler", WORKSPACE / "scheduler.py") - sched_module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(sched_module) - 
sched_module.setup_schedule() - - import schedule as sched_lib - import time as _time - from datetime import datetime as _dt - - with open(log_path, "a") as log: - log.write(f"\n[{_dt.now().strftime('%Y-%m-%d %H:%M:%S')}] Scheduler started ({len(sched_lib.get_jobs())} routines)\n") - log.flush() - - while True: - sched_lib.run_pending() - # Check for one-off scheduled tasks - with app.app_context(): - _run_pending_tasks() - _time.sleep(30) - except Exception as e: - with open(log_path, "a") as log: - log.write(f"Scheduler error: {e}\n") - print(f"Scheduler failed to start: {e}") - - sched_thread = threading.Thread(target=_run_scheduler, daemon=True, name="scheduler") - sched_thread.start() - print(f" Scheduler started in background") + def _poll_scheduled_tasks(): + """Lightweight thread that only polls ScheduledTask — no routine scheduling.""" + import time as _time + while True: + with app.app_context(): + _run_pending_tasks() + _time.sleep(30) + + task_thread = threading.Thread(target=_poll_scheduled_tasks, daemon=True, name="task-poller") + task_thread.start() app.run(host="0.0.0.0", port=port, debug=False) From d934d9c257cf1eb2f6d72cedcba3558142a48af8 Mon Sep 17 00:00:00 2001 From: Marcelo Leal Date: Sat, 18 Apr 2026 18:49:33 -0300 Subject: [PATCH 06/11] fix(swarm): three deploy bugs found during production setup 1. Add ANTHROPIC_API_KEY to ALLOWED_VARS in claude-bridge.js The env var was silently filtered out, causing Claude Code to fall back to OAuth login on every session start instead of using the API key configured in the Providers page. 2. Fix orphaned session crash ("Session already exists") When a Claude process died without firing the PTY onExit event, the session remained in the bridge's in-memory Map as inactive. The next start attempt threw "already exists". Now detects dead sessions, cleans them up, and restarts normally. 3. 
Exclude dashboard/data/ and workspace/ from Docker build context Without these entries in .dockerignore, the local SQLite database (with hashed passwords) and workspace files were baked into the image. On first Swarm deploy, the volume was seeded from the image, making login impossible with any other credentials. Co-Authored-By: Claude Sonnet 4.6 --- .dockerignore | 2 ++ dashboard/terminal-server/src/claude-bridge.js | 11 ++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/.dockerignore b/.dockerignore index a4adbae1..f1c74a2b 100644 --- a/.dockerignore +++ b/.dockerignore @@ -3,6 +3,8 @@ .env __pycache__/ *.pyc +dashboard/data/ +workspace/ ADWs/logs/ ADWs/__pycache__/ .claude/agent-memory/ diff --git a/dashboard/terminal-server/src/claude-bridge.js b/dashboard/terminal-server/src/claude-bridge.js index 8260f0b3..34c70aa6 100644 --- a/dashboard/terminal-server/src/claude-bridge.js +++ b/dashboard/terminal-server/src/claude-bridge.js @@ -15,6 +15,7 @@ class ClaudeBridge { _loadProviderConfig() { const ALLOWED_CLI = new Set(['claude', 'openclaude']); const ALLOWED_VARS = new Set([ + 'ANTHROPIC_API_KEY', 'CLAUDE_CODE_USE_OPENAI', 'CLAUDE_CODE_USE_GEMINI', 'CLAUDE_CODE_USE_BEDROCK', 'CLAUDE_CODE_USE_VERTEX', 'OPENAI_BASE_URL', 'OPENAI_API_KEY', 'OPENAI_MODEL', @@ -138,7 +139,15 @@ class ClaudeBridge { async startSession(sessionId, options = {}) { if (this.sessions.has(sessionId)) { - throw new Error(`Session ${sessionId} already exists`); + const existing = this.sessions.get(sessionId); + if (existing.active) { + throw new Error(`Session ${sessionId} already exists`); + } + // Orphaned dead session — clean up and restart + if (existing.process) { + try { existing.process.kill('SIGKILL'); } catch (_) {} + } + this.sessions.delete(sessionId); } const { From 7da1ca1fe0b28bbbce24d02c629120068159fcc7 Mon Sep 17 00:00:00 2001 From: Marcelo Leal Date: Sat, 18 Apr 2026 19:28:20 -0300 Subject: [PATCH 07/11] fix(swarm): add claude-auth volume and fix 
docker-compose for dashboard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add evonexus_claude_auth:/root/.claude to all three Swarm services (dashboard, telegram, scheduler) so Claude Code OAuth tokens persist across redeploys — avoids re-authentication on every deploy - docker-compose.yml: use Dockerfile.swarm.dashboard, expose terminal port 32352, add claude-auth volume, fix config mount (remove :ro so providers.json can be written by the UI) Co-Authored-By: Claude Sonnet 4.6 --- docker-compose.yml | 13 +-- evonexus.portainer.stack.yml | 166 +++++++++++++++++++++++++++++++++++ 2 files changed, 174 insertions(+), 5 deletions(-) create mode 100644 evonexus.portainer.stack.yml diff --git a/docker-compose.yml b/docker-compose.yml index 1b01017c..7050e9aa 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,17 +3,19 @@ services: dashboard: build: context: . - dockerfile: Dockerfile.dashboard + dockerfile: Dockerfile.swarm.dashboard container_name: evonexus-dashboard ports: - - "${EVONEXUS_PORT:-8080}:8080" + - "8081:8080" + - "32352:32352" env_file: .env environment: - TZ=America/Sao_Paulo - EVONEXUS_PORT=8080 + - TERMINAL_SERVER_PORT=32352 volumes: - - ./.env:/workspace/.env:ro - - ./config:/workspace/config:ro + - claude-auth:/root/.claude + - ./config:/workspace/config - ./workspace:/workspace/workspace - ./dashboard/data:/workspace/dashboard/data - ./.claude/agents:/workspace/.claude/agents:ro @@ -21,7 +23,7 @@ services: - ./.claude/commands:/workspace/.claude/commands:ro - ./.claude/templates:/workspace/.claude/templates:ro - ./memory:/workspace/memory:ro - - ./ADWs/logs:/workspace/ADWs/logs:ro + - ./ADWs/logs:/workspace/ADWs/logs restart: unless-stopped healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8080/api/version"] @@ -79,6 +81,7 @@ services: - manual volumes: + claude-auth: daily-logs: projects: community: diff --git a/evonexus.portainer.stack.yml b/evonexus.portainer.stack.yml new file 
mode 100644 index 00000000..fd641579 --- /dev/null +++ b/evonexus.portainer.stack.yml @@ -0,0 +1,166 @@ +version: "3.8" + +# ============================================================ +# EvoNexus - Stack de Produção (Docker Swarm / Portainer) +# +# Baseado na documentação oficial do projeto: +# - README.swarm.md +# - evonexus.stack.yml +# +# Ajustado para este servidor: +# - Domínio: evonexus.advancedbot.com.br +# - Rede Traefik: network_public +# - Entrypoint Traefik: websecure +# - Certresolver: letsencryptresolver +# +# Antes do deploy: +# 1. Garanta que a rede `network_public` já exista no Swarm +# 2. As imagens publicadas no Docker Hub usam a tag latest: +# - marcelolealhub/evo-nexus-dashboard:latest +# - marcelolealhub/evo-nexus-runtime:latest +# ============================================================ + +services: + + evonexus_dashboard: + image: marcelolealhub/evo-nexus-dashboard:latest + + volumes: + - evonexus_config:/workspace/config + - evonexus_workspace:/workspace/workspace + - evonexus_dashboard_data:/workspace/dashboard/data + - evonexus_memory:/workspace/memory + - evonexus_adw_logs:/workspace/ADWs/logs + - evonexus_agent_memory:/workspace/.claude/agent-memory + - evonexus_claude_auth:/root/.claude + - evonexus_codex_auth:/root/.codex + + networks: + - network_public + + environment: + - TZ=America/Sao_Paulo + - EVONEXUS_PORT=8080 + - TERMINAL_SERVER_PORT=32352 + - FORWARDED_ALLOW_IPS=* + + deploy: + mode: replicated + replicas: 1 + restart_policy: + condition: on-failure + placement: + constraints: + - node.role == manager + resources: + limits: + cpus: "1" + memory: 1024M + labels: + - traefik.enable=true + - traefik.docker.network=network_public + + - traefik.http.routers.evonexus.rule=Host(`evonexus.advancedbot.com.br`) + - traefik.http.routers.evonexus.entrypoints=websecure + - traefik.http.routers.evonexus.priority=1 + - traefik.http.routers.evonexus.tls.certresolver=letsencryptresolver + - traefik.http.routers.evonexus.service=evonexus 
+ - traefik.http.services.evonexus.loadbalancer.server.port=8080 + - traefik.http.services.evonexus.loadbalancer.passHostHeader=true + + - traefik.http.routers.evonexus-terminal.rule=Host(`evonexus.advancedbot.com.br`) && PathPrefix(`/terminal`) + - traefik.http.routers.evonexus-terminal.entrypoints=websecure + - traefik.http.routers.evonexus-terminal.priority=10 + - traefik.http.routers.evonexus-terminal.tls.certresolver=letsencryptresolver + - traefik.http.routers.evonexus-terminal.service=evonexus-terminal + - traefik.http.routers.evonexus-terminal.middlewares=evonexus-terminal-strip + - traefik.http.middlewares.evonexus-terminal-strip.stripprefix.prefixes=/terminal + - traefik.http.services.evonexus-terminal.loadbalancer.server.port=32352 + - traefik.http.services.evonexus-terminal.loadbalancer.passHostHeader=true + + evonexus_telegram: + image: marcelolealhub/evo-nexus-runtime:latest + command: + - "claude" + - "--channels" + - "plugin:telegram@claude-plugins-official" + - "--dangerously-skip-permissions" + + volumes: + - evonexus_config:/workspace/config + - evonexus_workspace:/workspace/workspace + - evonexus_memory:/workspace/memory + - evonexus_adw_logs:/workspace/ADWs/logs + - evonexus_agent_memory:/workspace/.claude/agent-memory + - evonexus_claude_auth:/root/.claude + - evonexus_codex_auth:/root/.codex + + networks: + - network_public + + environment: + - TZ=America/Sao_Paulo + - REQUIRE_ANTHROPIC_KEY=1 + + stdin_open: true + tty: true + + deploy: + mode: replicated + replicas: 1 + restart_policy: + condition: on-failure + placement: + constraints: + - node.role == manager + resources: + limits: + cpus: "1" + memory: 1024M + + evonexus_scheduler: + image: marcelolealhub/evo-nexus-runtime:latest + command: ["uv", "run", "python", "scheduler.py"] + + volumes: + - evonexus_config:/workspace/config + - evonexus_workspace:/workspace/workspace + - evonexus_memory:/workspace/memory + - evonexus_adw_logs:/workspace/ADWs/logs + - 
evonexus_agent_memory:/workspace/.claude/agent-memory + - evonexus_claude_auth:/root/.claude + - evonexus_codex_auth:/root/.codex + + networks: + - network_public + + environment: + - TZ=America/Sao_Paulo + - REQUIRE_ANTHROPIC_KEY=1 + + deploy: + mode: replicated + replicas: 1 + restart_policy: + condition: on-failure + placement: + constraints: + - node.role == manager + resources: + limits: + cpus: "1" + memory: 1024M + +volumes: + evonexus_config: + evonexus_workspace: + evonexus_dashboard_data: + evonexus_memory: + evonexus_adw_logs: + evonexus_agent_memory: + evonexus_claude_auth: + evonexus_codex_auth: + +networks: + network_public: + external: true From 61bd3a4c83164a227fafa99bcfe1b3278e5da88c Mon Sep 17 00:00:00 2001 From: Marcelo Leal Date: Sat, 18 Apr 2026 19:33:06 -0300 Subject: [PATCH 08/11] fix(swarm): add claude_auth volume to official stack template Add evonexus_claude_auth:/root/.claude to all three services in evonexus.stack.yml so Claude Code OAuth tokens persist across redeploys. Same fix applied to evonexus.portainer.stack.yml in the previous commit. 
Co-Authored-By: Claude Sonnet 4.6 --- evonexus.stack.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/evonexus.stack.yml b/evonexus.stack.yml index a329183b..ef0401c4 100644 --- a/evonexus.stack.yml +++ b/evonexus.stack.yml @@ -32,6 +32,7 @@ services: - evonexus_memory:/workspace/memory - evonexus_adw_logs:/workspace/ADWs/logs - evonexus_agent_memory:/workspace/.claude/agent-memory + - evonexus_claude_auth:/root/.claude - evonexus_codex_auth:/root/.codex networks: @@ -96,6 +97,7 @@ services: - evonexus_memory:/workspace/memory - evonexus_adw_logs:/workspace/ADWs/logs - evonexus_agent_memory:/workspace/.claude/agent-memory + - evonexus_claude_auth:/root/.claude - evonexus_codex_auth:/root/.codex networks: @@ -155,6 +157,7 @@ volumes: evonexus_memory: evonexus_adw_logs: evonexus_agent_memory: + evonexus_claude_auth: evonexus_codex_auth: networks: From 1c858e6ece57ed82a19f255ed6387c072ec54afa Mon Sep 17 00:00:00 2001 From: Marcelo Leal Date: Sat, 18 Apr 2026 20:09:05 -0300 Subject: [PATCH 09/11] fix(dashboard): three more bugs found in production after F1.4 redeploy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bug 1 — Theme picker on every agent Each agent runs in its own working directory, which Claude Code treats as a separate project. Without a global theme set, the user is asked to choose a theme on every single agent terminal. Pre-seed /root/.claude/settings.json with theme + onboarding flags during container startup so the first-run prompts are skipped. Only writes the file if it doesn't exist (preserves user-chosen overrides). Bug 2 — "Session already exists" error toast The previous fix only cleaned up *inactive* orphans. The actual production trigger is different: when a WebSocket reconnects through Traefik, the frontend can re-send start_claude before learning the session is still alive. The bridge's startSession then threw on a duplicate active session. 
Make startSession idempotent: if the session is already active, return the existing entry instead of throwing. Bug 3 — Misleading error on duplicate start Server.startClaude() responded with type:'error' "An agent is already running" when the session was active. From the user's perspective this looked like a failure even though everything was working. Send type:'claude_started' instead so the frontend updates UI to "running" and replays the buffer. Co-Authored-By: Claude Sonnet 4.6 --- .../terminal-server/src/claude-bridge.js | 9 ++++++++- dashboard/terminal-server/src/server.js | 6 +++++- start-dashboard.sh | 20 +++++++++++++++++++ 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/dashboard/terminal-server/src/claude-bridge.js b/dashboard/terminal-server/src/claude-bridge.js index 34c70aa6..16079126 100644 --- a/dashboard/terminal-server/src/claude-bridge.js +++ b/dashboard/terminal-server/src/claude-bridge.js @@ -141,7 +141,14 @@ class ClaudeBridge { if (this.sessions.has(sessionId)) { const existing = this.sessions.get(sessionId); if (existing.active) { - throw new Error(`Session ${sessionId} already exists`); + // Idempotent: a duplicate startSession can arrive when the WebSocket + // reconnects through a reverse proxy (Traefik) and the frontend + // re-sends start_claude before learning the session is still alive. + // Returning the existing session instead of throwing prevents a + // confusing "Session already exists" toast on the user's terminal + // while keeping the original PTY intact. 
+ console.log(`[bridge] startSession(${sessionId}) — already active, returning existing session`); + return existing; } // Orphaned dead session — clean up and restart if (existing.process) { diff --git a/dashboard/terminal-server/src/server.js b/dashboard/terminal-server/src/server.js index c3482e47..69a21846 100644 --- a/dashboard/terminal-server/src/server.js +++ b/dashboard/terminal-server/src/server.js @@ -778,7 +778,11 @@ class TerminalServer { if (!session) return; if (session.active) { - this.sendToWebSocket(wsInfo.ws, { type: 'error', message: 'An agent is already running in this session' }); + // Frontend may re-send start_claude on WebSocket reconnect (common + // through reverse proxies like Traefik). The session is already + // running — replay the buffer and tell the client it's attached + // instead of surfacing a misleading error toast. + this.sendToWebSocket(wsInfo.ws, { type: 'claude_started', sessionId: wsInfo.claudeSessionId }); return; } diff --git a/start-dashboard.sh b/start-dashboard.sh index 21f52614..a78bfc63 100755 --- a/start-dashboard.sh +++ b/start-dashboard.sh @@ -22,6 +22,26 @@ FLASK_PORT="${EVONEXUS_PORT:-8080}" echo "[start-dashboard] terminal-server on :${TERMINAL_PORT}, Flask on :${FLASK_PORT}" +# ---------------------------------------------------------------------------- +# Pre-seed Claude Code global settings so the first-run theme/onboarding +# prompts are skipped on every new agent terminal. Each agent runs in its +# own working directory, which Claude Code treats as a separate project — +# without this, the user has to pick a theme on every single agent. +# Only writes the file if it doesn't already exist (preserves user choices). +# ---------------------------------------------------------------------------- +mkdir -p /root/.claude +if [ ! 
-f /root/.claude/settings.json ]; then + echo "[start-dashboard] seeding /root/.claude/settings.json with default theme" + cat > /root/.claude/settings.json <<'EOF' +{ + "theme": "dark", + "hasCompletedOnboarding": true, + "hasSeenWelcome": true, + "telemetry": false +} +EOF +fi + # Start terminal-server in the background node /workspace/dashboard/terminal-server/bin/server.js --port "${TERMINAL_PORT}" & TERMINAL_PID=$! From bc06c5ee6c59ab96c2b8be3ab2075a83e7a35858 Mon Sep 17 00:00:00 2001 From: Marcelo Leal Date: Sat, 18 Apr 2026 22:19:22 -0300 Subject: [PATCH 10/11] fix(swarm): restore /root/.claude.json from backup on container start MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Claude Code stores its main config at /root/.claude.json — a SIBLING of the /root/.claude/ directory, not inside it. The Swarm volume mounts /root/.claude/ only, so .claude.json sits in the container's writable layer and is wiped on every redeploy. Result: theme picker and onboarding reappear on every release, even though the OAuth tokens (in /root/.claude/) survive. Claude Code itself writes timestamped backups into /root/.claude/backups/ (which IS in the volume), so we just need to restore the latest one on startup when the main file is missing. If no backup exists either, seed a minimal config so first-run prompts are skipped. Co-Authored-By: Claude Sonnet 4.6 --- start-dashboard.sh | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/start-dashboard.sh b/start-dashboard.sh index a78bfc63..b56848bf 100755 --- a/start-dashboard.sh +++ b/start-dashboard.sh @@ -42,6 +42,39 @@ if [ ! -f /root/.claude/settings.json ]; then EOF fi +# ---------------------------------------------------------------------------- +# Restore /root/.claude.json from the most recent backup when missing. 
+# +# Claude Code's main config (theme, onboarding flags, per-project state) lives +# at /root/.claude.json — a SIBLING of the /root/.claude/ directory, NOT +# inside it. The Swarm volume mounts /root/.claude/, so /root/.claude.json +# sits in the container's writable layer and is wiped on every redeploy. +# Result: theme picker and onboarding reappear on every release. +# +# Claude Code itself writes timestamped backups into /root/.claude/backups/ +# (which IS in the volume). We just need to restore the latest on startup +# if the main file is missing. If no backup exists either, seed a minimal +# config so the first-run prompts are skipped. +# ---------------------------------------------------------------------------- +if [ ! -f /root/.claude.json ]; then + latest_backup=$(ls -t /root/.claude/backups/.claude.json.backup.* 2>/dev/null | head -n1 || true) + if [ -n "${latest_backup:-}" ] && [ -f "${latest_backup}" ]; then + echo "[start-dashboard] restoring /root/.claude.json from ${latest_backup}" + cp "${latest_backup}" /root/.claude.json + else + echo "[start-dashboard] seeding minimal /root/.claude.json (no backup found)" + cat > /root/.claude.json <<'EOF' +{ + "theme": "dark", + "hasCompletedOnboarding": true, + "hasSeenWelcome": true, + "bypassPermissionsModeAccepted": true, + "telemetry": false +} +EOF + fi +fi + # Start terminal-server in the background node /workspace/dashboard/terminal-server/bin/server.js --port "${TERMINAL_PORT}" & TERMINAL_PID=$! From ff1b7709d93c175acd4cbb044ae89446c88dde66 Mon Sep 17 00:00:00 2001 From: Marcelo Leal Date: Sat, 18 Apr 2026 23:21:11 -0300 Subject: [PATCH 11/11] fix(dashboard): copy .claude/ and docs/ into image MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Dockerfile only copied dashboard/backend/, social-auth/, scheduler.py and the built frontend.
.claude/ (agents, skills, commands, templates, rules) and docs/ were never copied, so on a fresh deploy the backend's WORKSPACE / ".claude" / "agents" path was empty. Result: /api/agents, /api/skills, /api/commands and /api/templates all returned empty lists, and the UI showed "No agents found — Add agent files to .claude/agents/ to get started" on every clean Swarm deploy. Local development worked because uv runs the backend with cwd at the repo root, where .claude/ and docs/ exist. .claude/agent-memory and .claude/.env stay excluded by .dockerignore so user data and secrets remain out of the image. Co-Authored-By: Claude Sonnet 4.6 --- Dockerfile.dashboard | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Dockerfile.dashboard b/Dockerfile.dashboard index e817bf07..523f16d0 100644 --- a/Dockerfile.dashboard +++ b/Dockerfile.dashboard @@ -40,6 +40,14 @@ COPY dashboard/backend/ dashboard/backend/ COPY social-auth/ social-auth/ COPY scheduler.py ./ +# Copy workspace assets the backend reads at runtime. +# Without these, /api/agents, /api/skills, /api/commands etc. all return empty +# and the UI shows "No agents found" / "No skills found" on a fresh deploy. +# .claude/agent-memory and .claude/.env are excluded by .dockerignore so user +# data and secrets stay out of the image. +COPY .claude/ .claude/ +COPY docs/ docs/ + # Copy built frontend from stage 1 COPY --from=frontend-build /frontend/dist dashboard/frontend/dist