Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from django.db import migrations

from firetower.incidents.tasks import SCHEDULES


def create_schedule(apps, schema_editor):
    """Forward migration step: register the ``archive_stale_channels`` schedule.

    Looks up the historical ``django_q.Schedule`` model and calls
    ``get_or_create`` keyed on the schedule name, seeding a new row from the
    matching entry in ``SCHEDULES``.

    Args:
        apps: App registry supplied by the migration runner.
        schema_editor: Unused; part of the ``RunPython`` callable signature.
    """
    name = "archive_stale_channels"
    schedule_model = apps.get_model("django_q", "Schedule")
    schedule_model.objects.get_or_create(name=name, defaults=SCHEDULES[name])


def delete_schedule(apps, schema_editor):
    """Reverse migration step: remove the ``archive_stale_channels`` schedule.

    Deletes every ``django_q.Schedule`` row whose name is
    ``archive_stale_channels``.

    Args:
        apps: App registry supplied by the migration runner.
        schema_editor: Unused; part of the ``RunPython`` callable signature.
    """
    schedule_model = apps.get_model("django_q", "Schedule")
    matching = schedule_model.objects.filter(name="archive_stale_channels")
    matching.delete()


class Migration(migrations.Migration):
    """Data migration that installs the ``archive_stale_channels`` schedule.

    Applying it runs ``create_schedule``; reversing it runs
    ``delete_schedule``.
    """

    # Both the incidents app and django-q's own tables must be at these
    # migrations before this one runs.
    dependencies = [
        ("incidents", "0018_add_action_item_model"),
        ("django_q", "0018_task_success_index"),
    ]

    operations = [
        migrations.RunPython(
            code=create_schedule,
            reverse_code=delete_schedule,
        ),
    ]
136 changes: 133 additions & 3 deletions src/firetower/incidents/tasks.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,33 @@
import functools
import logging
import re
import time
from typing import Protocol

from datadog import statsd
from django_q.tasks import Schedule

from firetower.incidents.models import Incident
from firetower.incidents.models import (
ExternalLink,
ExternalLinkType,
Incident,
IncidentStatus,
)
from firetower.integrations.services.slack import SlackService

# Periodic jobs, keyed by schedule name. Each value holds the field values for
# one django-q ``Schedule`` row; the migration in this changeset passes the
# ``archive_stale_channels`` entry as ``defaults`` to
# ``Schedule.objects.get_or_create``.
SCHEDULES = {
    "schedule_demo": {
        "func": "firetower.incidents.tasks.schedule_demo",
        "schedule_type": Schedule.MINUTES,
        "minutes": 5,
        "repeats": -1,  # repeat indefinitely
    },
    "archive_stale_channels": {
        "func": "firetower.incidents.tasks.archive_stale_channels",
        "schedule_type": Schedule.DAILY,
        "repeats": -1,  # repeat indefinitely
    },
}

Check failure on line 30 in src/firetower/incidents/tasks.py

View check run for this annotation

@sentry/warden / warden: find-bugs

API error in get_channel_history silently returns [] causing valid channels to be archived

When `get_channel_history` fails mid-pagination (network error, rate limit, or `ok=False`), it catches the exception and returns `[]` instead of propagating the error. `archive_stale_channels` then sees an empty message list, treats the channel as stale, and archives it — potentially destroying an active incident channel.

# Matches any single character that is NOT an ASCII letter, a digit, "-", "_",
# "." or "/".
# NOTE(review): presumably used to sanitize names sent to Datadog -- the usage
# is not visible in this view; confirm against the caller.
DATADOG_INVALID_CHARS = re.compile(r"[^A-Za-z0-9-_.\/]")

Expand Down Expand Up @@ -60,6 +72,124 @@
return wrapper


# Message posted to a Slack channel by archive_stale_channels immediately
# before the channel is archived.
ARCHIVE_NOTICE = (
    "This channel is being archived by Firetower because all message history "
    "has been removed by the workspace retention policy and there doesn't "
    "appear to be any active discussions."
)

# Pause, in seconds, that archive_stale_channels inserts between consecutive
# channels it processes.
ARCHIVE_CHANNEL_DELAY_SECONDS = 2


def _channel_has_activity(
    slack: SlackService, channel_id: str, own_bot_id: str
) -> bool:
    """Return True if the channel still shows activity that should block archiving.

    A channel counts as active when its history contains any message not
    posted by our own bot, or when any message has a non-zero ``reply_count``
    and ``get_thread_replies`` returns something for it.

    NOTE(review): automated reviewers on this change flagged that
    ``get_channel_history`` / ``get_thread_replies`` may return an empty or
    partial list when the Slack API call fails. If that is the case, an
    active channel looks empty here and would be archived. Confirm that those
    helpers raise (or otherwise signal failure) before trusting an empty
    result.
    """
    messages = slack.get_channel_history(channel_id)
    if any(msg.get("bot_id") != own_bot_id for msg in messages):
        return True

    # Only our own bot's messages remain; threads hanging off them still
    # count as activity.
    return any(
        msg.get("reply_count", 0) > 0
        and slack.get_thread_replies(channel_id, msg["ts"])
        for msg in messages
    )


@datadog_log
def archive_stale_channels() -> None:
    """Archive Slack channels of finished incidents that have no remaining activity.

    Walks every Slack ``ExternalLink`` whose incident is DONE or CANCELLED.
    For each channel that is not already archived and shows no activity (see
    ``_channel_has_activity``), posts ``ARCHIVE_NOTICE`` and then archives the
    channel. If archiving fails, the notice is deleted again.

    If the Slack client is not initialized, the ``archive_stale_channels``
    schedule is disabled by setting its ``repeats`` to 0. If the bot ID is
    unknown, the run is aborted. A summary with scanned / archived / skipped /
    errored counts is logged at the end.
    """
    slack = SlackService()
    if not slack.client:
        logger.error(
            "Slack client not initialized -- disabling archive_stale_channels schedule"
        )
        Schedule.objects.filter(name="archive_stale_channels").update(repeats=0)
        return

    own_bot_id = slack.bot_id
    if not own_bot_id:
        logger.error("Could not determine own bot ID, aborting archive run")
        return

    terminal_statuses = [IncidentStatus.DONE, IncidentStatus.CANCELLED]
    links = ExternalLink.objects.filter(
        type=ExternalLinkType.SLACK,
        incident__status__in=terminal_statuses,
    ).select_related("incident")

    scanned = 0
    archived = 0
    skipped = 0
    errored = 0

    for i, link in enumerate(links):
        # Space out consecutive channels; no delay before the first one.
        if i > 0:
            time.sleep(ARCHIVE_CHANNEL_DELAY_SECONDS)

        scanned += 1
        channel_id = slack.parse_channel_id_from_url(link.url)
        if not channel_id:
            skipped += 1
            continue

        try:
            info = slack.get_channel_info(channel_id)
            if info is None:
                logger.warning(
                    f"Could not fetch info for channel {channel_id} "
                    f"(incident {link.incident.incident_number}), skipping"
                )
                skipped += 1
                continue

            if info.get("is_archived"):
                skipped += 1
                continue

            if _channel_has_activity(slack, channel_id, own_bot_id):
                skipped += 1
                continue

            notice_ts = slack.post_message(channel_id, ARCHIVE_NOTICE)
            if not notice_ts:
                logger.error(
                    f"Failed to post archive notice to channel {channel_id} "
                    f"(incident {link.incident.incident_number}), skipping archive"
                )
                errored += 1
                continue

            try:
                if not slack.archive_channel(channel_id):
                    raise RuntimeError(
                        f"archive_channel returned False for {channel_id}"
                    )
            except Exception:
                errored += 1
                logger.exception(
                    f"Failed to archive channel {channel_id} "
                    f"(incident {link.incident.incident_number}), "
                    f"deleting notice"
                )
                # Guard the cleanup: a failure here must not reach the outer
                # handler, which would count this channel as errored twice.
                try:
                    slack.delete_message(channel_id, notice_ts)
                except Exception:
                    logger.exception(
                        f"Failed to delete archive notice from channel {channel_id} "
                        f"(incident {link.incident.incident_number})"
                    )
            else:
                archived += 1
                logger.info(
                    f"Archived stale channel {channel_id} "
                    f"(incident {link.incident.incident_number})"
                )
        except Exception:
            # One bad channel must not stop the rest of the run.
            errored += 1
            logger.exception(
                f"Error processing channel {channel_id} "
                f"(incident {link.incident.incident_number})"
            )

    logger.info(
        f"archive_stale_channels complete: "
        f"scanned={scanned} archived={archived} skipped={skipped} errored={errored}"
    )


@datadog_log
def schedule_demo() -> None:
incident = Incident.objects.order_by("-created_at").first()
Expand Down
Loading
Loading