From 2469a1672d2addf9b0dc7b472aae70c23956a793 Mon Sep 17 00:00:00 2001 From: Richard Gibert Date: Fri, 22 May 2026 10:58:10 -0400 Subject: [PATCH 1/8] feat(async): Add daily task to archive stale Slack channels Add a django-q2 scheduled task that runs daily to find bot-created Slack channels where the workspace retention policy has deleted all message history. For each stale channel, posts an archival notice and archives the channel. Changes: - Add archive_channel() method and limit param to get_channel_history on SlackService - Add archive_stale_channels task with per-channel error isolation - Self-disables the schedule if Slack client is unavailable - Add is_archived to get_channel_info return dict - Data migration to register the schedule Co-Authored-By: Claude Agent transcript: https://claudescope.sentry.dev/share/SeyXSRtESuCIjsSjmOGgO3MM_MZHcZftkU8Td81uFP8 --- ...026-05-22-archive-stale-channels-design.md | 85 ++++++++++ .../0019_schedule_archive_stale_channels.py | 28 ++++ src/firetower/incidents/tasks.py | 86 +++++++++- src/firetower/incidents/tests/test_tasks.py | 158 +++++++++++++++++- src/firetower/integrations/services/slack.py | 39 ++++- .../integrations/tests/test_slack.py | 81 +++++++++ 6 files changed, 470 insertions(+), 7 deletions(-) create mode 100644 docs/superpowers/specs/2026-05-22-archive-stale-channels-design.md create mode 100644 src/firetower/incidents/migrations/0019_schedule_archive_stale_channels.py diff --git a/docs/superpowers/specs/2026-05-22-archive-stale-channels-design.md b/docs/superpowers/specs/2026-05-22-archive-stale-channels-design.md new file mode 100644 index 00000000..288920f2 --- /dev/null +++ b/docs/superpowers/specs/2026-05-22-archive-stale-channels-design.md @@ -0,0 +1,85 @@ +# Archive Stale Slack Channels + +## Summary + +Add a daily django-q2 scheduled task that finds bot-created Slack channels where Slack's workspace retention policy has deleted all message history, posts a notice explaining the channel is being archived due to inactivity, and archives the channel. + +## Scope + +- Included: all Slack channels tracked via `ExternalLink(type=SLACK)` records, both public and private. +- Excluded: channels not created by Firetower (no ExternalLink record), channels that still have any message history. + +## Detection Logic + +A channel is considered "stale" when ALL of these are true: + +1. An `ExternalLink(type=SLACK)` record exists for it. +2. `conversations_info` confirms the channel exists and `is_archived` is false. +3. `conversations_history(limit=1)` returns zero messages -- meaning Slack's retention policy has wiped all content. + +If `conversations_info` fails (channel deleted, bot removed, etc.), skip it and log a warning. Do not treat API errors as "no history." + +## Archive Flow Per Channel + +1. Post a message: "This channel is being archived by Firetower because all message history has been removed by the workspace retention policy and there doesn't appear to be any active discussions." +2. Call `conversations_archive` to archive the channel. +3. Log the channel ID and incident number. + +## New Code + +### `SlackService.archive_channel(channel_id)` -- `src/firetower/integrations/services/slack.py` + +Wraps `conversations_archive`. Returns bool. Logs errors on `SlackApiError`. + +### `archive_stale_channels()` -- `src/firetower/incidents/tasks.py` + +Decorated with `@datadog_log`. Steps: + +1. If `SlackService` has no client (missing bot token), disable this schedule in django-q and return early. +2. Query `ExternalLink.objects.filter(type=ExternalLinkType.SLACK)` and select_related incident. +3. For each link, parse channel ID via `SlackService.parse_channel_id_from_url`. +4. Call `conversations_info` -- skip if channel is already archived or API errors. +5. Call `conversations_history` with `limit=1` -- if messages list is non-empty, skip. +6. Post the archival notice via `SlackService.post_message`. +7. Archive via `SlackService.archive_channel`. + +Use a single `SlackService` instance for the entire task run. Do not fail the whole task if one channel errors -- log and continue. + +### `SCHEDULES` entry + +```python +"archive_stale_channels": { + "func": "firetower.incidents.tasks.archive_stale_channels", + "schedule_type": Schedule.DAILY, + "repeats": -1, +} +``` + +### Migration `0019_schedule_archive_stale_channels` + +Data migration following the same pattern as `0016_schedule_demo.py`. Creates the django-q Schedule record. + +### `SlackService.get_channel_history` change + +Add an optional `limit` parameter (default `None` = current behavior of paginating everything). When `limit` is set, pass it directly and do not paginate. The archive task calls this with `limit=1`. + +## Observability + +- `@datadog_log` decorator provides `django_q.task.run`, `django_q.task.success`, `django_q.task.error` metrics. +- Per-run counters logged at INFO level: channels scanned, archived, skipped, errored. + +## Error Handling + +- Single channel failure must not abort the task. Catch `SlackApiError` per channel, log, continue. +- If `SlackService` has no client (missing bot token), disable this schedule via `Schedule.objects.filter(name="archive_stale_channels").update(repeats=0)`, log an error, and return. + +## Testing + +- Unit test `archive_stale_channels` with mocked `SlackService`: + - Channel with no history -> archived (post + archive called). + - Channel with history -> skipped. + - Channel already archived -> skipped. + - Channel API error -> skipped with error logged. + - No Slack client -> schedule disabled. +- Unit test `SlackService.archive_channel`. +- Unit test `get_channel_history` with `limit` parameter. diff --git a/src/firetower/incidents/migrations/0019_schedule_archive_stale_channels.py b/src/firetower/incidents/migrations/0019_schedule_archive_stale_channels.py new file mode 100644 index 00000000..95b0eea0 --- /dev/null +++ b/src/firetower/incidents/migrations/0019_schedule_archive_stale_channels.py @@ -0,0 +1,28 @@ +from django.db import migrations + +from firetower.incidents.tasks import SCHEDULES + + +def create_schedule(apps, schema_editor): + Schedule = apps.get_model("django_q", "Schedule") + schedule_name = "archive_stale_channels" + Schedule.objects.get_or_create( + name=schedule_name, defaults=SCHEDULES[schedule_name] + ) + + +def delete_schedule(apps, schema_editor): + Schedule = apps.get_model("django_q", "Schedule") + schedule_name = "archive_stale_channels" + Schedule.objects.filter(name=schedule_name).delete() + + +class Migration(migrations.Migration): + dependencies = [ + ("incidents", "0018_add_action_item_model"), + ("django_q", "0018_task_success_index"), + ] + + operations = [ + migrations.RunPython(create_schedule, delete_schedule), + ] diff --git a/src/firetower/incidents/tasks.py b/src/firetower/incidents/tasks.py index 6df99481..08856516 100644 --- a/src/firetower/incidents/tasks.py +++ b/src/firetower/incidents/tasks.py @@ -6,14 +6,20 @@ from datadog import statsd from django_q.tasks import Schedule -from firetower.incidents.models import Incident +from firetower.incidents.models import ExternalLink, ExternalLinkType, Incident +from firetower.integrations.services.slack import SlackService SCHEDULES = { "schedule_demo": { "func": "firetower.incidents.tasks.schedule_demo", - "schedule_type": Schedule.MINUTES, # Minutes + "schedule_type": Schedule.MINUTES, "minutes": 5, - "repeats": -1, # repeat indefinitely + "repeats": -1, + }, + "archive_stale_channels": { + "func": "firetower.incidents.tasks.archive_stale_channels", + "schedule_type": Schedule.DAILY, + "repeats": -1, }, } @@ -60,6 +66,80 @@ def wrapper() -> None: return wrapper +ARCHIVE_NOTICE = ( + "This channel is being archived by Firetower because all message history " + "has been removed by the workspace retention policy and there doesn't " + "appear to be any active discussions." +) + + +@datadog_log +def archive_stale_channels() -> None: + slack = SlackService() + if not slack.client: + logger.error( + "Slack client not initialized -- disabling archive_stale_channels schedule" + ) + Schedule.objects.filter(name="archive_stale_channels").update(repeats=0) + return + + links = ExternalLink.objects.filter(type=ExternalLinkType.SLACK).select_related( + "incident" + ) + + scanned = 0 + archived = 0 + skipped = 0 + errored = 0 + + for link in links: + scanned += 1 + channel_id = slack.parse_channel_id_from_url(link.url) + if not channel_id: + skipped += 1 + continue + + try: + info = slack.get_channel_info(channel_id) + if info is None: + logger.warning( + f"Could not fetch info for channel {channel_id} " + f"(incident {link.incident.incident_number}), skipping" + ) + skipped += 1 + continue + + if info.get("is_archived"): + skipped += 1 + continue + + messages = slack.get_channel_history(channel_id, limit=1) + if messages: + skipped += 1 + continue + + slack.post_message(channel_id, ARCHIVE_NOTICE) + if slack.archive_channel(channel_id): + archived += 1 + logger.info( + f"Archived stale channel {channel_id} " + f"(incident {link.incident.incident_number})" + ) + else: + errored += 1 + except Exception: + errored += 1 + logger.exception( + f"Error processing channel {channel_id} " + f"(incident {link.incident.incident_number})" + ) + + logger.info( + f"archive_stale_channels complete: " + f"scanned={scanned} archived={archived} skipped={skipped} errored={errored}" + ) + + @datadog_log def schedule_demo() -> None: incident = Incident.objects.order_by("-created_at").first() diff --git a/src/firetower/incidents/tests/test_tasks.py b/src/firetower/incidents/tests/test_tasks.py index 109a0f84..c12fa28c 100644 --- a/src/firetower/incidents/tests/test_tasks.py +++ b/src/firetower/incidents/tests/test_tasks.py @@ -1,8 +1,21 @@ from unittest.mock import MagicMock, call, patch import pytest - -from firetower.incidents.tasks import datadog_log, schedule_demo +from django_q.models import Schedule + +from firetower.incidents.models import ( + ExternalLink, + ExternalLinkType, + Incident, + IncidentSeverity, + IncidentStatus, +) +from firetower.incidents.tasks import ( + ARCHIVE_NOTICE, + archive_stale_channels, + datadog_log, + schedule_demo, +) class TestDatadogLogTaskName: @@ -139,3 +152,144 @@ def test_shows_title_for_public_incident(self, mock_incident_cls, mock_statsd): logged = mock_logger.info.call_args[0][0] assert "Public outage" in logged + + +@pytest.mark.django_db +class TestArchiveStaleChannels: + def _make_incident(self, **kwargs): + defaults = { + "title": "Test Incident", + "status": IncidentStatus.DONE, + "severity": IncidentSeverity.P2, + } + defaults.update(kwargs) + return Incident.objects.create(**defaults) + + def _make_link(self, incident, channel_id="C12345"): + return ExternalLink.objects.create( + incident=incident, + type=ExternalLinkType.SLACK, + url=f"https://sentry.slack.com/archives/{channel_id}", + ) + + def test_archives_channel_with_no_history(self): + incident = self._make_incident() + self._make_link(incident, "C_EMPTY") + + mock_slack = MagicMock() + mock_slack.client = True + mock_slack.parse_channel_id_from_url.return_value = "C_EMPTY" + mock_slack.get_channel_info.return_value = { + "id": "C_EMPTY", + "name": "inc-2000", + "is_private": False, + "is_archived": False, + } + mock_slack.get_channel_history.return_value = [] + mock_slack.archive_channel.return_value = True + + with patch("firetower.incidents.tasks.SlackService", return_value=mock_slack): + archive_stale_channels.__wrapped__() + + mock_slack.post_message.assert_called_once_with("C_EMPTY", ARCHIVE_NOTICE) + mock_slack.archive_channel.assert_called_once_with("C_EMPTY") + + def test_skips_channel_with_history(self): + incident = self._make_incident() + self._make_link(incident, "C_ACTIVE") + + mock_slack = MagicMock() + mock_slack.client = True + mock_slack.parse_channel_id_from_url.return_value = "C_ACTIVE" + mock_slack.get_channel_info.return_value = { + "id": "C_ACTIVE", + "name": "inc-2001", + "is_private": False, + "is_archived": False, + } + mock_slack.get_channel_history.return_value = [ + {"type": "message", "text": "still here", "ts": "1.0"} + ] + + with patch("firetower.incidents.tasks.SlackService", return_value=mock_slack): + archive_stale_channels.__wrapped__() + + mock_slack.post_message.assert_not_called() + mock_slack.archive_channel.assert_not_called() + + def test_skips_already_archived_channel(self): + incident = self._make_incident() + self._make_link(incident, "C_ARCHIVED") + + mock_slack = MagicMock() + mock_slack.client = True + mock_slack.parse_channel_id_from_url.return_value = "C_ARCHIVED" + mock_slack.get_channel_info.return_value = { + "id": "C_ARCHIVED", + "name": "inc-2002", + "is_private": False, + "is_archived": True, + } + + with patch("firetower.incidents.tasks.SlackService", return_value=mock_slack): + archive_stale_channels.__wrapped__() + + mock_slack.get_channel_history.assert_not_called() + mock_slack.archive_channel.assert_not_called() + + def test_skips_channel_on_api_error(self): + incident = self._make_incident() + self._make_link(incident, "C_ERROR") + + mock_slack = MagicMock() + mock_slack.client = True + mock_slack.parse_channel_id_from_url.return_value = "C_ERROR" + mock_slack.get_channel_info.return_value = None + + with patch("firetower.incidents.tasks.SlackService", return_value=mock_slack): + archive_stale_channels.__wrapped__() + + mock_slack.get_channel_history.assert_not_called() + mock_slack.archive_channel.assert_not_called() + + def test_disables_schedule_when_no_client(self): + schedule = Schedule.objects.get(name="archive_stale_channels") + assert schedule.repeats == -1 + + mock_slack = MagicMock() + mock_slack.client = None + + with patch("firetower.incidents.tasks.SlackService", return_value=mock_slack): + archive_stale_channels.__wrapped__() + + schedule.refresh_from_db() + assert schedule.repeats == 0 + + def test_continues_on_single_channel_exception(self): + inc1 = self._make_incident() + inc2 = self._make_incident() + self._make_link(inc1, "C_BAD") + self._make_link(inc2, "C_GOOD") + + mock_slack = MagicMock() + mock_slack.client = True + mock_slack.parse_channel_id_from_url.side_effect = ( + lambda url: "C_BAD" if "C_BAD" in url else "C_GOOD" + ) + mock_slack.get_channel_info.side_effect = lambda cid: ( + (_ for _ in ()).throw(Exception("boom")) + if cid == "C_BAD" + else { + "id": "C_GOOD", + "name": "inc-x", + "is_private": False, + "is_archived": False, + } + ) + mock_slack.get_channel_history.return_value = [] + mock_slack.archive_channel.return_value = True + + with patch("firetower.incidents.tasks.SlackService", return_value=mock_slack): + archive_stale_channels.__wrapped__() + + mock_slack.archive_channel.assert_called_once_with("C_GOOD") diff --git a/src/firetower/integrations/services/slack.py b/src/firetower/integrations/services/slack.py index 72c8b434..c4551ee9 100644 --- a/src/firetower/integrations/services/slack.py +++ b/src/firetower/integrations/services/slack.py @@ -349,6 +349,7 @@ def get_channel_info(self, channel_id: str) -> dict | None: "id": channel.get("id", ""), "name": channel.get("name", ""), "is_private": channel.get("is_private", False), + "is_archived": channel.get("is_archived", False), } except SlackApiError as e: logger.error( @@ -409,10 +410,28 @@ def get_user_info(self, slack_user_id: str) -> dict | None: ) return None - def get_channel_history(self, channel_id: str) -> list[dict[str, Any]]: - """Return all messages from a channel, paginating automatically.""" + def get_channel_history( + self, channel_id: str, limit: int | None = None + ) -> list[dict[str, Any]]: + """Return messages from a channel. When *limit* is set, fetch at most + that many messages in a single API call (no pagination). When *limit* + is ``None``, paginate to retrieve all messages.""" if not self.client: return [] + if limit is not None: + try: + response = self.client.conversations_history( + channel=channel_id, limit=limit + ) + except Exception: + logger.exception("Failed to fetch history for channel %s", channel_id) + return [] + if not response.get("ok"): + logger.error( + "conversations_history returned not-ok for channel %s", channel_id + ) + return [] + return response.get("messages", []) messages: list[dict[str, Any]] = [] cursor: str | None = None while True: @@ -436,6 +455,22 @@ def get_channel_history(self, channel_id: str) -> list[dict[str, Any]]: break return messages + def archive_channel(self, channel_id: str) -> bool: + if not self.client: + logger.warning("Cannot archive channel - Slack client not initialized") + return False + + try: + logger.info(f"Archiving channel {channel_id}") + self.client.conversations_archive(channel=channel_id) + return True + except SlackApiError as e: + logger.error( + f"Error archiving channel: {e}", + extra={"channel_id": channel_id}, + ) + return False + def get_thread_replies( self, channel_id: str, thread_ts: str ) -> list[dict[str, Any]]: diff --git a/src/firetower/integrations/tests/test_slack.py b/src/firetower/integrations/tests/test_slack.py index edbac5da..a2869f80 100644 --- a/src/firetower/integrations/tests/test_slack.py +++ b/src/firetower/integrations/tests/test_slack.py @@ -590,3 +590,84 @@ def test_pin_message_api_error(self): mock_response = MagicMock() mock_client.pins_add.side_effect = SlackApiError("error", mock_response) assert service.pin_message("C12345", "1234567890.123456") is False + + def test_archive_channel_success(self): + service, mock_client = self._make_service() + assert service.archive_channel("C12345") is True + mock_client.conversations_archive.assert_called_once_with(channel="C12345") + + def test_archive_channel_no_client(self): + mock_slack_config = {"BOT_TOKEN": None, "TEAM_ID": "sentry"} + with patch.object(settings, "SLACK", mock_slack_config): + service = SlackService() + assert service.archive_channel("C12345") is False + + def test_archive_channel_api_error(self): + service, mock_client = self._make_service() + mock_response = MagicMock() + mock_client.conversations_archive.side_effect = SlackApiError( + "error", mock_response + ) + assert service.archive_channel("C12345") is False + + def test_get_channel_history_with_limit(self): + service, mock_client = self._make_service() + mock_client.conversations_history.return_value = { + "ok": True, + "messages": [{"type": "message", "text": "hello", "ts": "1.0"}], + } + + messages = service.get_channel_history("C123", limit=1) + + assert len(messages) == 1 + mock_client.conversations_history.assert_called_once_with( + channel="C123", limit=1 + ) + + def test_get_channel_history_with_limit_no_pagination(self): + service, mock_client = self._make_service() + mock_client.conversations_history.return_value = { + "ok": True, + "has_more": True, + "messages": [{"type": "message", "text": "hello", "ts": "1.0"}], + "response_metadata": {"next_cursor": "cur1"}, + } + + messages = service.get_channel_history("C123", limit=1) + + assert len(messages) == 1 + assert mock_client.conversations_history.call_count == 1 + + def test_get_channel_history_with_limit_empty(self): + service, mock_client = self._make_service() + mock_client.conversations_history.return_value = { + "ok": True, + "messages": [], + } + + messages = service.get_channel_history("C123", limit=1) + + assert messages == [] + + def test_get_channel_history_with_limit_error(self): + service, mock_client = self._make_service() + mock_client.conversations_history.side_effect = Exception("timeout") + + messages = service.get_channel_history("C123", limit=1) + + assert messages == [] + + def test_get_channel_info_includes_is_archived(self): + service, mock_client = self._make_service() + mock_client.conversations_info.return_value = { + "channel": { + "id": "C12345", + "name": "inc-2000", + "is_private": False, + "is_archived": True, + } + } + + info = service.get_channel_info("C12345") + assert info is not None + assert info["is_archived"] is True From c57a49fa5aa226b95f65420b8b5fd57531b78972 Mon Sep 17 00:00:00 2001 From: Richard Gibert Date: Fri, 22 May 2026 11:13:53 -0400 Subject: [PATCH 2/8] fix(async): Handle archive failures and history API errors Three bugs fixed: 1. get_channel_history(limit=1) swallowed exceptions and returned [], which the task treated as "no messages" and archived active channels. Now exceptions propagate and the per-channel handler skips the channel. 2. The archival notice was posted before calling archive_channel. If archiving failed, the notice remained in the channel misleading users. Now the notice timestamp is captured and the message is deleted on failed archive. 3. A failed archive after a successful notice post would permanently prevent future archiving -- the bot's own notice became "history" causing the channel to be skipped on every subsequent run. Fixed by deleting the notice on failure (issue 2 fix). Also adds SlackService.delete_message() wrapping chat_delete. Co-Authored-By: Claude Agent transcript: https://claudescope.sentry.dev/share/Sq3ianFbApEXKqxUP2-TSgUOWGcOaf_XYnSDxYWDsKo --- src/firetower/incidents/tasks.py | 4 +- src/firetower/incidents/tests/test_tasks.py | 44 +++++++++++++++++++ src/firetower/integrations/services/slack.py | 30 ++++++++----- .../integrations/tests/test_slack.py | 8 ++-- 4 files changed, 69 insertions(+), 17 deletions(-) diff --git a/src/firetower/incidents/tasks.py b/src/firetower/incidents/tasks.py index 08856516..6b3ff96a 100644 --- a/src/firetower/incidents/tasks.py +++ b/src/firetower/incidents/tasks.py @@ -118,7 +118,7 @@ def archive_stale_channels() -> None: skipped += 1 continue - slack.post_message(channel_id, ARCHIVE_NOTICE) + notice_ts = slack.post_message(channel_id, ARCHIVE_NOTICE) if slack.archive_channel(channel_id): archived += 1 logger.info( @@ -126,6 +126,8 @@ def archive_stale_channels() -> None: f"(incident {link.incident.incident_number})" ) else: + if notice_ts: + slack.delete_message(channel_id, notice_ts) errored += 1 except Exception: errored += 1 diff --git a/src/firetower/incidents/tests/test_tasks.py b/src/firetower/incidents/tests/test_tasks.py index c12fa28c..fd838be3 100644 --- a/src/firetower/incidents/tests/test_tasks.py +++ b/src/firetower/incidents/tests/test_tasks.py @@ -293,3 +293,47 @@ def test_continues_on_single_channel_exception(self): archive_stale_channels.__wrapped__() mock_slack.archive_channel.assert_called_once_with("C_GOOD") + + def test_deletes_notice_on_failed_archive(self): + incident = self._make_incident() + self._make_link(incident, "C_FAIL") + + mock_slack = MagicMock() + mock_slack.client = True + mock_slack.parse_channel_id_from_url.return_value = "C_FAIL" + mock_slack.get_channel_info.return_value = { + "id": "C_FAIL", + "name": "inc-2010", + "is_private": False, + "is_archived": False, + } + mock_slack.get_channel_history.return_value = [] + mock_slack.post_message.return_value = "1234.5678" + mock_slack.archive_channel.return_value = False + + with patch("firetower.incidents.tasks.SlackService", return_value=mock_slack): + archive_stale_channels.__wrapped__() + + mock_slack.post_message.assert_called_once_with("C_FAIL", ARCHIVE_NOTICE) + mock_slack.delete_message.assert_called_once_with("C_FAIL", "1234.5678") + + def test_skips_channel_on_history_api_error(self): + incident = self._make_incident() + self._make_link(incident, "C_BROKEN") + + mock_slack = MagicMock() + mock_slack.client = True + mock_slack.parse_channel_id_from_url.return_value = "C_BROKEN" + mock_slack.get_channel_info.return_value = { + "id": "C_BROKEN", + "name": "inc-2011", + "is_private": False, + "is_archived": False, + } + mock_slack.get_channel_history.side_effect = Exception("API error") + + with patch("firetower.incidents.tasks.SlackService", return_value=mock_slack): + archive_stale_channels.__wrapped__() + + mock_slack.post_message.assert_not_called() + mock_slack.archive_channel.assert_not_called() diff --git a/src/firetower/integrations/services/slack.py b/src/firetower/integrations/services/slack.py index c4551ee9..231fe726 100644 --- a/src/firetower/integrations/services/slack.py +++ b/src/firetower/integrations/services/slack.py @@ -301,6 +301,21 @@ def pin_message(self, channel_id: str, message_ts: str) -> bool: ) return False + def delete_message(self, channel_id: str, message_ts: str) -> bool: + if not self.client: + logger.warning("Cannot delete message - Slack client not initialized") + return False + + try: + self.client.chat_delete(channel=channel_id, ts=message_ts) + return True + except SlackApiError as e: + logger.error( + f"Error deleting message: {e}", + extra={"channel_id": channel_id, "ts": message_ts}, + ) + return False + def add_bookmark(self, channel_id: str, title: str, link: str) -> bool: if not self.client: logger.warning("Cannot add bookmark - Slack client not initialized") @@ -419,18 +434,9 @@ def get_channel_history( if not self.client: return [] if limit is not None: - try: - response = self.client.conversations_history( - channel=channel_id, limit=limit - ) - except Exception: - logger.exception("Failed to fetch history for channel %s", channel_id) - return [] - if not response.get("ok"): - logger.error( - "conversations_history returned not-ok for channel %s", channel_id - ) - return [] + response = self.client.conversations_history( + channel=channel_id, limit=limit + ) return response.get("messages", []) messages: list[dict[str, Any]] = [] cursor: str | None = None diff --git a/src/firetower/integrations/tests/test_slack.py b/src/firetower/integrations/tests/test_slack.py index a2869f80..973673f3 100644 --- a/src/firetower/integrations/tests/test_slack.py +++ b/src/firetower/integrations/tests/test_slack.py @@ -5,6 +5,7 @@ import os from unittest.mock import MagicMock, patch +import pytest from slack_sdk.errors import SlackApiError from firetower.integrations.services.slack import SlackService @@ -649,13 +650,12 @@ def test_get_channel_history_with_limit_empty(self): assert messages == [] - def test_get_channel_history_with_limit_error(self): + def test_get_channel_history_with_limit_raises_on_error(self): service, mock_client = self._make_service() mock_client.conversations_history.side_effect = Exception("timeout") - messages = service.get_channel_history("C123", limit=1) - - assert messages == [] + with pytest.raises(Exception, match="timeout"): + service.get_channel_history("C123", limit=1) def test_get_channel_info_includes_is_archived(self): service, mock_client = self._make_service() From 331af6cb457e0b7470c2b510035e691f30f74035 Mon Sep 17 00:00:00 2001 From: Richard Gibert Date: Fri, 22 May 2026 11:27:03 -0400 Subject: [PATCH 3/8] fix(async): Add rate-limit delay and guard against silent archive Add a 2-second delay between channels to stay under Slack's Tier 3 rate limit (~50 req/min). Without this, workspaces with >25 incident channels would routinely hit rate limits on every daily run. Also skip archiving when post_message fails (returns None) instead of archiving the channel with no warning to users. Co-Authored-By: Claude Agent transcript: https://claudescope.sentry.dev/share/tRJBNFSwD_KABR2S1OuvR2T7oKVr_Hwgtfk-OUCIRyw --- src/firetower/incidents/tasks.py | 19 ++++++++++++--- src/firetower/incidents/tests/test_tasks.py | 26 +++++++++++++++++++++ 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/src/firetower/incidents/tasks.py b/src/firetower/incidents/tasks.py index 6b3ff96a..4b950073 100644 --- a/src/firetower/incidents/tasks.py +++ b/src/firetower/incidents/tasks.py @@ -1,6 +1,7 @@ import functools import logging import re +import time from typing import Protocol from datadog import statsd @@ -72,6 +73,8 @@ def wrapper() -> None: "appear to be any active discussions." ) +ARCHIVE_CHANNEL_DELAY_SECONDS = 2 + @datadog_log def archive_stale_channels() -> None: @@ -92,7 +95,10 @@ def archive_stale_channels() -> None: skipped = 0 errored = 0 - for link in links: + for i, link in enumerate(links): + if i > 0: + time.sleep(ARCHIVE_CHANNEL_DELAY_SECONDS) + scanned += 1 channel_id = slack.parse_channel_id_from_url(link.url) if not channel_id: @@ -119,6 +125,14 @@ def archive_stale_channels() -> None: continue notice_ts = slack.post_message(channel_id, ARCHIVE_NOTICE) + if not notice_ts: + logger.error( + f"Failed to post archive notice to channel {channel_id} " + f"(incident {link.incident.incident_number}), skipping archive" + ) + errored += 1 + continue + if slack.archive_channel(channel_id): archived += 1 logger.info( @@ -126,8 +140,7 @@ def archive_stale_channels() -> None: f"(incident {link.incident.incident_number})" ) else: - if notice_ts: - slack.delete_message(channel_id, notice_ts) + slack.delete_message(channel_id, notice_ts) errored += 1 except Exception: errored += 1 diff --git a/src/firetower/incidents/tests/test_tasks.py b/src/firetower/incidents/tests/test_tasks.py index fd838be3..a64fb2e4 100644 --- a/src/firetower/incidents/tests/test_tasks.py +++ b/src/firetower/incidents/tests/test_tasks.py @@ -156,6 +156,11 @@ def test_shows_title_for_public_incident(self, mock_incident_cls, mock_statsd): @pytest.mark.django_db class TestArchiveStaleChannels: + @pytest.fixture(autouse=True) + def _no_sleep(self): + with patch("firetower.incidents.tasks.time.sleep"): + yield + def _make_incident(self, **kwargs): defaults = { "title": "Test Incident", @@ -337,3 +342,24 @@ def test_skips_channel_on_history_api_error(self): mock_slack.post_message.assert_not_called() mock_slack.archive_channel.assert_not_called() + + def test_skips_archive_when_post_message_fails(self): + incident = self._make_incident() + self._make_link(incident, "C_NOPOST") + + mock_slack = MagicMock() + mock_slack.client = True + mock_slack.parse_channel_id_from_url.return_value = "C_NOPOST" + mock_slack.get_channel_info.return_value = { + "id": "C_NOPOST", + "name": "inc-2012", + "is_private": False, + "is_archived": False, + } + mock_slack.get_channel_history.return_value = [] + mock_slack.post_message.return_value = None + + with patch("firetower.incidents.tasks.SlackService", return_value=mock_slack): + archive_stale_channels.__wrapped__() + + mock_slack.archive_channel.assert_not_called() From 4bc34ba8ef5c25ac822934abe5430b50b26d3400 Mon Sep 17 00:00:00 2001 From: Richard Gibert Date: Fri, 22 May 2026 11:42:56 -0400 Subject: [PATCH 4/8] fix(async): Filter bot messages from history check and fix cleanup paths Two issues fixed: 1. If delete_message failed after a failed archive, the bot's own notice remained as the only message in the channel, causing every future run to skip it permanently. Now the history check uses limit=5 and filters out bot messages (by bot_id), so leftover bot notices don't prevent future archival attempts. 2. If archive_channel raised a non-SlackApiError (e.g. ConnectionError), the exception hit the outer handler which never called delete_message. Restructured so the archive attempt has its own try/except that always reaches the notice cleanup on any failure mode. Co-Authored-By: Claude Agent transcript: https://claudescope.sentry.dev/share/niq15OUwbdrxpEKAdaoIszEmbWDRe369PsqC1_qlA9Q --- src/firetower/incidents/tasks.py | 19 +++++--- src/firetower/incidents/tests/test_tasks.py | 50 ++++++++++++++++++++- 2 files changed, 62 insertions(+), 7 deletions(-) diff --git a/src/firetower/incidents/tasks.py b/src/firetower/incidents/tasks.py index 4b950073..a630ac19 100644 --- a/src/firetower/incidents/tasks.py +++ b/src/firetower/incidents/tasks.py @@ -119,8 +119,8 @@ def archive_stale_channels() -> None: skipped += 1 continue - messages = slack.get_channel_history(channel_id, limit=1) - if messages: + messages = slack.get_channel_history(channel_id, limit=5) + if any(not msg.get("bot_id") for msg in messages): skipped += 1 continue @@ -133,15 +133,24 @@ def archive_stale_channels() -> None: errored += 1 continue - if slack.archive_channel(channel_id): + try: + if not slack.archive_channel(channel_id): + raise RuntimeError( + f"archive_channel returned False for {channel_id}" + ) archived += 1 logger.info( f"Archived stale channel {channel_id} " f"(incident {link.incident.incident_number})" ) - else: - slack.delete_message(channel_id, notice_ts) + except Exception: errored += 1 + logger.exception( + f"Failed to archive channel {channel_id} " + f"(incident {link.incident.incident_number}), " + f"deleting notice" + ) + slack.delete_message(channel_id, notice_ts) except Exception: errored += 1 logger.exception( diff --git a/src/firetower/incidents/tests/test_tasks.py b/src/firetower/incidents/tests/test_tasks.py index a64fb2e4..95c003b2 100644 --- a/src/firetower/incidents/tests/test_tasks.py +++ b/src/firetower/incidents/tests/test_tasks.py @@ -199,7 +199,7 @@ def test_archives_channel_with_no_history(self): mock_slack.post_message.assert_called_once_with("C_EMPTY", ARCHIVE_NOTICE) mock_slack.archive_channel.assert_called_once_with("C_EMPTY") - def test_skips_channel_with_history(self): + def test_skips_channel_with_human_messages(self): incident = self._make_incident() self._make_link(incident, "C_ACTIVE") @@ -213,7 +213,7 @@ def test_skips_channel_with_history(self): "is_archived": False, } mock_slack.get_channel_history.return_value = [ - {"type": "message", "text": "still here", "ts": "1.0"} + {"type": "message", "user": "U123", "text": "still here", "ts": "1.0"} ] with patch("firetower.incidents.tasks.SlackService", return_value=mock_slack): @@ -222,6 +222,30 @@ def test_skips_channel_with_history(self): mock_slack.post_message.assert_not_called() mock_slack.archive_channel.assert_not_called() + def test_archives_channel_with_only_bot_messages(self): + incident = self._make_incident() + self._make_link(incident, "C_BOTONLY") + + mock_slack = MagicMock() + mock_slack.client = True + mock_slack.parse_channel_id_from_url.return_value = "C_BOTONLY" + mock_slack.get_channel_info.return_value = { + "id": "C_BOTONLY", + "name": "inc-2001", + "is_private": False, + "is_archived": False, + } + mock_slack.get_channel_history.return_value = [ + {"type": "message", "bot_id": "B123", "text": ARCHIVE_NOTICE, "ts": "1.0"} + ] + mock_slack.post_message.return_value = "2.0" + mock_slack.archive_channel.return_value = True + + with patch("firetower.incidents.tasks.SlackService", return_value=mock_slack): + archive_stale_channels.__wrapped__() + + mock_slack.archive_channel.assert_called_once_with("C_BOTONLY") + def test_skips_already_archived_channel(self): incident = self._make_incident() self._make_link(incident, "C_ARCHIVED") @@ -343,6 +367,28 @@ def test_skips_channel_on_history_api_error(self): mock_slack.post_message.assert_not_called() mock_slack.archive_channel.assert_not_called() + def test_deletes_notice_on_archive_exception(self): + incident = self._make_incident() + self._make_link(incident, "C_THROW") + + mock_slack = MagicMock() + mock_slack.client = True + mock_slack.parse_channel_id_from_url.return_value = "C_THROW" + mock_slack.get_channel_info.return_value = { + "id": "C_THROW", + "name": "inc-2013", + "is_private": False, + "is_archived": False, + } + mock_slack.get_channel_history.return_value = [] + mock_slack.post_message.return_value = "99.99" + mock_slack.archive_channel.side_effect = ConnectionError("network timeout") + + with patch("firetower.incidents.tasks.SlackService", return_value=mock_slack): + archive_stale_channels.__wrapped__() + + mock_slack.delete_message.assert_called_once_with("C_THROW", "99.99") + def test_skips_archive_when_post_message_fails(self): incident = self._make_incident() self._make_link(incident, "C_NOPOST") From eb6b6d6abcb6e06637a2edf1a4b0777fa36a147d Mon Sep 17 00:00:00 2001 From: Richard Gibert Date: Fri, 22 May 2026 12:06:02 -0400 Subject: [PATCH 5/8] fix(async): Filter only own bot messages from history check The previous bot_id filter ignored messages from any bot, which could skip messages from other integrations that legitimately indicate channel activity. Now uses auth_test to resolve the bot's own identity and only filters messages matching that specific bot_id. Messages from other bots are treated as real activity. Also adds a cached bot_id property to SlackService backed by auth_test, called once per task run. Co-Authored-By: Claude Agent transcript: https://claudescope.sentry.dev/share/l1u4EK0km7aDME7RkCQJB5vxCzc3c61oPc4oOSn7rws --- src/firetower/incidents/tasks.py | 9 +++- src/firetower/incidents/tests/test_tasks.py | 48 ++++++++++++++++++- src/firetower/integrations/services/slack.py | 15 ++++++ .../integrations/tests/test_slack.py | 20 ++++++++ 4 files changed, 89 insertions(+), 3 deletions(-) diff --git a/src/firetower/incidents/tasks.py b/src/firetower/incidents/tasks.py index a630ac19..75a20360 100644 --- a/src/firetower/incidents/tasks.py +++ b/src/firetower/incidents/tasks.py @@ -86,6 +86,8 @@ def archive_stale_channels() -> None: Schedule.objects.filter(name="archive_stale_channels").update(repeats=0) return + own_bot_id = slack.bot_id + links = ExternalLink.objects.filter(type=ExternalLinkType.SLACK).select_related( "incident" ) @@ -120,7 +122,12 @@ def archive_stale_channels() -> None: continue messages = slack.get_channel_history(channel_id, limit=5) - if any(not msg.get("bot_id") for msg in messages): + non_own_messages = [ + msg + for msg in messages + if msg.get("bot_id") != own_bot_id or not own_bot_id + ] + if non_own_messages: skipped += 1 continue diff --git a/src/firetower/incidents/tests/test_tasks.py b/src/firetower/incidents/tests/test_tasks.py index 95c003b2..a7ffa357 100644 --- a/src/firetower/incidents/tests/test_tasks.py +++ b/src/firetower/incidents/tests/test_tasks.py @@ -177,12 +177,15 @@ def _make_link(self, incident, channel_id="C12345"): url=f"https://sentry.slack.com/archives/{channel_id}", ) + OWN_BOT_ID = "B_FIRETOWER" + def test_archives_channel_with_no_history(self): incident = self._make_incident() self._make_link(incident, "C_EMPTY") mock_slack = MagicMock() mock_slack.client = True + mock_slack.bot_id = self.OWN_BOT_ID mock_slack.parse_channel_id_from_url.return_value = "C_EMPTY" mock_slack.get_channel_info.return_value = { "id": "C_EMPTY", @@ -205,6 +208,7 @@ def test_skips_channel_with_human_messages(self): mock_slack = MagicMock() mock_slack.client = True + mock_slack.bot_id = self.OWN_BOT_ID mock_slack.parse_channel_id_from_url.return_value = "C_ACTIVE" mock_slack.get_channel_info.return_value = { "id": "C_ACTIVE", @@ -222,12 +226,13 @@ def test_skips_channel_with_human_messages(self): mock_slack.post_message.assert_not_called() mock_slack.archive_channel.assert_not_called() - def test_archives_channel_with_only_bot_messages(self): + def test_archives_channel_with_only_own_bot_messages(self): incident = self._make_incident() self._make_link(incident, "C_BOTONLY") mock_slack = MagicMock() mock_slack.client = True + mock_slack.bot_id = self.OWN_BOT_ID mock_slack.parse_channel_id_from_url.return_value = "C_BOTONLY" mock_slack.get_channel_info.return_value = { "id": "C_BOTONLY", @@ -236,7 +241,12 @@ def test_archives_channel_with_only_bot_messages(self): "is_archived": False, } mock_slack.get_channel_history.return_value = [ - {"type": "message", "bot_id": "B123", "text": ARCHIVE_NOTICE, "ts": "1.0"} + { + "type": "message", + "bot_id": self.OWN_BOT_ID, + "text": ARCHIVE_NOTICE, + "ts": "1.0", + } ] mock_slack.post_message.return_value = "2.0" mock_slack.archive_channel.return_value = True @@ -246,6 +256,35 @@ def test_archives_channel_with_only_bot_messages(self): mock_slack.archive_channel.assert_called_once_with("C_BOTONLY") + def test_skips_channel_with_other_bot_messages(self): + incident = self._make_incident() + self._make_link(incident, "C_OTHERBOT") + + mock_slack = MagicMock() + mock_slack.client = True + mock_slack.bot_id = self.OWN_BOT_ID + mock_slack.parse_channel_id_from_url.return_value = "C_OTHERBOT" + mock_slack.get_channel_info.return_value = { + "id": "C_OTHERBOT", + "name": "inc-2001", + "is_private": False, + "is_archived": False, + } + mock_slack.get_channel_history.return_value = [ + { + "type": "message", + "bot_id": "B_SOMEONE_ELSE", + "text": "alert from another bot", + "ts": "1.0", + } + ] + + with patch("firetower.incidents.tasks.SlackService", return_value=mock_slack): + archive_stale_channels.__wrapped__() + + mock_slack.post_message.assert_not_called() + mock_slack.archive_channel.assert_not_called() + def test_skips_already_archived_channel(self): incident = self._make_incident() self._make_link(incident, "C_ARCHIVED") @@ -302,6 +341,7 @@ def test_continues_on_single_channel_exception(self): mock_slack = MagicMock() mock_slack.client = True + mock_slack.bot_id = self.OWN_BOT_ID mock_slack.parse_channel_id_from_url.side_effect = ( lambda url: "C_BAD" if "C_BAD" in url else "C_GOOD" ) @@ -329,6 +369,7 @@ def test_deletes_notice_on_failed_archive(self): mock_slack = MagicMock() mock_slack.client = True + mock_slack.bot_id = self.OWN_BOT_ID mock_slack.parse_channel_id_from_url.return_value = "C_FAIL" mock_slack.get_channel_info.return_value = { "id": "C_FAIL", @@ -352,6 +393,7 @@ def test_skips_channel_on_history_api_error(self): mock_slack = MagicMock() mock_slack.client = True + mock_slack.bot_id = self.OWN_BOT_ID mock_slack.parse_channel_id_from_url.return_value = "C_BROKEN" mock_slack.get_channel_info.return_value = { "id": "C_BROKEN", @@ -373,6 +415,7 @@ def test_deletes_notice_on_archive_exception(self): mock_slack = MagicMock() mock_slack.client = True + mock_slack.bot_id = self.OWN_BOT_ID mock_slack.parse_channel_id_from_url.return_value = "C_THROW" mock_slack.get_channel_info.return_value = { "id": "C_THROW", @@ -395,6 +438,7 @@ def test_skips_archive_when_post_message_fails(self): mock_slack = MagicMock() mock_slack.client = True + mock_slack.bot_id = self.OWN_BOT_ID mock_slack.parse_channel_id_from_url.return_value = "C_NOPOST" mock_slack.get_channel_info.return_value = { "id": "C_NOPOST", diff --git a/src/firetower/integrations/services/slack.py b/src/firetower/integrations/services/slack.py index 231fe726..bc11fa91 100644 --- a/src/firetower/integrations/services/slack.py +++ b/src/firetower/integrations/services/slack.py @@ -51,10 +51,25 @@ def __init__(self) -> None: ) self.client = WebClient(token=self.bot_token) if self.bot_token else None + self._bot_id: str | None = None if self.client is None: logger.warning("Slack client not initialized - missing bot token") + @property + def bot_id(self) -> str | None: + if self._bot_id is not None: + return self._bot_id + if not self.client: + return None + try: + response = self.client.auth_test() + self._bot_id = response.get("bot_id") + return self._bot_id + except SlackApiError as e: + logger.error(f"Error fetching bot identity: {e}") + return None + def get_user_profile_by_email(self, email: str) -> dict | None: """ Get user profile information from Slack by email. diff --git a/src/firetower/integrations/tests/test_slack.py b/src/firetower/integrations/tests/test_slack.py index 973673f3..3cb25664 100644 --- a/src/firetower/integrations/tests/test_slack.py +++ b/src/firetower/integrations/tests/test_slack.py @@ -671,3 +671,23 @@ def test_get_channel_info_includes_is_archived(self): info = service.get_channel_info("C12345") assert info is not None assert info["is_archived"] is True + + def test_bot_id_returns_cached_value(self): + service, mock_client = self._make_service() + mock_client.auth_test.return_value = {"bot_id": "B_TEST"} + + assert service.bot_id == "B_TEST" + assert service.bot_id == "B_TEST" + mock_client.auth_test.assert_called_once() + + def test_bot_id_returns_none_without_client(self): + mock_slack_config = {"BOT_TOKEN": None, "TEAM_ID": "sentry"} + with patch.object(settings, "SLACK", mock_slack_config): + service = SlackService() + assert service.bot_id is None + + def test_bot_id_returns_none_on_api_error(self): + service, mock_client = self._make_service() + mock_response = MagicMock() + mock_client.auth_test.side_effect = SlackApiError("error", mock_response) + assert service.bot_id is None From 04dfff014e6ce39404f3d344550ffba0b543f44e Mon Sep 17 00:00:00 2001 From: Richard Gibert Date: Fri, 22 May 2026 13:45:15 -0400 Subject: [PATCH 6/8] Delete docs/superpowers/specs/2026-05-22-archive-stale-channels-design.md --- ...026-05-22-archive-stale-channels-design.md | 85 ------------------- 1 file changed, 85 deletions(-) delete mode 100644 docs/superpowers/specs/2026-05-22-archive-stale-channels-design.md diff --git a/docs/superpowers/specs/2026-05-22-archive-stale-channels-design.md b/docs/superpowers/specs/2026-05-22-archive-stale-channels-design.md deleted file mode 100644 index 288920f2..00000000 --- a/docs/superpowers/specs/2026-05-22-archive-stale-channels-design.md +++ /dev/null @@ -1,85 +0,0 @@ -# Archive Stale Slack Channels - -## Summary - -Add a daily django-q2 scheduled task that finds bot-created Slack channels where Slack's workspace retention policy has deleted all message history, posts a notice explaining the channel is being archived due to inactivity, and archives the channel. - -## Scope - -- Included: all Slack channels tracked via `ExternalLink(type=SLACK)` records, both public and private. -- Excluded: channels not created by Firetower (no ExternalLink record), channels that still have any message history. - -## Detection Logic - -A channel is considered "stale" when ALL of these are true: - -1. An `ExternalLink(type=SLACK)` record exists for it. -2. `conversations_info` confirms the channel exists and `is_archived` is false. -3. `conversations_history(limit=1)` returns zero messages -- meaning Slack's retention policy has wiped all content. - -If `conversations_info` fails (channel deleted, bot removed, etc.), skip it and log a warning. Do not treat API errors as "no history." - -## Archive Flow Per Channel - -1. Post a message: "This channel is being archived by Firetower because all message history has been removed by the workspace retention policy and there doesn't appear to be any active discussions." -2. Call `conversations_archive` to archive the channel. -3. Log the channel ID and incident number. - -## New Code - -### `SlackService.archive_channel(channel_id)` -- `src/firetower/integrations/services/slack.py` - -Wraps `conversations_archive`. Returns bool. Logs errors on `SlackApiError`. - -### `archive_stale_channels()` -- `src/firetower/incidents/tasks.py` - -Decorated with `@datadog_log`. Steps: - -1. If `SlackService` has no client (missing bot token), disable this schedule in django-q and return early. -2. Query `ExternalLink.objects.filter(type=ExternalLinkType.SLACK)` and select_related incident. -3. For each link, parse channel ID via `SlackService.parse_channel_id_from_url`. -4. Call `conversations_info` -- skip if channel is already archived or API errors. -5. Call `conversations_history` with `limit=1` -- if messages list is non-empty, skip. -6. Post the archival notice via `SlackService.post_message`. -7. Archive via `SlackService.archive_channel`. - -Use a single `SlackService` instance for the entire task run. Do not fail the whole task if one channel errors -- log and continue. - -### `SCHEDULES` entry - -```python -"archive_stale_channels": { - "func": "firetower.incidents.tasks.archive_stale_channels", - "schedule_type": Schedule.DAILY, - "repeats": -1, -} -``` - -### Migration `0019_schedule_archive_stale_channels` - -Data migration following the same pattern as `0016_schedule_demo.py`. Creates the django-q Schedule record. - -### `SlackService.get_channel_history` change - -Add an optional `limit` parameter (default `None` = current behavior of paginating everything). When `limit` is set, pass it directly and do not paginate. The archive task calls this with `limit=1`. - -## Observability - -- `@datadog_log` decorator provides `django_q.task.run`, `django_q.task.success`, `django_q.task.error` metrics. -- Per-run counters logged at INFO level: channels scanned, archived, skipped, errored. - -## Error Handling - -- Single channel failure must not abort the task. Catch `SlackApiError` per channel, log, continue. -- If `SlackService` has no client (missing bot token), disable this schedule via `Schedule.objects.filter(name="archive_stale_channels").update(repeats=0)`, log an error, and return. - -## Testing - -- Unit test `archive_stale_channels` with mocked `SlackService`: - - Channel with no history -> archived (post + archive called). - - Channel with history -> skipped. - - Channel already archived -> skipped. - - Channel API error -> skipped with error logged. - - No Slack client -> schedule disabled. -- Unit test `SlackService.archive_channel`. -- Unit test `get_channel_history` with `limit` parameter. From f6272da18d88a1bfc3285ec81aebeb37c2a94848 Mon Sep 17 00:00:00 2001 From: Richard Gibert Date: Mon, 25 May 2026 11:00:12 -0400 Subject: [PATCH 7/8] fix(slack): Prevent premature archival of active incident channels Filter to only terminal-status incidents (Done, Cancelled), fetch full channel history instead of only the 5 most recent messages, and check thread replies for human activity before archiving. Co-Authored-By: Claude Agent transcript: https://claudescope.sentry.dev/share/et_CT8X1CKbstT_wBeP1lKRqDdU55TAd5nPbC_Yfhbc --- src/firetower/incidents/tasks.py | 28 +++- src/firetower/incidents/tests/test_tasks.py | 141 ++++++++++++++++++++ 2 files changed, 164 insertions(+), 5 deletions(-) diff --git a/src/firetower/incidents/tasks.py b/src/firetower/incidents/tasks.py index 75a20360..41597283 100644 --- a/src/firetower/incidents/tasks.py +++ b/src/firetower/incidents/tasks.py @@ -7,7 +7,12 @@ from datadog import statsd from django_q.tasks import Schedule -from firetower.incidents.models import ExternalLink, ExternalLinkType, Incident +from firetower.incidents.models import ( + ExternalLink, + ExternalLinkType, + Incident, + IncidentStatus, +) from firetower.integrations.services.slack import SlackService SCHEDULES = { @@ -88,9 +93,11 @@ def archive_stale_channels() -> None: own_bot_id = slack.bot_id - links = ExternalLink.objects.filter(type=ExternalLinkType.SLACK).select_related( - "incident" - ) + terminal_statuses = [IncidentStatus.DONE, IncidentStatus.CANCELLED] + links = ExternalLink.objects.filter( + type=ExternalLinkType.SLACK, + incident__status__in=terminal_statuses, + ).select_related("incident") scanned = 0 archived = 0 @@ -121,7 +128,7 @@ def archive_stale_channels() -> None: skipped += 1 continue - messages = slack.get_channel_history(channel_id, limit=5) + messages = slack.get_channel_history(channel_id) non_own_messages = [ msg for msg in messages @@ -131,6 +138,17 @@ def archive_stale_channels() -> None: skipped += 1 continue + has_thread_activity = False + for msg in messages: + if msg.get("reply_count", 0) > 0: + replies = slack.get_thread_replies(channel_id, msg["ts"]) + if replies: + has_thread_activity = True + break + if has_thread_activity: + skipped += 1 + continue + notice_ts = slack.post_message(channel_id, ARCHIVE_NOTICE) if not notice_ts: logger.error( diff --git a/src/firetower/incidents/tests/test_tasks.py b/src/firetower/incidents/tests/test_tasks.py index a7ffa357..57370664 100644 --- a/src/firetower/incidents/tests/test_tasks.py +++ b/src/firetower/incidents/tests/test_tasks.py @@ -199,6 +199,7 @@ def test_archives_channel_with_no_history(self): with patch("firetower.incidents.tasks.SlackService", return_value=mock_slack): archive_stale_channels.__wrapped__() + mock_slack.get_channel_history.assert_called_once_with("C_EMPTY") mock_slack.post_message.assert_called_once_with("C_EMPTY", ARCHIVE_NOTICE) mock_slack.archive_channel.assert_called_once_with("C_EMPTY") @@ -453,3 +454,143 @@ def test_skips_archive_when_post_message_fails(self): archive_stale_channels.__wrapped__() mock_slack.archive_channel.assert_not_called() + + def test_skips_active_incident_channels(self): + active = self._make_incident(status=IncidentStatus.ACTIVE) + self._make_link(active, "C_ACTIVE_INC") + mitigated = self._make_incident(status=IncidentStatus.MITIGATED) + self._make_link(mitigated, "C_MITIGATED_INC") + postmortem = self._make_incident(status=IncidentStatus.POSTMORTEM) + self._make_link(postmortem, "C_POSTMORTEM_INC") + + mock_slack = MagicMock() + mock_slack.client = True + mock_slack.bot_id = self.OWN_BOT_ID + + with patch("firetower.incidents.tasks.SlackService", return_value=mock_slack): + archive_stale_channels.__wrapped__() + + mock_slack.parse_channel_id_from_url.assert_not_called() + mock_slack.archive_channel.assert_not_called() + + def test_includes_cancelled_incident_channels(self): + incident = self._make_incident(status=IncidentStatus.CANCELLED) + self._make_link(incident, "C_CANCELLED") + + mock_slack = MagicMock() + mock_slack.client = True + mock_slack.bot_id = self.OWN_BOT_ID + mock_slack.parse_channel_id_from_url.return_value = "C_CANCELLED" + mock_slack.get_channel_info.return_value = { + "id": "C_CANCELLED", + "name": "inc-3000", + "is_private": False, + "is_archived": False, + } + mock_slack.get_channel_history.return_value = [] + mock_slack.get_thread_replies.return_value = [] + mock_slack.archive_channel.return_value = True + + with patch("firetower.incidents.tasks.SlackService", return_value=mock_slack): + archive_stale_channels.__wrapped__() + + mock_slack.archive_channel.assert_called_once_with("C_CANCELLED") + + def test_fetches_full_history_not_limited(self): + incident = self._make_incident() + self._make_link(incident, "C_DEEPHISTORY") + + mock_slack = MagicMock() + mock_slack.client = True + mock_slack.bot_id = self.OWN_BOT_ID + mock_slack.parse_channel_id_from_url.return_value = "C_DEEPHISTORY" + mock_slack.get_channel_info.return_value = { + "id": "C_DEEPHISTORY", + "name": "inc-4000", + "is_private": False, + "is_archived": False, + } + mock_slack.get_channel_history.return_value = [ + {"type": "message", "bot_id": self.OWN_BOT_ID, "text": "bot1", "ts": "5.0"}, + {"type": "message", "bot_id": self.OWN_BOT_ID, "text": "bot2", "ts": "4.0"}, + {"type": "message", "bot_id": self.OWN_BOT_ID, "text": "bot3", "ts": "3.0"}, + {"type": "message", "bot_id": self.OWN_BOT_ID, "text": "bot4", "ts": "2.0"}, + {"type": "message", "bot_id": self.OWN_BOT_ID, "text": "bot5", "ts": "1.5"}, + { + "type": "message", + "user": "U_HUMAN", + "text": "old human msg", + "ts": "1.0", + }, + ] + + with patch("firetower.incidents.tasks.SlackService", return_value=mock_slack): + archive_stale_channels.__wrapped__() + + mock_slack.get_channel_history.assert_called_once_with("C_DEEPHISTORY") + mock_slack.archive_channel.assert_not_called() + + def test_skips_channel_with_human_thread_replies(self): + incident = self._make_incident() + self._make_link(incident, "C_THREADS") + + mock_slack = MagicMock() + mock_slack.client = True + mock_slack.bot_id = self.OWN_BOT_ID + mock_slack.parse_channel_id_from_url.return_value = "C_THREADS" + mock_slack.get_channel_info.return_value = { + "id": "C_THREADS", + "name": "inc-5000", + "is_private": False, + "is_archived": False, + } + mock_slack.get_channel_history.return_value = [ + { + "type": "message", + "bot_id": self.OWN_BOT_ID, + "text": "bot msg with thread", + "ts": "1.0", + "reply_count": 2, + } + ] + mock_slack.get_thread_replies.return_value = [ + {"type": "message", "user": "U_HUMAN", "text": "reply", "ts": "1.1"} + ] + + with patch("firetower.incidents.tasks.SlackService", return_value=mock_slack): + archive_stale_channels.__wrapped__() + + mock_slack.get_thread_replies.assert_called_once_with("C_THREADS", "1.0") + mock_slack.archive_channel.assert_not_called() + + def test_archives_channel_with_bot_only_threads(self): + incident = self._make_incident() + self._make_link(incident, "C_BOTTHREADS") + + mock_slack = MagicMock() + mock_slack.client = True + mock_slack.bot_id = self.OWN_BOT_ID + mock_slack.parse_channel_id_from_url.return_value = "C_BOTTHREADS" + mock_slack.get_channel_info.return_value = { + "id": "C_BOTTHREADS", + "name": "inc-5001", + "is_private": False, + "is_archived": False, + } + mock_slack.get_channel_history.return_value = [ + { + "type": "message", + "bot_id": self.OWN_BOT_ID, + "text": "bot msg", + "ts": "1.0", + "reply_count": 1, + } + ] + mock_slack.get_thread_replies.return_value = [] + mock_slack.post_message.return_value = "2.0" + mock_slack.archive_channel.return_value = True + + with patch("firetower.incidents.tasks.SlackService", return_value=mock_slack): + archive_stale_channels.__wrapped__() + + mock_slack.archive_channel.assert_called_once_with("C_BOTTHREADS") From 2fae4e229671b3e666c2bc732e4fbbe50092a0cb Mon Sep 17 00:00:00 2001 From: Richard Gibert Date: Mon, 25 May 2026 11:01:38 -0400 Subject: [PATCH 8/8] fix(slack): Abort archive run when own bot ID is unresolvable Move the None bot_id guard to an early return instead of folding it into the list comprehension where it caused every message to match, preventing any channel from being archived. Co-Authored-By: Claude Agent transcript: https://claudescope.sentry.dev/share/3m8ZgzizSqg0HSivytiWzMW0Yuh3h10sQe0WTQSV0lc --- src/firetower/incidents/tasks.py | 7 ++++--- src/firetower/incidents/tests/test_tasks.py | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/firetower/incidents/tasks.py b/src/firetower/incidents/tasks.py index 41597283..b1b86159 100644 --- a/src/firetower/incidents/tasks.py +++ b/src/firetower/incidents/tasks.py @@ -92,6 +92,9 @@ def archive_stale_channels() -> None: return own_bot_id = slack.bot_id + if not own_bot_id: + logger.error("Could not determine own bot ID, aborting archive run") + return terminal_statuses = [IncidentStatus.DONE, IncidentStatus.CANCELLED] links = ExternalLink.objects.filter( @@ -130,9 +133,7 @@ def archive_stale_channels() -> None: messages = slack.get_channel_history(channel_id) non_own_messages = [ - msg - for msg in messages - if msg.get("bot_id") != own_bot_id or not own_bot_id + msg for msg in messages if msg.get("bot_id") != own_bot_id ] if non_own_messages: skipped += 1 diff --git a/src/firetower/incidents/tests/test_tasks.py b/src/firetower/incidents/tests/test_tasks.py index 57370664..ef3948ca 100644 --- a/src/firetower/incidents/tests/test_tasks.py +++ b/src/firetower/incidents/tests/test_tasks.py @@ -594,3 +594,17 @@ def test_archives_channel_with_bot_only_threads(self): archive_stale_channels.__wrapped__() mock_slack.archive_channel.assert_called_once_with("C_BOTTHREADS") + + def test_aborts_when_bot_id_is_none(self): + incident = self._make_incident() + self._make_link(incident, "C_NOBOT") + + mock_slack = MagicMock() + mock_slack.client = True + mock_slack.bot_id = None + + with patch("firetower.incidents.tasks.SlackService", return_value=mock_slack): + archive_stale_channels.__wrapped__() + + mock_slack.parse_channel_id_from_url.assert_not_called() + mock_slack.archive_channel.assert_not_called()