From 22e7ffab78a1226c7304c21cde23edfdf6460b58 Mon Sep 17 00:00:00 2001 From: khtad Date: Thu, 23 Apr 2026 17:13:30 -0400 Subject: [PATCH] Tighten faceoff decay notebook shot filtering --- notebooks/faceoff_decay_analysis.ipynb | 32 ++++++++++++++++++-------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/notebooks/faceoff_decay_analysis.ipynb b/notebooks/faceoff_decay_analysis.ipynb index 3fa7082..84e6189 100644 --- a/notebooks/faceoff_decay_analysis.ipynb +++ b/notebooks/faceoff_decay_analysis.ipynb @@ -12,13 +12,13 @@ "id": "b0hhpm5m746", "metadata": {}, "outputs": [], - "source": "import os\nimport sys\nimport sqlite3\n\nimport matplotlib.pyplot as plt\nimport matplotlib.ticker as mticker\nimport seaborn as sns\nimport numpy as np\n\n# Add src/ to path — handle CWD being project root or notebooks/\nfor _candidate in [os.path.join(os.getcwd(), \"src\"),\n os.path.join(os.getcwd(), \"..\", \"src\")]:\n _candidate = os.path.abspath(_candidate)\n if os.path.isdir(_candidate) and _candidate not in sys.path:\n sys.path.insert(0, _candidate)\n break\n\nfrom database import DATABASE_PATH\n\nsns.set_theme(style=\"whitegrid\")\nprint(f\"Database: {DATABASE_PATH}\")\nconn = sqlite3.connect(DATABASE_PATH)\nconn.row_factory = sqlite3.Row\nprint(\"Connected.\")" + "source": "import os\nimport sys\nimport sqlite3\n\nimport matplotlib.pyplot as plt\nimport matplotlib.ticker as mticker\nimport seaborn as sns\nimport numpy as np\n\n# Add src/ to path — handle CWD being project root or notebooks/\nfor _candidate in [os.path.join(os.getcwd(), \"src\"),\n os.path.join(os.getcwd(), \"..\", \"src\")]:\n _candidate = os.path.abspath(_candidate)\n if os.path.isdir(_candidate) and _candidate not in sys.path:\n sys.path.insert(0, _candidate)\n break\n\nfrom database import DATABASE_PATH\nfrom rink_viz import RINK_HALF_LENGTH, RINK_HALF_WIDTH, draw_half_rink\n\nsns.set_theme(style=\"whitegrid\")\nprint(f\"Database: {DATABASE_PATH}\")\nconn = 
sqlite3.connect(DATABASE_PATH)\nconn.row_factory = sqlite3.Row\nprint(\"Connected.\")" }, { "cell_type": "markdown", "id": "31r14ls1oxr", "metadata": {}, - "source": "## 1. Data availability check\n\nVerify we have enough shot events with `seconds_since_faceoff` populated, broken down by faceoff zone." + "source": "## 1. Data availability check\n\nVerify we have enough shot events with `seconds_since_faceoff` populated, broken down by faceoff zone, exclude blocked-shot rows from the analysis, and automatically exclude seasons where the raw play-by-play feed does not contain enough faceoff events to support this analysis." }, { "cell_type": "code", @@ -26,7 +26,7 @@ "id": "mslrsoqwsyl", "metadata": {}, "outputs": [], - "source": "cur = conn.cursor()\n\ncur.execute(\"SELECT COUNT(*) FROM shot_events\")\ntotal_shots = cur.fetchone()[0]\n\ncur.execute(\"SELECT COUNT(*) FROM shot_events WHERE seconds_since_faceoff IS NOT NULL\")\nwith_faceoff = cur.fetchone()[0]\n\ncur.execute(\"SELECT COUNT(*) FROM shot_events WHERE faceoff_zone_code IS NOT NULL\")\nwith_zone = cur.fetchone()[0]\n\ncoverage_pct = (with_faceoff / total_shots * 100) if total_shots > 0 else 0\nzone_pct = (with_zone / total_shots * 100) if total_shots > 0 else 0\n\nprint(f\"Total shot events: {total_shots:,}\")\nprint(f\"With seconds_since_faceoff: {with_faceoff:,} ({coverage_pct:.1f}%)\")\nprint(f\"With faceoff_zone_code: {with_zone:,} ({zone_pct:.1f}%)\")\nprint()\n\nZONE_CODES = [\"O\", \"D\", \"N\"]\nZONE_LABELS = {\"O\": \"Offensive zone\", \"D\": \"Defensive zone\", \"N\": \"Neutral zone\"}\n\ncur.execute(\"\"\"\n SELECT faceoff_zone_code,\n COUNT(*) AS shots,\n SUM(is_goal) AS goals,\n ROUND(CAST(SUM(is_goal) AS REAL) / COUNT(*), 4) AS goal_rate\n FROM shot_events\n WHERE seconds_since_faceoff IS NOT NULL\n AND faceoff_zone_code IS NOT NULL\n GROUP BY faceoff_zone_code\n ORDER BY faceoff_zone_code\n\"\"\")\nprint(f\"{'Zone':<20} {'Shots':>10} {'Goals':>8} {'Goal Rate':>10}\")\nprint(\"-\" * 52)\nfor 
row in cur.fetchall():\n label = ZONE_LABELS.get(row[0], row[0])\n print(f\"{label:<20} {row[1]:>10,} {row[2]:>8,} {row[3]:>10.4f}\")\n\nif total_shots == 0:\n print(\"\\n*** No shot events found. Run the scraper first. ***\")" + "source": "cur = conn.cursor()\n\nMIN_FACEOFF_SHOTS_PER_SEASON = 1000\nMIN_FACEOFF_COVERAGE_PCT = 50.0\nANALYSIS_SHOT_WHERE = \"(s.is_goal = 1 OR s.goalie_id IS NOT NULL)\"\n\n\ndef format_season_label(season_code):\n season_text = str(season_code)\n if len(season_text) == 8:\n return f\"{season_text[:4]}-{season_text[-2:]}\"\n return season_text\n\n\n\ndef season_params(*leading_params):\n return [*leading_params, *INCLUDED_SEASONS]\n\n\ncur.execute(\"SELECT COUNT(*) FROM shot_events\")\ntotal_shots = cur.fetchone()[0]\n\ncur.execute(f\"SELECT COUNT(*) FROM shot_events AS s WHERE {ANALYSIS_SHOT_WHERE}\")\nanalysis_shots = cur.fetchone()[0]\n\ncur.execute(f\"SELECT COUNT(*) FROM shot_events AS s WHERE {ANALYSIS_SHOT_WHERE} AND s.seconds_since_faceoff IS NOT NULL\")\nwith_faceoff = cur.fetchone()[0]\n\ncur.execute(f\"SELECT COUNT(*) FROM shot_events AS s WHERE {ANALYSIS_SHOT_WHERE} AND s.faceoff_zone_code IS NOT NULL\")\nwith_zone = cur.fetchone()[0]\n\ncoverage_pct = (with_faceoff / analysis_shots * 100) if analysis_shots > 0 else 0\nzone_pct = (with_zone / analysis_shots * 100) if analysis_shots > 0 else 0\nexcluded_shots = total_shots - analysis_shots\n\nprint(f\"Total shot events in table: {total_shots:,}\")\nprint(f\"Analysis-eligible shots: {analysis_shots:,}\")\nprint(f\"Excluded rows (blocked-shot proxy): {excluded_shots:,}\")\nprint(f\"With seconds_since_faceoff: {with_faceoff:,} ({coverage_pct:.1f}%)\")\nprint(f\"With faceoff_zone_code: {with_zone:,} ({zone_pct:.1f}%)\")\nprint()\n\nZONE_CODES = [\"O\", \"D\", \"N\"]\nZONE_LABELS = {\"O\": \"Offensive zone\", \"D\": \"Defensive zone\", \"N\": \"Neutral zone\"}\n\ncur.execute(f\"\"\"\n SELECT g.season,\n COUNT(*) AS total_shots,\n SUM(CASE WHEN s.seconds_since_faceoff IS NOT NULL 
THEN 1 ELSE 0 END) AS faceoff_shots,\n SUM(CASE WHEN s.faceoff_zone_code IS NOT NULL THEN 1 ELSE 0 END) AS zone_shots\n FROM shot_events AS s\n JOIN games AS g\n ON g.game_id = s.game_id\n WHERE {ANALYSIS_SHOT_WHERE}\n GROUP BY g.season\n ORDER BY g.season\n\"\"\")\nseason_coverage_rows = cur.fetchall()\n\nINCLUDED_SEASONS = []\nEXCLUDED_SEASONS = []\n\nprint(\"Season-level faceoff coverage:\")\nprint(f\"{'Season':<10} {'Total Shots':>12} {'With Faceoff':>13} {'Coverage %':>11} {'Status':>10}\")\nprint(\"-\" * 62)\nfor row in season_coverage_rows:\n faceoff_count = row[\"faceoff_shots\"] or 0\n season_total = row[\"total_shots\"] or 0\n season_coverage_pct = (faceoff_count / season_total * 100) if season_total > 0 else 0.0\n include_season = (\n faceoff_count >= MIN_FACEOFF_SHOTS_PER_SEASON\n and season_coverage_pct >= MIN_FACEOFF_COVERAGE_PCT\n )\n status = \"included\" if include_season else \"excluded\"\n target_list = INCLUDED_SEASONS if include_season else EXCLUDED_SEASONS\n target_list.append(row[\"season\"])\n print(f\"{format_season_label(row['season']):<10} {season_total:>12,} {faceoff_count:>13,} {season_coverage_pct:>10.1f}% {status:>10}\")\n\nif not INCLUDED_SEASONS:\n raise RuntimeError(\"No seasons meet the faceoff coverage threshold for analysis.\")\n\nFACEOFF_SEASON_PLACEHOLDERS = \", \".join([\"?\" for _ in INCLUDED_SEASONS])\nFACEOFF_SEASON_WHERE = f\"g.season IN ({FACEOFF_SEASON_PLACEHOLDERS})\"\n\nincluded_labels = \", \".join(format_season_label(season) for season in INCLUDED_SEASONS)\nprint(f\"\\nFaceoff analysis will use: {included_labels}\")\nprint(\"Blocked shots are excluded via the current derived-table proxy: keep goals plus shots with a recorded goalie.\")\n\nif EXCLUDED_SEASONS:\n excluded_labels = \", \".join(format_season_label(season) for season in EXCLUDED_SEASONS)\n print(f\"Excluded due to incomplete faceoff event coverage: {excluded_labels}\")\n print(\"These seasons still have shot rows, but their raw play-by-play feed is 
missing most faceoff events.\")\n\nprint()\n\ncur.execute(f\"\"\"\n SELECT s.faceoff_zone_code,\n COUNT(*) AS shots,\n SUM(s.is_goal) AS goals,\n ROUND(CAST(SUM(s.is_goal) AS REAL) / COUNT(*), 4) AS goal_rate\n FROM shot_events AS s\n JOIN games AS g\n ON g.game_id = s.game_id\n WHERE {ANALYSIS_SHOT_WHERE}\n AND s.seconds_since_faceoff IS NOT NULL\n AND s.faceoff_zone_code IS NOT NULL\n AND {FACEOFF_SEASON_WHERE}\n GROUP BY s.faceoff_zone_code\n ORDER BY s.faceoff_zone_code\n\"\"\", season_params())\nprint(f\"{'Zone':<20} {'Shots':>10} {'Goals':>8} {'Goal Rate':>10}\")\nprint(\"-\" * 52)\nfor row in cur.fetchall():\n label = ZONE_LABELS.get(row[0], row[0])\n print(f\"{label:<20} {row[1]:>10,} {row[2]:>8,} {row[3]:>10.4f}\")\n\nif total_shots == 0:\n print(\"\\n*** No shot events found. Run the scraper first. ***\")" }, { "cell_type": "markdown", @@ -40,7 +40,7 @@ "id": "8qnq4rb08rv", "metadata": {}, "outputs": [], - "source": "MAX_SECONDS = 120\nSMOOTH_WINDOW = 5\n\nBIN_BOUNDARIES = [5, 15, 30, 60]\nBIN_COLORS = [\"#e74c3c\", \"#e67e22\", \"#f1c40f\", \"#2ecc71\"]\n\nZONE_COLORS = {\"O\": \"#e74c3c\", \"D\": \"#3498db\", \"N\": \"#2ecc71\"}\n\nzone_data = {}\n\nfor zone in ZONE_CODES:\n cur.execute(\"\"\"\n SELECT seconds_since_faceoff AS sec,\n COUNT(*) AS shot_count,\n SUM(is_goal) AS goal_count\n FROM shot_events\n WHERE seconds_since_faceoff IS NOT NULL\n AND seconds_since_faceoff BETWEEN 0 AND ?\n AND faceoff_zone_code = ?\n GROUP BY seconds_since_faceoff\n ORDER BY seconds_since_faceoff\n \"\"\", (MAX_SECONDS, zone))\n\n rows = cur.fetchall()\n if not rows:\n continue\n\n z_sec = np.array([r[0] for r in rows])\n z_shots = np.array([r[1] for r in rows])\n z_goals = np.array([r[2] for r in rows])\n z_rate = np.where(z_shots > 0, z_goals / z_shots, 0.0)\n\n if len(z_rate) >= SMOOTH_WINDOW:\n z_smooth = np.convolve(z_rate, np.ones(SMOOTH_WINDOW) / SMOOTH_WINDOW, mode=\"same\")\n else:\n z_smooth = z_rate\n\n zone_data[zone] = {\n \"seconds\": z_sec, \"shots\": 
z_shots, \"goals\": z_goals,\n \"goal_rate\": z_rate, \"smoothed_rate\": z_smooth,\n }\n\nfig, axes = plt.subplots(len(zone_data), 2, figsize=(16, 5 * len(zone_data)), sharex=True)\nif len(zone_data) == 1:\n axes = axes.reshape(1, -1)\n\nfor row_idx, zone in enumerate(ZONE_CODES):\n if zone not in zone_data:\n continue\n zd = zone_data[zone]\n color = ZONE_COLORS[zone]\n label = ZONE_LABELS[zone]\n\n ax_vol = axes[row_idx, 0]\n ax_vol.bar(zd[\"seconds\"], zd[\"shots\"], width=1.0, color=color, alpha=0.7)\n ax_vol.set_ylabel(\"Shot count\")\n ax_vol.set_title(f\"{label} — Shot Volume\")\n for b in BIN_BOUNDARIES:\n ax_vol.axvline(x=b + 0.5, color=\"#bdc3c7\", linestyle=\"--\", linewidth=1.2)\n\n ax_gr = axes[row_idx, 1]\n ax_gr.plot(zd[\"seconds\"], zd[\"goal_rate\"], alpha=0.25, color=\"#95a5a6\", linewidth=0.8, label=\"Raw (1s)\")\n ax_gr.plot(zd[\"seconds\"], zd[\"smoothed_rate\"], color=color, linewidth=2.0,\n label=f\"Smoothed ({SMOOTH_WINDOW}s avg)\")\n ax_gr.set_ylabel(\"Goal rate\")\n ax_gr.set_title(f\"{label} — Goal Rate\")\n ax_gr.legend(fontsize=8)\n for b in BIN_BOUNDARIES:\n ax_gr.axvline(x=b + 0.5, color=\"#bdc3c7\", linestyle=\"--\", linewidth=1.2)\n\naxes[-1, 0].set_xlabel(\"Seconds since faceoff\")\naxes[-1, 1].set_xlabel(\"Seconds since faceoff\")\n\nfig.tight_layout()\nplt.show()\n\nfor zone in ZONE_CODES:\n if zone not in zone_data:\n continue\n zd = zone_data[zone]\n print(f\"{ZONE_LABELS[zone]}: {zd['shots'].sum():,} shots, {zd['goals'].sum():,} goals \"\n f\"(goal rate {zd['goals'].sum() / zd['shots'].sum():.4f})\")" + "source": "MAX_SECONDS = 120\nSMOOTH_WINDOW = 5\n\nBIN_BOUNDARIES = [5, 15, 30, 60]\nBIN_COLORS = [\"#e74c3c\", \"#e67e22\", \"#f1c40f\", \"#2ecc71\"]\n\nZONE_COLORS = {\"O\": \"#e74c3c\", \"D\": \"#3498db\", \"N\": \"#2ecc71\"}\n\nzone_data = {}\n\nfor zone in ZONE_CODES:\n cur.execute(f\"\"\"\n SELECT s.seconds_since_faceoff AS sec,\n COUNT(*) AS shot_count,\n SUM(s.is_goal) AS goal_count\n FROM shot_events AS s\n JOIN games 
AS g\n ON g.game_id = s.game_id\n WHERE {ANALYSIS_SHOT_WHERE}\n AND s.seconds_since_faceoff IS NOT NULL\n AND s.seconds_since_faceoff BETWEEN 0 AND ?\n AND s.faceoff_zone_code = ?\n AND {FACEOFF_SEASON_WHERE}\n GROUP BY s.seconds_since_faceoff\n ORDER BY s.seconds_since_faceoff\n \"\"\", season_params(MAX_SECONDS, zone))\n\n rows = cur.fetchall()\n if not rows:\n continue\n\n z_sec = np.array([r[0] for r in rows])\n z_shots = np.array([r[1] for r in rows])\n z_goals = np.array([r[2] for r in rows])\n z_rate = np.where(z_shots > 0, z_goals / z_shots, 0.0)\n\n if len(z_rate) >= SMOOTH_WINDOW:\n z_smooth = np.convolve(z_rate, np.ones(SMOOTH_WINDOW) / SMOOTH_WINDOW, mode=\"same\")\n else:\n z_smooth = z_rate\n\n zone_data[zone] = {\n \"seconds\": z_sec, \"shots\": z_shots, \"goals\": z_goals,\n \"goal_rate\": z_rate, \"smoothed_rate\": z_smooth,\n }\n\nfig, axes = plt.subplots(len(zone_data), 2, figsize=(16, 5 * len(zone_data)), sharex=True)\nif len(zone_data) == 1:\n axes = axes.reshape(1, -1)\n\nfor row_idx, zone in enumerate(ZONE_CODES):\n if zone not in zone_data:\n continue\n zd = zone_data[zone]\n color = ZONE_COLORS[zone]\n label = ZONE_LABELS[zone]\n\n ax_vol = axes[row_idx, 0]\n ax_vol.bar(zd[\"seconds\"], zd[\"shots\"], width=1.0, color=color, alpha=0.7)\n ax_vol.set_ylabel(\"Shot count\")\n ax_vol.set_title(f\"{label} — Shot Volume\")\n for b in BIN_BOUNDARIES:\n ax_vol.axvline(x=b + 0.5, color=\"#bdc3c7\", linestyle=\"--\", linewidth=1.2)\n\n ax_gr = axes[row_idx, 1]\n ax_gr.plot(zd[\"seconds\"], zd[\"goal_rate\"], alpha=0.25, color=\"#95a5a6\", linewidth=0.8, label=\"Raw (1s)\")\n ax_gr.plot(zd[\"seconds\"], zd[\"smoothed_rate\"], color=color, linewidth=2.0,\n label=f\"Smoothed ({SMOOTH_WINDOW}s avg)\")\n ax_gr.set_ylabel(\"Goal rate\")\n ax_gr.set_title(f\"{label} — Goal Rate\")\n ax_gr.legend(fontsize=8)\n for b in BIN_BOUNDARIES:\n ax_gr.axvline(x=b + 0.5, color=\"#bdc3c7\", linestyle=\"--\", linewidth=1.2)\n\naxes[-1, 0].set_xlabel(\"Seconds since 
faceoff\")\naxes[-1, 1].set_xlabel(\"Seconds since faceoff\")\n\nfig.tight_layout()\nplt.show()\n\nfor zone in ZONE_CODES:\n if zone not in zone_data:\n continue\n zd = zone_data[zone]\n print(f\"{ZONE_LABELS[zone]}: {zd['shots'].sum():,} shots, {zd['goals'].sum():,} goals \"\n f\"(goal rate {zd['goals'].sum() / zd['shots'].sum():.4f})\")" }, { "cell_type": "code", @@ -62,7 +62,7 @@ "id": "ytf9zr1l70g", "metadata": {}, "outputs": [], - "source": "for zone in ZONE_CODES:\n label = ZONE_LABELS[zone]\n print(f\"\\n{'='*68}\")\n print(f\" {label}\")\n print(f\"{'='*68}\")\n\n cur.execute(\"\"\"\n SELECT\n CASE\n WHEN seconds_since_faceoff BETWEEN 0 AND 5 THEN 'immediate (0-5s)'\n WHEN seconds_since_faceoff BETWEEN 6 AND 15 THEN 'early (6-15s)'\n WHEN seconds_since_faceoff BETWEEN 16 AND 30 THEN 'mid (16-30s)'\n WHEN seconds_since_faceoff BETWEEN 31 AND 60 THEN 'late (31-60s)'\n ELSE 'steady_state (61+s)'\n END AS recency_bin,\n COUNT(*) AS shots,\n SUM(is_goal) AS goals,\n ROUND(CAST(SUM(is_goal) AS REAL) / COUNT(*), 4) AS goal_rate,\n ROUND(AVG(distance_to_goal), 1) AS avg_distance\n FROM shot_events\n WHERE seconds_since_faceoff IS NOT NULL\n AND seconds_since_faceoff >= 0\n AND faceoff_zone_code = ?\n GROUP BY recency_bin\n ORDER BY\n CASE recency_bin\n WHEN 'immediate (0-5s)' THEN 1\n WHEN 'early (6-15s)' THEN 2\n WHEN 'mid (16-30s)' THEN 3\n WHEN 'late (31-60s)' THEN 4\n ELSE 5\n END\n \"\"\", (zone,))\n\n print(f\"{'Recency Bin':<25} {'Shots':>10} {'Goals':>8} {'Goal Rate':>10} {'Avg Dist':>10}\")\n print(\"-\" * 68)\n for row in cur.fetchall():\n print(f\"{row[0]:<25} {row[1]:>10,} {row[2]:>8,} {row[3]:>10.4f} {row[4]:>10.1f}\")" + "source": "for zone in ZONE_CODES:\n label = ZONE_LABELS[zone]\n print(f\"\\n{'='*68}\")\n print(f\" {label}\")\n print(f\"{'='*68}\")\n\n cur.execute(f\"\"\"\n SELECT\n CASE\n WHEN s.seconds_since_faceoff BETWEEN 0 AND 5 THEN 'immediate (0-5s)'\n WHEN s.seconds_since_faceoff BETWEEN 6 AND 15 THEN 'early (6-15s)'\n WHEN 
s.seconds_since_faceoff BETWEEN 16 AND 30 THEN 'mid (16-30s)'\n WHEN s.seconds_since_faceoff BETWEEN 31 AND 60 THEN 'late (31-60s)'\n ELSE 'steady_state (61+s)'\n END AS recency_bin,\n COUNT(*) AS shots,\n SUM(s.is_goal) AS goals,\n ROUND(CAST(SUM(s.is_goal) AS REAL) / COUNT(*), 4) AS goal_rate,\n ROUND(AVG(s.distance_to_goal), 1) AS avg_distance\n FROM shot_events AS s\n JOIN games AS g\n ON g.game_id = s.game_id\n WHERE {ANALYSIS_SHOT_WHERE}\n AND s.seconds_since_faceoff IS NOT NULL\n AND s.seconds_since_faceoff >= 0\n AND s.faceoff_zone_code = ?\n AND {FACEOFF_SEASON_WHERE}\n GROUP BY recency_bin\n ORDER BY\n CASE recency_bin\n WHEN 'immediate (0-5s)' THEN 1\n WHEN 'early (6-15s)' THEN 2\n WHEN 'mid (16-30s)' THEN 3\n WHEN 'late (31-60s)' THEN 4\n ELSE 5\n END\n \"\"\", season_params(zone))\n\n print(f\"{'Recency Bin':<25} {'Shots':>10} {'Goals':>8} {'Goal Rate':>10} {'Avg Dist':>10}\")\n print(\"-\" * 68)\n for row in cur.fetchall():\n print(f\"{row[0]:<25} {row[1]:>10,} {row[2]:>8,} {row[3]:>10.4f} {row[4]:>10.1f}\")" }, { "cell_type": "markdown", @@ -76,7 +76,7 @@ "id": "3ax6t8nub4v", "metadata": {}, "outputs": [], - "source": "BIN_NAMES = [\"immediate\\n(0-5s)\", \"early\\n(6-15s)\", \"mid\\n(16-30s)\", \"late\\n(31-60s)\", \"steady_state\\n(61+s)\"]\nBIN_SQL_LABELS = [\"immediate (0-5s)\", \"early (6-15s)\", \"mid (16-30s)\", \"late (31-60s)\", \"steady_state (61+s)\"]\n\ncur.execute(\"\"\"\n SELECT\n faceoff_zone_code AS zone,\n CASE\n WHEN seconds_since_faceoff BETWEEN 0 AND 5 THEN 'immediate (0-5s)'\n WHEN seconds_since_faceoff BETWEEN 6 AND 15 THEN 'early (6-15s)'\n WHEN seconds_since_faceoff BETWEEN 16 AND 30 THEN 'mid (16-30s)'\n WHEN seconds_since_faceoff BETWEEN 31 AND 60 THEN 'late (31-60s)'\n ELSE 'steady_state (61+s)'\n END AS recency_bin,\n CAST(SUM(is_goal) AS REAL) / COUNT(*) AS goal_rate\n FROM shot_events\n WHERE seconds_since_faceoff IS NOT NULL\n AND seconds_since_faceoff >= 0\n AND faceoff_zone_code IS NOT NULL\n GROUP BY zone, 
recency_bin\n\"\"\")\n\nzone_bin_rates = {}\nfor row in cur.fetchall():\n zone_bin_rates[(row[0], row[1])] = row[2]\n\nx = np.arange(len(BIN_SQL_LABELS))\nBAR_WIDTH = 0.25\n\nfig, ax = plt.subplots(figsize=(14, 6))\n\nfor i, zone in enumerate(ZONE_CODES):\n rates = [zone_bin_rates.get((zone, b), 0.0) for b in BIN_SQL_LABELS]\n ax.bar(x + i * BAR_WIDTH, rates, BAR_WIDTH,\n color=ZONE_COLORS[zone], alpha=0.8, label=ZONE_LABELS[zone])\n\nax.set_xticks(x + BAR_WIDTH)\nax.set_xticklabels(BIN_NAMES)\nax.set_ylabel(\"Goal rate\")\nax.set_title(\"Goal Rate by Recency Bin — Separated by Faceoff Zone\")\nax.legend()\nax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=1.0, decimals=1))\n\nfig.tight_layout()\nplt.show()" + "source": "BIN_NAMES = [\"immediate\\n(0-5s)\", \"early\\n(6-15s)\", \"mid\\n(16-30s)\", \"late\\n(31-60s)\", \"steady_state\\n(61+s)\"]\nBIN_SQL_LABELS = [\"immediate (0-5s)\", \"early (6-15s)\", \"mid (16-30s)\", \"late (31-60s)\", \"steady_state (61+s)\"]\n\ncur.execute(f\"\"\"\n SELECT\n s.faceoff_zone_code AS zone,\n CASE\n WHEN s.seconds_since_faceoff BETWEEN 0 AND 5 THEN 'immediate (0-5s)'\n WHEN s.seconds_since_faceoff BETWEEN 6 AND 15 THEN 'early (6-15s)'\n WHEN s.seconds_since_faceoff BETWEEN 16 AND 30 THEN 'mid (16-30s)'\n WHEN s.seconds_since_faceoff BETWEEN 31 AND 60 THEN 'late (31-60s)'\n ELSE 'steady_state (61+s)'\n END AS recency_bin,\n CAST(SUM(s.is_goal) AS REAL) / COUNT(*) AS goal_rate\n FROM shot_events AS s\n JOIN games AS g\n ON g.game_id = s.game_id\n WHERE {ANALYSIS_SHOT_WHERE}\n AND s.seconds_since_faceoff IS NOT NULL\n AND s.seconds_since_faceoff >= 0\n AND s.faceoff_zone_code IS NOT NULL\n AND {FACEOFF_SEASON_WHERE}\n GROUP BY zone, recency_bin\n\"\"\", season_params())\n\nzone_bin_rates = {}\nfor row in cur.fetchall():\n zone_bin_rates[(row[0], row[1])] = row[2]\n\nx = np.arange(len(BIN_SQL_LABELS))\nBAR_WIDTH = 0.25\n\nfig, ax = plt.subplots(figsize=(14, 6))\n\nfor i, zone in enumerate(ZONE_CODES):\n rates = 
[zone_bin_rates.get((zone, b), 0.0) for b in BIN_SQL_LABELS]\n ax.bar(x + i * BAR_WIDTH, rates, BAR_WIDTH,\n color=ZONE_COLORS[zone], alpha=0.8, label=ZONE_LABELS[zone])\n\nax.set_xticks(x + BAR_WIDTH)\nax.set_xticklabels(BIN_NAMES)\nax.set_ylabel(\"Goal rate\")\nax.set_title(\"Goal Rate by Recency Bin — Separated by Faceoff Zone\")\nax.legend()\nax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=1.0, decimals=1))\n\nfig.tight_layout()\nplt.show()" }, { "cell_type": "markdown", @@ -104,7 +104,7 @@ "id": "2s3u5uz3pmr", "metadata": {}, "outputs": [], - "source": "cur.execute(\"\"\"\n SELECT manpower_state, COUNT(*) AS n\n FROM shot_events\n WHERE seconds_since_faceoff IS NOT NULL\n AND manpower_state IS NOT NULL\n GROUP BY manpower_state\n ORDER BY n DESC\n\"\"\")\nall_mp_states = [(row[0], row[1]) for row in cur.fetchall()]\n\nMIN_SHOTS_FOR_CURVE = 50\n\n_MP_PALETTE = sns.color_palette(\"tab10\", n_colors=len(all_mp_states))\nMP_STATE_COLORS = {state: _MP_PALETTE[i] for i, (state, _) in enumerate(all_mp_states)}\n\nprint(f\"Manpower states found: {len(all_mp_states)}\")\nfor state, n in all_mp_states:\n marker = \"\" if n >= MIN_SHOTS_FOR_CURVE else \" (too few for curve)\"\n print(f\" {state}: {n:,} shots{marker}\")\n\nfig, axes = plt.subplots(len(ZONE_CODES), 2, figsize=(16, 6 * len(ZONE_CODES)), sharex=True)\n\nfor row_idx, zone in enumerate(ZONE_CODES):\n ax_vol = axes[row_idx, 0]\n ax_gr = axes[row_idx, 1]\n zone_label = ZONE_LABELS[zone]\n\n for mp_state, total_n in all_mp_states:\n cur.execute(\"\"\"\n SELECT seconds_since_faceoff,\n COUNT(*) AS shots,\n SUM(is_goal) AS goals\n FROM shot_events\n WHERE seconds_since_faceoff IS NOT NULL\n AND seconds_since_faceoff BETWEEN 0 AND ?\n AND faceoff_zone_code = ?\n AND manpower_state = ?\n GROUP BY seconds_since_faceoff\n ORDER BY seconds_since_faceoff\n \"\"\", (MAX_SECONDS, zone, mp_state))\n\n rows = cur.fetchall()\n if not rows:\n continue\n\n m_sec = np.array([r[0] for r in rows])\n m_shots = 
np.array([r[1] for r in rows])\n m_goals = np.array([r[2] for r in rows])\n zone_mp_total = m_shots.sum()\n\n if zone_mp_total < MIN_SHOTS_FOR_CURVE:\n continue\n\n m_rate = np.where(m_shots > 0, m_goals / m_shots, 0.0)\n\n if len(m_rate) >= SMOOTH_WINDOW:\n m_smooth = np.convolve(m_rate, np.ones(SMOOTH_WINDOW) / SMOOTH_WINDOW, mode=\"same\")\n else:\n m_smooth = m_rate\n\n color = MP_STATE_COLORS[mp_state]\n curve_label = f\"{mp_state} (n={zone_mp_total:,})\"\n ax_vol.plot(m_sec, m_shots, color=color, alpha=0.8, linewidth=1.5, label=curve_label)\n ax_gr.plot(m_sec, m_smooth, color=color, linewidth=2.0, label=curve_label)\n\n ax_vol.set_ylabel(\"Shot count\")\n ax_vol.set_title(f\"{zone_label} — Shot Volume by Manpower State\")\n ax_vol.legend(fontsize=7, loc=\"upper right\")\n\n ax_gr.set_ylabel(\"Goal rate (smoothed)\")\n ax_gr.set_title(f\"{zone_label} — Goal Rate by Manpower State\")\n ax_gr.legend(fontsize=7, loc=\"upper right\")\n\n for ax in (ax_vol, ax_gr):\n for b in BIN_BOUNDARIES:\n ax.axvline(x=b + 0.5, color=\"#bdc3c7\", linestyle=\":\", linewidth=1.0)\n\naxes[-1, 0].set_xlabel(\"Seconds since faceoff\")\naxes[-1, 1].set_xlabel(\"Seconds since faceoff\")\n\nfig.tight_layout()\nplt.show()" + "source": "cur.execute(f\"\"\"\n SELECT s.manpower_state, COUNT(*) AS n\n FROM shot_events AS s\n JOIN games AS g\n ON g.game_id = s.game_id\n WHERE {ANALYSIS_SHOT_WHERE}\n AND s.seconds_since_faceoff IS NOT NULL\n AND s.manpower_state IS NOT NULL\n AND {FACEOFF_SEASON_WHERE}\n GROUP BY s.manpower_state\n ORDER BY n DESC\n\"\"\", season_params())\nall_mp_states = [(row[0], row[1]) for row in cur.fetchall()]\n\nMIN_SHOTS_FOR_CURVE = 50\n\n_MP_PALETTE = sns.color_palette(\"tab10\", n_colors=len(all_mp_states))\nMP_STATE_COLORS = {state: _MP_PALETTE[i] for i, (state, _) in enumerate(all_mp_states)}\n\nprint(f\"Manpower states found: {len(all_mp_states)}\")\nfor state, n in all_mp_states:\n marker = \"\" if n >= MIN_SHOTS_FOR_CURVE else \" (too few for curve)\"\n 
print(f\" {state}: {n:,} shots{marker}\")\n\nfig, axes = plt.subplots(len(ZONE_CODES), 2, figsize=(16, 6 * len(ZONE_CODES)), sharex=True)\n\nfor row_idx, zone in enumerate(ZONE_CODES):\n ax_vol = axes[row_idx, 0]\n ax_gr = axes[row_idx, 1]\n zone_label = ZONE_LABELS[zone]\n\n for mp_state, total_n in all_mp_states:\n cur.execute(f\"\"\"\n SELECT s.seconds_since_faceoff,\n COUNT(*) AS shots,\n SUM(s.is_goal) AS goals\n FROM shot_events AS s\n JOIN games AS g\n ON g.game_id = s.game_id\n WHERE {ANALYSIS_SHOT_WHERE}\n AND s.seconds_since_faceoff IS NOT NULL\n AND s.seconds_since_faceoff BETWEEN 0 AND ?\n AND s.faceoff_zone_code = ?\n AND s.manpower_state = ?\n AND {FACEOFF_SEASON_WHERE}\n GROUP BY s.seconds_since_faceoff\n ORDER BY s.seconds_since_faceoff\n \"\"\", season_params(MAX_SECONDS, zone, mp_state))\n\n rows = cur.fetchall()\n if not rows:\n continue\n\n m_sec = np.array([r[0] for r in rows])\n m_shots = np.array([r[1] for r in rows])\n m_goals = np.array([r[2] for r in rows])\n zone_mp_total = m_shots.sum()\n\n if zone_mp_total < MIN_SHOTS_FOR_CURVE:\n continue\n\n m_rate = np.where(m_shots > 0, m_goals / m_shots, 0.0)\n\n if len(m_rate) >= SMOOTH_WINDOW:\n m_smooth = np.convolve(m_rate, np.ones(SMOOTH_WINDOW) / SMOOTH_WINDOW, mode=\"same\")\n else:\n m_smooth = m_rate\n\n color = MP_STATE_COLORS[mp_state]\n curve_label = f\"{mp_state} (n={zone_mp_total:,})\"\n ax_vol.plot(m_sec, m_shots, color=color, alpha=0.8, linewidth=1.5, label=curve_label)\n ax_gr.plot(m_sec, m_smooth, color=color, linewidth=2.0, label=curve_label)\n\n ax_vol.set_ylabel(\"Shot count\")\n ax_vol.set_title(f\"{zone_label} — Shot Volume by Manpower State\")\n ax_vol.legend(fontsize=7, loc=\"upper right\")\n\n ax_gr.set_ylabel(\"Goal rate (smoothed)\")\n ax_gr.set_title(f\"{zone_label} — Goal Rate by Manpower State\")\n ax_gr.legend(fontsize=7, loc=\"upper right\")\n\n for ax in (ax_vol, ax_gr):\n for b in BIN_BOUNDARIES:\n ax.axvline(x=b + 0.5, color=\"#bdc3c7\", linestyle=\":\", 
linewidth=1.0)\n\naxes[-1, 0].set_xlabel(\"Seconds since faceoff\")\naxes[-1, 1].set_xlabel(\"Seconds since faceoff\")\n\nfig.tight_layout()\nplt.show()" }, { "cell_type": "markdown", @@ -132,13 +132,27 @@ "id": "z9zf4ko2bp", "metadata": {}, "outputs": [], - "source": "fig, axes = plt.subplots(1, len(ZONE_CODES), figsize=(18, 5), sharey=True)\n\ncur.execute(\"\"\"\n SELECT AVG(distance_to_goal)\n FROM shot_events\n WHERE distance_to_goal IS NOT NULL\n\"\"\")\nleague_avg_dist = cur.fetchone()[0]\n\nfor col_idx, zone in enumerate(ZONE_CODES):\n ax = axes[col_idx]\n\n cur.execute(\"\"\"\n SELECT seconds_since_faceoff,\n AVG(distance_to_goal) AS avg_dist,\n COUNT(*) AS n\n FROM shot_events\n WHERE seconds_since_faceoff IS NOT NULL\n AND seconds_since_faceoff BETWEEN 0 AND ?\n AND distance_to_goal IS NOT NULL\n AND faceoff_zone_code = ?\n GROUP BY seconds_since_faceoff\n ORDER BY seconds_since_faceoff\n \"\"\", (MAX_SECONDS, zone))\n\n rows = cur.fetchall()\n if not rows:\n continue\n\n d_sec = np.array([r[0] for r in rows])\n d_avg = np.array([r[1] for r in rows])\n\n if len(d_avg) >= SMOOTH_WINDOW:\n d_smooth = np.convolve(d_avg, np.ones(SMOOTH_WINDOW) / SMOOTH_WINDOW, mode=\"same\")\n else:\n d_smooth = d_avg\n\n color = ZONE_COLORS[zone]\n ax.scatter(d_sec, d_avg, alpha=0.2, s=8, color=\"#95a5a6\")\n ax.plot(d_sec, d_smooth, color=color, linewidth=2.0, label=f\"Smoothed ({SMOOTH_WINDOW}s)\")\n if league_avg_dist is not None:\n ax.axhline(y=league_avg_dist, color=\"#e67e22\", linestyle=\"--\", linewidth=1.2,\n label=f\"League avg ({league_avg_dist:.1f} ft)\")\n ax.set_xlabel(\"Seconds since faceoff\")\n ax.set_title(f\"{ZONE_LABELS[zone]} — Avg Shot Distance\")\n ax.legend(fontsize=8)\n\n for b in BIN_BOUNDARIES:\n ax.axvline(x=b + 0.5, color=\"#bdc3c7\", linestyle=\":\", linewidth=1.0)\n\naxes[0].set_ylabel(\"Average distance to goal (ft)\")\n\nfig.tight_layout()\nplt.show()" + "source": "fig, axes = plt.subplots(1, len(ZONE_CODES), figsize=(18, 5), 
sharey=True)\n\ncur.execute(f\"\"\"\n SELECT AVG(s.distance_to_goal)\n FROM shot_events AS s\n JOIN games AS g\n ON g.game_id = s.game_id\n WHERE {ANALYSIS_SHOT_WHERE}\n AND s.distance_to_goal IS NOT NULL\n AND {FACEOFF_SEASON_WHERE}\n\"\"\", season_params())\nleague_avg_dist = cur.fetchone()[0]\n\nfor col_idx, zone in enumerate(ZONE_CODES):\n ax = axes[col_idx]\n\n cur.execute(f\"\"\"\n SELECT s.seconds_since_faceoff,\n AVG(s.distance_to_goal) AS avg_dist,\n COUNT(*) AS n\n FROM shot_events AS s\n JOIN games AS g\n ON g.game_id = s.game_id\n WHERE {ANALYSIS_SHOT_WHERE}\n AND s.seconds_since_faceoff IS NOT NULL\n AND s.seconds_since_faceoff BETWEEN 0 AND ?\n AND s.distance_to_goal IS NOT NULL\n AND s.faceoff_zone_code = ?\n AND {FACEOFF_SEASON_WHERE}\n GROUP BY s.seconds_since_faceoff\n ORDER BY s.seconds_since_faceoff\n \"\"\", season_params(MAX_SECONDS, zone))\n\n rows = cur.fetchall()\n if not rows:\n continue\n\n d_sec = np.array([r[0] for r in rows])\n d_avg = np.array([r[1] for r in rows])\n\n if len(d_avg) >= SMOOTH_WINDOW:\n d_smooth = np.convolve(d_avg, np.ones(SMOOTH_WINDOW) / SMOOTH_WINDOW, mode=\"same\")\n else:\n d_smooth = d_avg\n\n color = ZONE_COLORS[zone]\n ax.scatter(d_sec, d_avg, alpha=0.2, s=8, color=\"#95a5a6\")\n ax.plot(d_sec, d_smooth, color=color, linewidth=2.0, label=f\"Smoothed ({SMOOTH_WINDOW}s)\")\n if league_avg_dist is not None:\n ax.axhline(y=league_avg_dist, color=\"#e67e22\", linestyle=\"--\", linewidth=1.2,\n label=f\"League avg ({league_avg_dist:.1f} ft)\")\n ax.set_xlabel(\"Seconds since faceoff\")\n ax.set_title(f\"{ZONE_LABELS[zone]} — Avg Shot Distance\")\n ax.legend(fontsize=8)\n\n for b in BIN_BOUNDARIES:\n ax.axvline(x=b + 0.5, color=\"#bdc3c7\", linestyle=\":\", linewidth=1.0)\n\naxes[0].set_ylabel(\"Average distance to goal (ft)\")\n\nfig.tight_layout()\nplt.show()" + }, + { + "cell_type": "markdown", + "id": "season-period-heatmaps", + "metadata": {}, + "source": "## 9. 
Season × period shot-location heatmaps\n\nTo sanity-check the distance calculations visually, facet the post-faceoff shot locations by season and by period. These grids use only the included seasons from section 1, so pre-2009 feed-coverage gaps do not swamp the comparison." + }, + { + "cell_type": "code", + "execution_count": null, + "id": "season-period-heatmaps-code", + "metadata": {}, + "outputs": [], + "source": "SEASON_PANEL_COLUMNS = 4\nHEXBIN_GRIDSIZE = 24\nHEXBIN_MIN_COUNT = 1\nMIN_PERIOD_SHOTS_FOR_HEATMAP = 5_000\nHEATMAP_CMAP = \"magma\"\nHEATMAP_BINS = \"log\"\nHALF_RINK_EXTENT = (0.0, float(RINK_HALF_LENGTH), -float(RINK_HALF_WIDTH), float(RINK_HALF_WIDTH))\n\n\ndef format_period_label(period_number):\n if period_number <= 3:\n return f\"Period {period_number}\"\n return f\"Overtime {period_number - 3}\"\n\n\ncur.execute(f\"\"\"\n SELECT g.season,\n s.period,\n s.x_coord,\n s.y_coord\n FROM shot_events AS s\n JOIN games AS g\n ON g.game_id = s.game_id\n WHERE {ANALYSIS_SHOT_WHERE}\n AND s.seconds_since_faceoff IS NOT NULL\n AND s.x_coord IS NOT NULL\n AND s.y_coord IS NOT NULL\n AND {FACEOFF_SEASON_WHERE}\n ORDER BY g.season, s.period\n\"\"\", season_params())\n\nseason_period_shots = {}\nperiod_totals = {}\n\nfor row in cur.fetchall():\n key = (row[\"season\"], row[\"period\"])\n if key not in season_period_shots:\n season_period_shots[key] = {\"x\": [], \"y\": []}\n season_period_shots[key][\"x\"].append(row[\"x_coord\"])\n season_period_shots[key][\"y\"].append(row[\"y_coord\"])\n period_totals[row[\"period\"]] = period_totals.get(row[\"period\"], 0) + 1\n\nseasons = INCLUDED_SEASONS\nperiods_for_figures = [\n period for period, shot_total in sorted(period_totals.items())\n if shot_total >= MIN_PERIOD_SHOTS_FOR_HEATMAP\n]\nskipped_periods = [\n (period, shot_total) for period, shot_total in sorted(period_totals.items())\n if shot_total < MIN_PERIOD_SHOTS_FOR_HEATMAP\n]\n\nprint(\"Rendered period figures:\")\nfor period in periods_for_figures:\n 
print(f\" {format_period_label(period)}: {period_totals[period]:,} shots\")\n\nif skipped_periods:\n print(\"\\nSparse periods omitted from the seasonal grids:\")\n for period, shot_total in skipped_periods:\n print(f\" {format_period_label(period)}: {shot_total:,} shots\")\n\nfor period in periods_for_figures:\n n_rows = int(np.ceil(len(seasons) / SEASON_PANEL_COLUMNS))\n fig, axes = plt.subplots(\n n_rows,\n SEASON_PANEL_COLUMNS,\n figsize=(SEASON_PANEL_COLUMNS * 4.2, n_rows * 3.8),\n )\n axes = np.atleast_1d(axes).ravel()\n used_axes = []\n last_hexbin = None\n\n for ax, season in zip(axes, seasons):\n draw_half_rink(ax)\n ax.set_xlabel(\"\")\n ax.set_ylabel(\"\")\n used_axes.append(ax)\n\n season_key = (season, period)\n coords = season_period_shots.get(season_key)\n shot_count = len(coords[\"x\"]) if coords is not None else 0\n\n if shot_count > 0:\n last_hexbin = ax.hexbin(\n coords[\"x\"],\n coords[\"y\"],\n gridsize=HEXBIN_GRIDSIZE,\n extent=HALF_RINK_EXTENT,\n cmap=HEATMAP_CMAP,\n mincnt=HEXBIN_MIN_COUNT,\n bins=HEATMAP_BINS,\n )\n else:\n ax.text(\n 0.5,\n 0.5,\n \"No shots\",\n transform=ax.transAxes,\n ha=\"center\",\n va=\"center\",\n fontsize=10,\n color=\"#7f8c8d\",\n )\n\n ax.set_title(f\"{format_season_label(season)} (n={shot_count:,})\", fontsize=10)\n\n for ax in axes[len(seasons):]:\n ax.axis(\"off\")\n\n if last_hexbin is not None and used_axes:\n colorbar = fig.colorbar(last_hexbin, ax=used_axes, shrink=0.92, pad=0.01)\n colorbar.set_label(\"Shot count per hex (log scale)\")\n\n fig.suptitle(\n f\"Post-faceoff shot locations by season — {format_period_label(period)}\",\n fontsize=16,\n y=0.995,\n )\n fig.tight_layout(rect=(0.0, 0.0, 1.0, 0.985))\n plt.show()" }, { "cell_type": "markdown", "id": "qg0mg9kh6ft", "metadata": {}, - "source": "## 9. Summary and recommendations\n\n**Interpret the zone-separated plots above to answer:**\n\n1. 
**Zone-specific bin boundaries:** Do the 5/15/30/60s boundaries align with inflection points *within each zone*? Offensive-zone faceoffs may need a tighter initial window (e.g., 3s instead of 5s) if the spike is sharper. Defensive-zone faceoffs may show a flatter curve where the 5s \"immediate\" bin captures no meaningful signal. Consider defining zone-specific bin boundaries in `_FACEOFF_RECENCY_BINS` if decay shapes differ substantially.\n\n2. **Decay rate differences:** Compare the per-zone exponential fit half-lives. If offensive-zone half-life is significantly shorter than defensive-zone, this confirms that faceoff zone and recency interact — a key justification for zone-specific features rather than a single pooled decay model.\n\n3. **Convergence point:** The zone convergence analysis (section 5) identifies where faceoff zone context stops adding information. Beyond this point, zone-specific features add noise, not signal. Use this to set the upper bound of the faceoff recency window.\n\n4. **Manpower × zone interaction:** If PP faceoffs in the offensive zone show a dramatically different decay pattern from PP faceoffs in the defensive zone (section 6), consider a three-way interaction feature: `zone × manpower × recency`.\n\n5. **Distance patterns:** The per-zone distance curves (section 8) explain *why* zones produce different goal rates. Offensive-zone faceoffs generate closer shots initially; the distance advantage decays as play transitions. This mechanistic understanding validates treating zones as distinct events.\n\n6. **Continuous vs. binned:** Compare per-zone R-squared values. If the exponential fit quality varies by zone, the optimal representation may differ: bins for zones with step-function breaks, `log(seconds + 1)` for zones with smooth decay. A pragmatic approach: include `zone × log(seconds + 1)` as zone-specific continuous features alongside zone-specific bins, and let the model select." + "source": "## 10. 
Summary and recommendations\n\n**Interpret the zone-separated plots above to answer:**\n\n1. **Zone-specific bin boundaries:** Do the 5/15/30/60s boundaries align with inflection points *within each zone*? Offensive-zone faceoffs may need a tighter initial window (e.g., 3s instead of 5s) if the spike is sharper. Defensive-zone faceoffs may show a flatter curve where the 5s \"immediate\" bin captures no meaningful signal. Consider defining zone-specific bin boundaries in `_FACEOFF_RECENCY_BINS` if decay shapes differ substantially.\n\n2. **Decay rate differences:** Compare the per-zone exponential fit half-lives. If offensive-zone half-life is significantly shorter than defensive-zone, this confirms that faceoff zone and recency interact — a key justification for zone-specific features rather than a single pooled decay model.\n\n3. **Convergence point:** The zone convergence analysis (section 5) identifies where faceoff zone context stops adding information. Beyond this point, zone-specific features add noise, not signal. Use this to set the upper bound of the faceoff recency window.\n\n4. **Manpower × zone interaction:** If PP faceoffs in the offensive zone show a dramatically different decay pattern from PP faceoffs in the defensive zone (section 6), consider a three-way interaction feature: `zone × manpower × recency`.\n\n5. **Distance patterns:** The per-zone distance curves (section 8) explain *why* zones produce different goal rates, and the season × period hexbins (section 9) make it easier to spot season-specific coordinate anomalies. Offensive-zone faceoffs generate closer shots initially; the distance advantage decays as play transitions. This mechanistic understanding validates treating zones as distinct events.\n\n6. **Continuous vs. binned:** Compare per-zone R-squared values. If the exponential fit quality varies by zone, the optimal representation may differ: bins for zones with step-function breaks, `log(seconds + 1)` for zones with smooth decay.
A pragmatic approach: include `zone × log(seconds + 1)` as zone-specific continuous features alongside zone-specific bins, and let the model select." }, { "cell_type": "code",