From 1c946ece9dd93195c5d2519b4e5d17800bdea03d Mon Sep 17 00:00:00 2001 From: ss77995ss Date: Thu, 25 Jun 2026 03:37:37 +0800 Subject: [PATCH 1/3] feat(Fangraphs): Implement Fangraphs searching functions base on their leaderboard API --- example.py | 14 ++ src/baseball_stats_python/__init__.py | 4 + .../fangraphs/__init__.py | 0 .../fangraphs/fangraphs_search.py | 174 ++++++++++++++++++ src/baseball_stats_python/utils/utils.py | 8 + tests/fangraphs/test_fangraphs_search.py | 74 ++++++++ tests/utils/test_utils.py | 11 +- 7 files changed, 284 insertions(+), 1 deletion(-) create mode 100644 src/baseball_stats_python/fangraphs/__init__.py create mode 100644 src/baseball_stats_python/fangraphs/fangraphs_search.py create mode 100644 tests/fangraphs/test_fangraphs_search.py diff --git a/example.py b/example.py index 72f66c9..35b7601 100644 --- a/example.py +++ b/example.py @@ -1,6 +1,8 @@ """Example usage of the baseball_stats_python package.""" from src.baseball_stats_python import ( + fg_batting, + fg_pitching, minor_statcast_search, mlbam_id_search, statcast_search, @@ -50,8 +52,20 @@ def wbc_example(): print(df) +def fg_batting_example(): + df = fg_batting(qual="y") + print(df) + + +def fg_pitching_example(): + df = fg_pitching(season=2025, debug=True) + print(df) + + # example() # minor_example() # mlbam_id_example() # spring_training_example() # wbc_example() +# fg_batting_example() +# fg_pitching_example() diff --git a/src/baseball_stats_python/__init__.py b/src/baseball_stats_python/__init__.py index 33dcdf0..42a0f5b 100644 --- a/src/baseball_stats_python/__init__.py +++ b/src/baseball_stats_python/__init__.py @@ -1,3 +1,4 @@ +from .fangraphs.fangraphs_search import fangraphs_search, fg_batting, fg_pitching from .statcast.catcher_throwing import catcher_throwing from .statcast.minor_statcast_search import ( minor_statcast_batter_search, @@ -32,4 +33,7 @@ "wbc_statcast_search", "wbc_statcast_pitcher_search", "wbc_statcast_batter_search", + "fangraphs_search", + 
"fg_batting", + "fg_pitching", ] diff --git a/src/baseball_stats_python/fangraphs/__init__.py b/src/baseball_stats_python/fangraphs/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/baseball_stats_python/fangraphs/fangraphs_search.py b/src/baseball_stats_python/fangraphs/fangraphs_search.py new file mode 100644 index 0000000..f08c16b --- /dev/null +++ b/src/baseball_stats_python/fangraphs/fangraphs_search.py @@ -0,0 +1,174 @@ +import json +import logging + +import pandas as pd +import requests + +from ..utils.utils import extract_text_from_html + +FANGRAPHS_MAIN_URL = "https://www.fangraphs.com/api/leaders/major-league/data" + +logging.basicConfig() +logger = logging.getLogger("Fangraphs") + + +def fangraphs_search( + stats: str = "bat", + age: int = 0, + pos: str = "all", + lg: str = "all", + qual: int | str = "y", + season: int = 2026, + season1: int = 2026, + start_date: str = "2026-03-01", + end_date: str = "2026-11-01", + month: int = 0, + hand: str = "", + team: str = "", + pageitems: int = 10000, + pagenum: int = 1, + ind: int = 0, + rost: int = 0, + players: str = "", + type: int = 8, + postseason: str = "", + heatmapqual: str = "", + sortdir: str = "default", + sortstat: str = "WAR", + debug: bool = False, +) -> pd.DataFrame: + """ + Get leaderboard data from the Fangraphs major-league leaders API. + + Mirrors the query parameters of the Fangraphs leaderboard page + (https://www.fangraphs.com/leaders/major-league). All filters are optional; + the defaults return the current-season qualified leaderboard. + Not all arguments' descriptions are accurate. Still need to be updated. + + Args: + stats (str): Stat group to fetch — "bat" for batting or "pit" for pitching. + age (int): Filter by player age. 0 means no age filter. + pos (str): Position filter (e.g. "all", "c", "1b", "of", "np" for non-pitchers). + lg (str): League filter — "all", "al", or "nl". + qual (int | str): Plate-appearance/innings qualifier. 
"y" for the qualified + threshold, or an integer for a custom minimum; 0 means no minimum. + season (int): End season of the range to query. + season1 (int): Start season of the range. Equals `season` for a single season. + start_date (str): Range start date in "YYYY-MM-DD" format (should set month to 1000 to activate this filter). + end_date (str): Range end date in "YYYY-MM-DD" format (should set month to 1000 to activate this filter). + month (int): Split by calendar month/period. 0 means full season. 1000 means use start_date and end_date to filter by date. + hand (str): Batter/pitcher handedness filter — "R", "L", or "" for both. + team (str): Team filter by Fangraphs team id; "" means all teams. + pageitems (int): Number of rows per page. default set to 10000 to get all possible rows. + pagenum (int): Page number to fetch. + ind (int): Split seasons individually (1) or aggregate the range into one row (0). + rost (int): Roster filter — 0 for all players, 1 for active roster only. + players (str): Filter to specific player id(s); "" means all players. + type (int): Stat dashboard/column set id (8 is the default dashboard). + postseason (str): Set to a truthy value to query postseason stats; "" for regular season. + heatmapqual (str): Heatmap qualifier flag passed through to the API. + sortdir (str): Sort direction — "default", "asc", or "desc". + sortstat (str): Column to sort by (e.g. "WAR"). + debug (bool): If True, raise the logger to DEBUG level to print the request params and URL. + + Returns: + pd.DataFrame: The leaderboard rows, with HTML stripped from the Name and Team + columns. An empty DataFrame is returned when the API has no matching data. 
+ """ + if debug: + logger.setLevel(logging.DEBUG) + + params = { + "age": age, + "pos": pos, + "stats": stats, + "lg": lg, + "qual": qual, + "season": season, + "season1": season1, + "startdate": start_date, + "enddate": end_date, + "month": month, + "hand": hand, + "team": team, + "pageitems": pageitems, + "pagenum": pagenum, + "ind": ind, + "rost": rost, + "players": players, + "type": type, + "postseason": postseason, + "heatmapqual": heatmapqual, + "sortdir": sortdir, + "sortstat": sortstat, + } + + logger.debug(f"Params: {params}") + + try: + response = requests.get(FANGRAPHS_MAIN_URL, params=params) + response.raise_for_status() + + logger.debug(response.url) + + response_content = response.content + data = json.loads(response_content) + df = pd.DataFrame(data["data"]) + + if df.empty: + logger.warning("No data found") + return df + + df["Name"] = df["Name"].apply(extract_text_from_html) + df["Team"] = df["Team"].apply(extract_text_from_html) + + return df + + except requests.exceptions.RequestException as e: + raise Exception(f"Failed to fetch data: {e}") + + +def fg_batting( + qual: int | str = 0, season: int = 2026, debug: bool = False +) -> pd.DataFrame: + """ + Get the batting leaderboard from Fangraphs. + + Convenience wrapper around `fangraphs_search` with stats="bat". + Currently only supports single season queries. Use `fangraphs_search` for multiple season queries. + + Args: + qual (int | str): Plate-appearance qualifier — "y" for the qualified + threshold, an integer for a custom minimum, or 0 for no minimum. + season (int): Season to query. + debug (bool): If True, log the request params and URL at DEBUG level. + + Returns: + pd.DataFrame: The batting leaderboard, with HTML stripped from Name and Team. 
+ """ + return fangraphs_search( + stats="bat", qual=qual, season=season, season1=season, debug=debug + ) + + +def fg_pitching( + qual: int | str = 0, season: int = 2026, debug: bool = False +) -> pd.DataFrame: + """ + Get the pitching leaderboard from Fangraphs. + + Convenience wrapper around `fangraphs_search` with stats="pit". + Currently only supports single season queries. Use `fangraphs_search` for multiple season queries. + + Args: + qual (int | str): Innings-pitched qualifier — "y" for the qualified + threshold, an integer for a custom minimum, or 0 for no minimum. + season (int): Season to query. + debug (bool): If True, log the request params and URL at DEBUG level. + + Returns: + pd.DataFrame: The pitching leaderboard, with HTML stripped from Name and Team. + """ + return fangraphs_search( + stats="pit", qual=qual, season=season, season1=season, debug=debug + ) diff --git a/src/baseball_stats_python/utils/utils.py b/src/baseball_stats_python/utils/utils.py index 2f93764..40b4f00 100644 --- a/src/baseball_stats_python/utils/utils.py +++ b/src/baseball_stats_python/utils/utils.py @@ -1,6 +1,14 @@ +import re from datetime import datetime +def extract_text_from_html(text): + try: + return re.search(">(.+?)<", text).group(1) + except AttributeError: + return text + + def validate_date_format(dt: str) -> datetime: date_format = '%Y-%m-%d' try: diff --git a/tests/fangraphs/test_fangraphs_search.py b/tests/fangraphs/test_fangraphs_search.py new file mode 100644 index 0000000..f9d420b --- /dev/null +++ b/tests/fangraphs/test_fangraphs_search.py @@ -0,0 +1,74 @@ +import json + +import pandas as pd +import pytest +import requests + +from baseball_stats_python.fangraphs.fangraphs_search import fg_batting, fg_pitching + + +def make_fake_get(captured, data): + def fake_get(url, params=None): + captured["params"] = params + + class FakeResponse: + content = json.dumps({"data": data}).encode() + url = "https://fake" + + def raise_for_status(self): + pass + + return 
FakeResponse() + + return fake_get + + +def test_fg_batting_cleans_html_and_sends_bat_stats(monkeypatch): + captured = {} + monkeypatch.setattr( + requests, + "get", + make_fake_get(captured, [{"Name": 'Mike Trout', "Team": "LAA"}]), + ) + + df = fg_batting() + + assert isinstance(df, pd.DataFrame) + assert df["Name"].iloc[0] == "Mike Trout" + assert df["Team"].iloc[0] == "LAA" + assert captured["params"]["stats"] == "bat" + + +def test_fg_pitching_sends_pit_stats(monkeypatch): + captured = {} + monkeypatch.setattr( + requests, + "get", + make_fake_get(captured, [{"Name": 'Gerrit Cole', "Team": "NYY"}]), + ) + + df = fg_pitching() + + assert df["Name"].iloc[0] == "Gerrit Cole" + assert captured["params"]["stats"] == "pit" + + +def test_fg_search_returns_empty_dataframe(monkeypatch): + captured = {} + monkeypatch.setattr(requests, "get", make_fake_get(captured, [])) + + df = fg_batting() + + assert isinstance(df, pd.DataFrame) + assert df.empty + + +def test_fg_search_wraps_request_exception(monkeypatch): + def boom(url, params=None): + raise requests.exceptions.RequestException("network down") + + monkeypatch.setattr(requests, "get", boom) + + with pytest.raises(Exception) as e: + fg_batting() + assert str(e.value).startswith("Failed to fetch data:") diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index 89e52bb..d8d9d2e 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -2,7 +2,16 @@ import pytest -from baseball_stats_python.utils.utils import validate_date_format, validate_date_range +from baseball_stats_python.utils.utils import ( + extract_text_from_html, + validate_date_format, + validate_date_range, +) + + +def test_extract_text_from_html(): + assert extract_text_from_html('Mike Trout') == 'Mike Trout' + assert extract_text_from_html('plain text') == 'plain text' def test_validate_date_format(): From dbd0b6529c2cf3af288104d91bb801e157f98cd5 Mon Sep 17 00:00:00 2001 From: ss77995ss Date: Fri, 26 Jun 2026 01:07:33 +0800 
Subject: [PATCH 2/3] feat(Fangraphs): Add documentation for `fangraphs_search` --- README.md | 21 +++++++ docs/fangraphs_search.md | 120 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 141 insertions(+) create mode 100644 docs/fangraphs_search.md diff --git a/README.md b/README.md index 533bd59..6e17252 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,27 @@ darvish_mlbam_id = mlbam_id_search('Yu Darvish').iloc[0]['id'] statcast_pitcher_search(pitchers_lookup=darvish_mlbam_id) ``` +#### `fangraphs_search` + +Get batting or pitching leaderboard data from Fangraphs' [major-league leaders API](https://www.fangraphs.com/leaders/major-league). `fg_batting` and `fg_pitching` are convenience wrappers for single-season queries. See documentation [here](docs/fangraphs_search.md). + +```python +from baseball_stats_python import fangraphs_search, fg_batting, fg_pitching + +# Get the 2026 batting leaderboard (no plate-appearance minimum by default) +fg_batting(season=2026) + +# Get the 2026 pitching leaderboard (no innings-pitched minimum by default) +fg_pitching(season=2026) + +# Use fangraphs_search directly for multi-season or finer-grained queries +fangraphs_search( + stats="bat", + season1=2024, + season=2026 +) +``` + ## Contributing Welcome to open issues or pull requests to contribute to this project. Please read [CONTRIBUTING.md](https://github.com/ss77995ss/baseball-stats-python/blob/main/CONTRIBUTING.md) for more details. diff --git a/docs/fangraphs_search.md b/docs/fangraphs_search.md new file mode 100644 index 0000000..5c14d92 --- /dev/null +++ b/docs/fangraphs_search.md @@ -0,0 +1,120 @@ +# Fangraphs Search + +## `fangraphs_search` + +Function to get leaderboard data from the Fangraphs [major-league leaders API](https://www.fangraphs.com/leaders/major-league) with custom filters. The `stats` argument selects the batting (`bat`) or pitching (`pit`) leaderboard. + +**Note:** Not all argument descriptions are fully verified against the Fangraphs API yet. 
+ +**Examples** + +```python +from baseball_stats_python import fangraphs_search + +# Get the 2026 qualified batting leaderboard +fangraphs_search( + stats="bat", + season=2026 +) + +# Get the qualified pitching leaderboard across multiple seasons (2024-2026) +fangraphs_search( + stats="pit", + season1=2024, + season=2026 +) + +# Filter by a date range (set month=1000 to activate the date filter) +fangraphs_search( + stats="bat", + month=1000, + start_date="2026-04-01", + end_date="2026-04-30" +) + +# Add debug=True to see more information +fangraphs_search( + stats="bat", + debug=True +) +``` + +**Arguments** + +| Argument | Data Type | Description | Default | +| ----------- | -------------- | ----------------------------------------------------------------------------------------------------- | -------------- | +| stats | `str` | Stat group to fetch — `bat` for batting or `pit` for pitching. | "bat" | +| age | `int` | Filter by player age. `0` means no age filter. | 0 | +| pos | `str` | Position filter (e.g. `all`, `c`, `1b`, `of`, `np` for non-pitchers). | "all" | +| lg | `str` | League filter — `all`, `al`, or `nl`. | "all" | +| qual | `int` or `str` | Plate-appearance/innings qualifier. `y` for the qualified threshold, an integer for a custom minimum, or `0` for no minimum. | "y" | +| season | `int` | End season of the range to query. | Current season | +| season1 | `int` | Start season of the range. Equals `season` for a single season. | Current season | +| start_date | `str` | Range start date in `YYYY-MM-DD` format (set `month=1000` to activate this filter). | "2026-03-01" | +| end_date | `str` | Range end date in `YYYY-MM-DD` format (set `month=1000` to activate this filter). | "2026-11-01" | +| month | `int` | Split by calendar month/period. `0` means full season. `1000` means use `start_date`/`end_date`. | 0 | +| hand | `str` | Batter/pitcher handedness filter — `R`, `L`, or `""` for both. 
| "" | +| team | `str` | Team filter by Fangraphs team id; `""` means all teams. | "" | +| pageitems | `int` | Number of rows per page. Default is high enough to return all rows. | 10000 | +| pagenum | `int` | Page number to fetch. | 1 | +| ind | `int` | Split seasons individually (`1`) or aggregate the range into one row (`0`). | 0 | +| rost | `int` | Roster filter — `0` for all players, `1` for active roster only. | 0 | +| players | `str` | Filter to specific player id(s); `""` means all players. | "" | +| type | `int` | Stat dashboard/column set id (`8` is the default dashboard). | 8 | +| postseason | `str` | Set to a truthy value to query postseason stats; `""` for regular season. | "" | +| heatmapqual | `str` | Heatmap qualifier flag passed through to the API. | "" | +| sortdir | `str` | Sort direction — `default`, `asc`, or `desc`. | "default" | +| sortstat | `str` | Column to sort by (e.g. `WAR`). | "WAR" | +| debug | `bool` | Whether to print debug information (request params and URL). | False | + +**Return** + +A DataFrame of the leaderboard rows, with HTML stripped from the `Name` and `Team` columns. An empty DataFrame is returned when the API has no matching data. + +## `fg_batting` + +Convenience wrapper around `fangraphs_search` with `stats="bat"`. Currently only supports single-season queries — use `fangraphs_search` for multi-season queries. 
+ +**Examples** + +```python +from baseball_stats_python import fg_batting + +# Get the 2026 batting leaderboard (no plate-appearance minimum by default) +fg_batting(season=2026) + +# Require a custom plate-appearance minimum (the default is no minimum) +fg_batting(qual=100, season=2026) +``` + +**Arguments** + +| Argument | Data Type | Description | Default | +| -------- | -------------- | --------------------------------------------------------------------------------------------------- | -------------- | +| qual | `int` or `str` | Plate-appearance qualifier — `y` for the qualified threshold, an integer for a custom minimum, or `0` for no minimum. | 0 | +| season | `int` | Season to query. | Current season | +| debug | `bool` | Whether to print debug information. | False | + +## `fg_pitching` + +Convenience wrapper around `fangraphs_search` with `stats="pit"`. Currently only supports single-season queries — use `fangraphs_search` for multi-season queries. + +**Examples** + +```python +from baseball_stats_python import fg_pitching + +# Get the 2026 pitching leaderboard (no innings-pitched minimum by default) +fg_pitching(season=2026) + +# Require a custom innings-pitched minimum (the default is no minimum) +fg_pitching(qual=50, season=2026) +``` + +**Arguments** + +| Argument | Data Type | Description | Default | +| -------- | -------------- | --------------------------------------------------------------------------------------------------- | -------------- | +| qual | `int` or `str` | Innings-pitched qualifier — `y` for the qualified threshold, an integer for a custom minimum, or `0` for no minimum. | 0 | +| season | `int` | Season to query. | Current season | +| debug | `bool` | Whether to print debug information. 
| False | From 69619e50bffde6e245899c8f24a8c247b84cec35 Mon Sep 17 00:00:00 2001 From: ss77995ss Date: Fri, 26 Jun 2026 01:17:55 +0800 Subject: [PATCH 3/3] chore(Github Action): Update ruff-action version --- .github/workflows/publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 43d0279..4ed190b 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -33,7 +33,7 @@ jobs: run: | python -m pip install --upgrade pip pip install ruff - - uses: astral-sh/ruff-action@v3 + - uses: astral-sh/ruff-action@v4.0.0 with: args: 'check'