diff --git a/solarfarmer/__init__.py b/solarfarmer/__init__.py index c15143a..e748d72 100644 --- a/solarfarmer/__init__.py +++ b/solarfarmer/__init__.py @@ -58,7 +58,13 @@ TransformerSpecification, ValidationMessage, ) -from .weather import TSV_COLUMNS, check_sequential_year_timestamps, from_dataframe, from_pvlib +from .weather import ( + TSV_COLUMNS, + check_sequential_year_timestamps, + from_dataframe, + from_pvlib, + from_solcast, +) __all__ = [ "__version__", @@ -122,5 +128,6 @@ "ValidationMessage", "from_dataframe", "from_pvlib", + "from_solcast", "check_sequential_year_timestamps", ] diff --git a/solarfarmer/weather.py b/solarfarmer/weather.py index 3726ae5..cd304af 100644 --- a/solarfarmer/weather.py +++ b/solarfarmer/weather.py @@ -60,6 +60,43 @@ ) df.index.name = "DateTime" df.to_csv("weather.tsv", sep="\\t") + +Solcast Column Mapping +~~~~~~~~~~~~~~~~~~~~~~ +When converting a Solcast DataFrame to SolarFarmer TSV format, the following +column mapping and unit conversions are applied automatically by +:func:`from_solcast`. + +Solcast ``period_end`` timestamps are shifted to period-beginning by +subtracting the inferred time resolution (e.g. −30 min for 30-min data). +``precipitable_water`` is in kg/m² (equivalent to mm) and is divided by 10 +to obtain cm as required by SolarFarmer. ``surface_pressure`` is already in +hPa which equals mbar, so no pressure conversion is needed. + +Only columns that are present in the DataFrame are mapped; ``period_end``, +``air_temp``, and ``ghi`` are the most commonly available columns but the +others are all optional. The ``gti`` (plane-of-array irradiance) column is +not mapped; SolarFarmer derives POA irradiance internally from GHI/DHI. 
# Solcast column name -> SolarFarmer TSV column name.
# The trailing comment on each entry records the unit handling applied by
# from_solcast(). ``gti`` (plane-of-array irradiance) is intentionally absent:
# it has no SolarFarmer equivalent and is dropped by the converter.
SOLCAST_COLUMN_MAP: dict[str, str] = {
    "ghi": "GHI",  # W/m² → W/m² (none)
    "dhi": "DHI",  # W/m² → W/m² (none)
    "air_temp": "TAmb",  # °C → °C (none)
    "wind_speed_10m": "WS",  # m/s → m/s (none)
    "surface_pressure": "Pressure",  # hPa → mbar (hPa = mbar, none)
    "precipitable_water": "Water",  # kg/m² (= mm) → cm (÷ 10)
    "relative_humidity": "RH",  # % → % (none)
    "albedo": "Albedo",  # fraction → fraction (none)
    # The three soiling sources below share one target column; supply only one.
    "hsu_loss_fraction": "Soiling",  # fraction → fraction (none)
    "kimber_loss_fraction": "Soiling",  # fraction → fraction (none)
    "soiling": "Soiling",  # fraction → fraction (none)
}


def from_solcast(
    df: pd.DataFrame,
    output_path: str | pathlib.Path,
) -> pathlib.Path:
    """Convert a Solcast DataFrame to a SolarFarmer TSV weather file.

    Wrapper around :func:`from_dataframe` with the standard Solcast column
    mapping (``SOLCAST_COLUMN_MAP``). Two conversions are applied before
    writing:

    * **Timestamp shift**: Solcast timestamps represent ``period_end``;
      SolarFarmer expects ``period_beginning``. The time resolution is
      inferred as the median consecutive time difference and subtracted
      from every timestamp (see :func:`shift_period_end_to_beginning`).
    * **Precipitable water**: Solcast ``precipitable_water`` is in kg/m²
      (equivalent to mm); SolarFarmer expects cm, so the column is divided
      by 10.

    ``surface_pressure`` is already in hPa which equals mbar, so
    :func:`from_dataframe` is called with ``pressure_pa_to_mbar=False``.

    The input DataFrame is never modified.

    .. note:: Requires ``pandas``. Install with ``pip install 'dnv-solarfarmer[all]'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Solcast-style DataFrame with a DatetimeIndex (``period_end``) and
        any subset of the columns listed in ``SOLCAST_COLUMN_MAP``. Columns
        not in the map (e.g. ``gti``) are dropped. Only one soiling column
        (``hsu_loss_fraction``, ``kimber_loss_fraction`` or ``soiling``)
        should be present: all three are renamed to ``Soiling`` and this
        function does not de-duplicate them.
    output_path : str or Path
        Destination file path, passed through to :func:`from_dataframe`.

    Returns
    -------
    pathlib.Path
        Whatever :func:`from_dataframe` returns for the written file.

    Raises
    ------
    ValueError
        If the DataFrame has no DatetimeIndex, or has exactly one row so
        that the time resolution cannot be inferred.
    ImportError
        If pandas is not installed.
    """
    # The helper checks pandas availability and the DatetimeIndex, and returns
    # a shifted *copy*, so no separate validation or df.copy() is needed here.
    shifted = shift_period_end_to_beginning(df)

    # Keep only columns with a SolarFarmer equivalent (drops gti and any other
    # custom fields).
    mapped = shifted.loc[:, shifted.columns.isin(list(SOLCAST_COLUMN_MAP))]

    # precipitable_water: kg/m² (= mm) -> cm. ``assign`` builds a new frame
    # rather than writing into a column subset, which avoids pandas'
    # SettingWithCopyWarning when columns were dropped above.
    if "precipitable_water" in mapped.columns:
        mapped = mapped.assign(precipitable_water=mapped["precipitable_water"] / 10)

    # NOTE(review): if several soiling columns are present they all become
    # "Soiling" after renaming; how duplicates are resolved depends on
    # from_dataframe — confirm before relying on any "last one wins" rule.
    return from_dataframe(
        mapped,
        output_path,
        column_map=SOLCAST_COLUMN_MAP,
        pressure_pa_to_mbar=False,  # Solcast surface_pressure is hPa = mbar
    )


def shift_period_end_to_beginning(df: pd.DataFrame) -> pd.DataFrame:
    """Shift a DatetimeIndex from period_end to period_beginning.

    The time resolution is inferred as the **median** difference between
    consecutive timestamps and subtracted from every timestamp. Useful for
    converters where the source data provides period_end timestamps but the
    target format expects period_beginning.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame with a DatetimeIndex representing period_end timestamps.
        It is not modified.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` whose DatetimeIndex is shifted to period_beginning.
        An empty DataFrame is returned as an unchanged copy.

    Raises
    ------
    ValueError
        If the DataFrame has no DatetimeIndex, or if it is non-empty but
        contains fewer than two valid timestamps, so no time resolution can
        be inferred.
    ImportError
        If pandas is not installed.
    """
    try:
        import pandas as pd
    except ImportError:
        raise ImportError(PANDAS_INSTALL_MSG) from None

    if not isinstance(df.index, pd.DatetimeIndex):
        raise ValueError(
            "DataFrame must have a DatetimeIndex. "
            "Use df.set_index(pd.to_datetime(df['period_end'])) or similar."
        )

    out = df.copy()
    if len(out.index) == 0:
        # Nothing to shift.
        return out

    steps = out.index.to_series().diff().dropna()
    if steps.empty:
        # With a single timestamp the median would be NaT, and subtracting NaT
        # would silently turn every timestamp into NaT. Fail loudly instead.
        raise ValueError(
            "Cannot infer the time resolution: at least two timestamps are "
            "required to shift period_end to period_beginning."
        )

    out.index = out.index - steps.median()
    return out
lines[1].split("\t") + assert float(first_data[pressure_idx]) == pytest.approx(1013.25) + + def test_precipitable_water_converted(self, tmp_path, solcast_df): + """precipitable_water is kg/m² (= mm), must be divided by 10 to get cm.""" + out = from_solcast(solcast_df, tmp_path / "out.tsv") + lines = out.read_text().splitlines() + header = lines[0].split("\t") + water_idx = header.index("Water") + first_data = lines[1].split("\t") + assert float(first_data[water_idx]) == pytest.approx(0.1) # 1.0 / 10 + + def test_timestamp_shifted_to_period_beginning(self, tmp_path, solcast_df): + """Solcast period_end timestamps must be shifted back by the time resolution.""" + out = from_solcast(solcast_df, tmp_path / "out.tsv") + first_data = out.read_text().splitlines()[1] + # Original index starts at 00:30; shifted by -30 min → 00:00 + assert "T00:00" in first_data + + def test_unknown_columns_dropped(self, tmp_path): + """Columns not in SOLCAST_COLUMN_MAP (e.g. gti) are dropped.""" + pd = pytest.importorskip("pandas") + idx = pd.date_range("1990-01-01 01:00", periods=2, freq="h", tz="UTC") + df = pd.DataFrame( + {"ghi": [0, 500], "air_temp": [5.0, 15.0], "gti": [100.0, 200.0]}, + index=idx, + ) + out = from_solcast(df, tmp_path / "out.tsv") + header = out.read_text().splitlines()[0] + assert "gti" not in header + + def test_no_datetimeindex_raises(self, tmp_path): + pd = pytest.importorskip("pandas") + df = pd.DataFrame({"ghi": [0, 100], "air_temp": [5.0, 15.0]}) + with pytest.raises(ValueError, match="DatetimeIndex"): + from_solcast(df, tmp_path / "out.tsv") + + def test_output_passes_validation(self, tmp_path, solcast_df): + """TSV written by from_solcast should pass check_sequential_year_timestamps.""" + out = from_solcast(solcast_df, tmp_path / "out.tsv") + check_sequential_year_timestamps(out) # should not raise + + @pytest.mark.parametrize( + "soiling_col", ["hsu_loss_fraction", "kimber_loss_fraction", "soiling"] + ) + def test_soiling_columns_mapped(self, tmp_path, 
soiling_col): + """hsu_loss_fraction, kimber_loss_fraction, and soiling all map to Soiling.""" + pd = pytest.importorskip("pandas") + idx = pd.date_range("1990-01-01 00:30", periods=3, freq="30min", tz="UTC") + df = pd.DataFrame( + {"ghi": [0, 500, 800], "air_temp": [5.0, 15.0, 25.0], soiling_col: [0.01, 0.02, 0.03]}, + index=idx, + ) + out = from_solcast(df, tmp_path / "out.tsv") + header = out.read_text().splitlines()[0].split("\t") + assert "Soiling" in header + soiling_idx = header.index("Soiling") + first_data = out.read_text().splitlines()[1].split("\t") + assert float(first_data[soiling_idx]) == pytest.approx(0.01) + + +class TestShiftPeriodEndToBeginning: + """Tests for shift_period_end_to_beginning().""" + + def test_shifts_timestamps_by_time_resolution(self): + """Timestamps should be shifted back by the inferred time resolution.""" + pd = pytest.importorskip("pandas") + # Create 30-minute resolution data starting at 00:30 + idx = pd.date_range("1990-01-01 00:30", periods=3, freq="30min", tz="UTC") + df = pd.DataFrame({"ghi": [0, 100, 200]}, index=idx) + + result = shift_period_end_to_beginning(df) + + # Result should be shifted back by 30 minutes + expected_idx = pd.date_range("1990-01-01 00:00", periods=3, freq="30min", tz="UTC") + pd.testing.assert_index_equal(result.index, expected_idx) + # Data values should be unchanged (indices differ, so compare values only) + assert list(result["ghi"].values) == list(df["ghi"].values) + + def test_no_datetimeindex_raises(self): + """Should raise ValueError when DataFrame has no DatetimeIndex.""" + pd = pytest.importorskip("pandas") + df = pd.DataFrame({"ghi": [0, 100, 200]}) + + with pytest.raises(ValueError, match="DatetimeIndex"): + shift_period_end_to_beginning(df)