dnv-opensource · javlor · Apr 21, 2026 · Apr 22, 2026 · Apr 22, 2026 · Apr 22, 2026
diff --git a/solarfarmer/__init__.py b/solarfarmer/__init__.py
@@ -58,7 +58,13 @@
     TransformerSpecification,
     ValidationMessage,
 )
-from .weather import TSV_COLUMNS, check_sequential_year_timestamps, from_dataframe, from_pvlib
+from .weather import (
+    TSV_COLUMNS,
+    check_sequential_year_timestamps,
+    from_dataframe,
+    from_pvlib,
+    from_solcast,
+)
 
 __all__ = [
     "__version__",
@@ -122,5 +128,6 @@
     "ValidationMessage",
     "from_dataframe",
     "from_pvlib",
+    "from_solcast",
     "check_sequential_year_timestamps",
 ]
diff --git a/solarfarmer/weather.py b/solarfarmer/weather.py
@@ -60,6 +60,43 @@
     )
     df.index.name = "DateTime"
     df.to_csv("weather.tsv", sep="\\t")
+
+Solcast Column Mapping
+~~~~~~~~~~~~~~~~~~~~~~
+When converting a Solcast DataFrame to SolarFarmer TSV format, the following
+column mapping and unit conversions are applied automatically by
+:func:`from_solcast`.
+
+Solcast ``period_end`` timestamps are shifted to period-beginning by
+subtracting the inferred time resolution (e.g. −30 min for 30-min data).
+``precipitable_water`` is in kg/m² (equivalent to mm) and is divided by 10
+to obtain cm as required by SolarFarmer.  ``surface_pressure`` is already in
+hPa which equals mbar, so no pressure conversion is needed.
+
+Only columns that are present in the DataFrame are mapped; ``period_end``,
+``air_temp``, and ``ghi`` are the most commonly available columns but the
+others are all optional.  The ``gti`` (plane-of-array irradiance) column is
+not mapped; SolarFarmer derives POA irradiance internally from GHI/DHI.
+
+========================  ============  =====================================
+Solcast column            SF column     Unit conversion
+========================  ============  =====================================
+``ghi``                   ``GHI``       W/m² → W/m² (none)
+``dhi``                   ``DHI``       W/m² → W/m² (none)
+``air_temp``              ``TAmb``      °C → °C (none)
+``wind_speed_10m``        ``WS``        m/s → m/s (none)
+``surface_pressure``      ``Pressure``  hPa → mbar (hPa = mbar, none)
+``precipitable_water``    ``Water``     kg/m² → cm (÷ 10)
+``relative_humidity``     ``RH``        % → % (none)
+``albedo``                ``Albedo``    fraction → fraction (none)
+``hsu_loss_fraction``     ``Soiling``   fraction → fraction (none)
+``kimber_loss_fraction``  ``Soiling``   fraction → fraction (none)
+``soiling``               ``Soiling``   fraction → fraction (none)
+========================  ============  =====================================
+
+.. note::
+   Only one soiling column should be present in the DataFrame. If multiple
+   are provided, the last one wins after column renaming.
 """
 
 from __future__ import annotations
@@ -78,6 +115,8 @@
     "check_sequential_year_timestamps",
     "from_dataframe",
     "from_pvlib",
+    "from_solcast",
+    "shift_period_end_to_beginning",
 ]
 
 
@@ -130,6 +169,20 @@ def check_sequential_year_timestamps(file_path: str | pathlib.Path) -> None:
     "pressure": "Pressure",
 }
 
+SOLCAST_COLUMN_MAP: dict[str, str] = {  # Solcast column name -> SolarFarmer TSV column name
+    "ghi": "GHI",
+    "dhi": "DHI",
+    "air_temp": "TAmb",
+    "wind_speed_10m": "WS",
+    "surface_pressure": "Pressure",  # from_solcast passes pressure_pa_to_mbar=False (hPa == mbar)
+    "precipitable_water": "Water",  # from_solcast divides this column by 10 (kg/m² -> cm)
+    "relative_humidity": "RH",
+    "albedo": "Albedo",
+    "hsu_loss_fraction": "Soiling",  # three alternative inputs share the single Soiling target
+    "kimber_loss_fraction": "Soiling",
+    "soiling": "Soiling",
+}
+
 
 def from_dataframe(
     df: pd.DataFrame,
@@ -241,6 +294,131 @@ def from_pvlib(
     )
 
 
+def from_solcast(
+    df: pd.DataFrame,
+    output_path: str | pathlib.Path,
+) -> pathlib.Path:
+    """Convert a Solcast DataFrame to a SolarFarmer TSV weather file.
+
+    Wrapper around :func:`from_dataframe` with the standard Solcast column
+    mapping.  Two automatic conversions are applied before writing:
+
+    * **Timestamp shift**: Solcast timestamps represent ``period_end``;
+      SolarFarmer expects ``period_beginning``.  The time resolution is
+      inferred from the median consecutive time difference and subtracted
+      from every timestamp (see :func:`shift_period_end_to_beginning`).
+    * **Precipitable water**: Solcast ``precipitable_water`` is in kg/m²
+      (equivalent to mm); SolarFarmer expects cm, so the column is divided
+      by 10.
+
+    ``surface_pressure`` is already in hPa which equals mbar, so no pressure
+    conversion is needed.
+
+    Only columns that are present in the DataFrame are mapped; the minimum
+    required columns are ``period_end`` (as the index), ``air_temp``, and
+    ``ghi``.  All other columns (``dhi``, ``wind_speed_10m``,
+    ``surface_pressure``, ``precipitable_water``, ``relative_humidity``,
+    ``albedo``, ``hsu_loss_fraction``, ``kimber_loss_fraction``, ``soiling``)
+    are optional and mapped when present.
+
+    .. note:: Requires ``pandas``. Install with ``pip install 'dnv-solarfarmer[all]'``.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        Solcast-style DataFrame with a DatetimeIndex (``period_end``) and
+        any subset of columns: ``ghi``, ``dhi``, ``air_temp``,
+        ``wind_speed_10m``, ``surface_pressure``, ``precipitable_water``,
+        ``relative_humidity``, ``albedo``, ``hsu_loss_fraction``,
+        ``kimber_loss_fraction``, ``soiling``.  Unmapped columns are removed.
+        Only one soiling column should be present, because all three map to the
+        same ``Soiling`` output column.  The input frame is not modified.
+    output_path : str or Path
+        Destination file path.
+
+    Returns
+    -------
+    pathlib.Path
+
+    Raises
+    ------
+    ValueError
+        If the DataFrame has no DatetimeIndex.
+    ImportError
+        If pandas is not installed.
+    """
+    try:
+        import pandas as pd
+    except ImportError:
+        raise ImportError(PANDAS_INSTALL_MSG) from None
+
+    if not isinstance(df.index, pd.DatetimeIndex):
+        raise ValueError(
+            "DataFrame must have a DatetimeIndex. "
+            "Use df.set_index(pd.to_datetime(df['period_end'])) or similar."
+        )
+
+    # Solcast timestamps are period_end; the helper returns a shifted copy of df.
+    out = shift_period_end_to_beginning(df)
+
+    # Drop columns that have no SolarFarmer equivalent (e.g. gti, any other custom fields).
+    # NOTE(review): several soiling inputs would all be renamed to "Soiling";
+    # confirm how from_dataframe resolves such duplicate target columns.
+    out = out[[c for c in out.columns if c in SOLCAST_COLUMN_MAP]]
+
+    # precipitable_water: Solcast provides kg/m² (= mm); SolarFarmer expects cm.
+    if "precipitable_water" in out.columns:
+        out["precipitable_water"] = out["precipitable_water"] / 10
+
+    return from_dataframe(
+        out,
+        output_path,
+        column_map=SOLCAST_COLUMN_MAP,
+        pressure_pa_to_mbar=False,  # Solcast surface_pressure is hPa = mbar
+    )
+
+
+def shift_period_end_to_beginning(df: pd.DataFrame) -> pd.DataFrame:
+    """Shift DatetimeIndex from period_end to period_beginning.
+
+    The shift equals the median consecutive time difference of the index.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        DataFrame with a DatetimeIndex representing period_end timestamps.
+
+    Returns
+    -------
+    pandas.DataFrame
+        Copy of ``df`` with its DatetimeIndex shifted to period_beginning.
+
+    Raises
+    ------
+    ValueError
+        If the index is not a DatetimeIndex or no time step can be inferred.
+    ImportError
+        If pandas is not installed.
+    """
+    try:
+        import pandas as pd
+    except ImportError:
+        raise ImportError(PANDAS_INSTALL_MSG) from None
+
+    if not isinstance(df.index, pd.DatetimeIndex):
+        raise ValueError(
+            "DataFrame must have a DatetimeIndex. "
+            "Use df.set_index(pd.to_datetime(df['period_end'])) or similar."
+        )
+
+    out = df.copy()
+    inferred_timedelta = out.index.to_series().diff().median()  # NaT if < 2 rows
+    if pd.isna(inferred_timedelta) and len(out.index) > 0:
+        raise ValueError("Cannot infer time resolution: need at least two timestamps.")
+    out.index = out.index - inferred_timedelta
+    return out
+
+
 TSV_COLUMNS: dict = {
     "required": [
         {

diff --git a/tests/test_weather.py b/tests/test_weather.py
@@ -7,9 +7,12 @@
 
 from solarfarmer.weather import (
     PVLIB_COLUMN_MAP,
+    SOLCAST_COLUMN_MAP,
     check_sequential_year_timestamps,
     from_dataframe,
     from_pvlib,
+    from_solcast,
+    shift_period_end_to_beginning,
 )
 
 
@@ -186,3 +189,124 @@ def test_output_passes_validation(self, tmp_path, pvlib_df):
         """TSV written by from_pvlib should pass check_sequential_year_timestamps."""
         out = from_pvlib(pvlib_df, tmp_path / "out.tsv")
         check_sequential_year_timestamps(out)  # should not raise
+
+
+class TestFromSolcast:
+    """Tests for from_solcast() convenience wrapper."""
+
+    @pytest.fixture
+    def solcast_df(self):  # three 30-min period_end rows (00:30, 01:00, 01:30 UTC)
+        pd = pytest.importorskip("pandas")
+        idx = pd.date_range("1990-01-01 00:30", periods=3, freq="30min", tz="UTC")
+        return pd.DataFrame(
+            {
+                "ghi": [0, 500, 800],
+                "dhi": [0, 200, 300],
+                "air_temp": [5.0, 15.0, 25.0],
+                "wind_speed_10m": [2.0, 3.0, 4.0],
+                "surface_pressure": [1013.25, 1013.25, 1013.25],
+                "precipitable_water": [1.0, 2.0, 3.0],
+            },
+            index=idx,
+        )
+
+    def test_columns_renamed(self, tmp_path, solcast_df):
+        """Mapped fixture columns appear as exact SolarFarmer header names."""
+        out = from_solcast(solcast_df, tmp_path / "out.tsv")
+        header = out.read_text().splitlines()[0].split("\t")
+        for solcast_col, sf_col in SOLCAST_COLUMN_MAP.items():
+            if solcast_col in solcast_df.columns:
+                assert sf_col in header
+
+    def test_pressure_not_converted(self, tmp_path, solcast_df):
+        """surface_pressure is hPa = mbar; no conversion should be applied."""
+        out = from_solcast(solcast_df, tmp_path / "out.tsv")
+        lines = out.read_text().splitlines()
+        header = lines[0].split("\t")
+        pressure_idx = header.index("Pressure")
+        first_data = lines[1].split("\t")
+        assert float(first_data[pressure_idx]) == pytest.approx(1013.25)
+
+    def test_precipitable_water_converted(self, tmp_path, solcast_df):
+        """precipitable_water is kg/m² (= mm), must be divided by 10 to get cm."""
+        out = from_solcast(solcast_df, tmp_path / "out.tsv")
+        lines = out.read_text().splitlines()
+        header = lines[0].split("\t")
+        water_idx = header.index("Water")
+        first_data = lines[1].split("\t")
+        assert float(first_data[water_idx]) == pytest.approx(0.1)  # 1.0 / 10
+
+    def test_timestamp_shifted_to_period_beginning(self, tmp_path, solcast_df):
+        """Solcast period_end timestamps must be shifted back by the time resolution."""
+        out = from_solcast(solcast_df, tmp_path / "out.tsv")
+        first_data = out.read_text().splitlines()[1]
+        # Original index starts at 00:30; shifted by -30 min → 00:00
+        assert "T00:00" in first_data
+
+    def test_unknown_columns_dropped(self, tmp_path):
+        """Columns not in SOLCAST_COLUMN_MAP (e.g. gti) are dropped."""
+        pd = pytest.importorskip("pandas")
+        idx = pd.date_range("1990-01-01 01:00", periods=2, freq="h", tz="UTC")
+        df = pd.DataFrame(
+            {"ghi": [0, 500], "air_temp": [5.0, 15.0], "gti": [100.0, 200.0]},
+            index=idx,
+        )
+        out = from_solcast(df, tmp_path / "out.tsv")
+        header = out.read_text().splitlines()[0].split("\t")
+        assert "gti" not in header
+
+    def test_no_datetimeindex_raises(self, tmp_path):
+        pd = pytest.importorskip("pandas")
+        df = pd.DataFrame({"ghi": [0, 100], "air_temp": [5.0, 15.0]})  # default RangeIndex
+        with pytest.raises(ValueError, match="DatetimeIndex"):
+            from_solcast(df, tmp_path / "out.tsv")
+
+    def test_output_passes_validation(self, tmp_path, solcast_df):
+        """TSV written by from_solcast should pass check_sequential_year_timestamps."""
+        out = from_solcast(solcast_df, tmp_path / "out.tsv")
+        check_sequential_year_timestamps(out)  # should not raise
+
+    @pytest.mark.parametrize(
+        "soiling_col", ["hsu_loss_fraction", "kimber_loss_fraction", "soiling"]
+    )
+    def test_soiling_columns_mapped(self, tmp_path, soiling_col):
+        """hsu_loss_fraction, kimber_loss_fraction, and soiling all map to Soiling."""
+        pd = pytest.importorskip("pandas")
+        idx = pd.date_range("1990-01-01 00:30", periods=3, freq="30min", tz="UTC")
+        df = pd.DataFrame(
+            {"ghi": [0, 500, 800], "air_temp": [5.0, 15.0, 25.0], soiling_col: [0.01, 0.02, 0.03]},
+            index=idx,
+        )
+        out = from_solcast(df, tmp_path / "out.tsv")
+        lines = out.read_text().splitlines()  # read the file once for header and data
+        header = lines[0].split("\t")
+        assert "Soiling" in header
+        first_data = lines[1].split("\t")
+        assert float(first_data[header.index("Soiling")]) == pytest.approx(0.01)
+
+
+class TestShiftPeriodEndToBeginning:
+    """Tests for shift_period_end_to_beginning()."""
+
+    def test_shifts_timestamps_by_time_resolution(self):
+        """Timestamps should be shifted back by the inferred time resolution."""
+        pd = pytest.importorskip("pandas")
+        # 30-minute resolution data starting at 00:30
+        idx = pd.date_range("1990-01-01 00:30", periods=3, freq="30min", tz="UTC")
+        df = pd.DataFrame({"ghi": [0, 100, 200]}, index=idx)
+
+        result = shift_period_end_to_beginning(df)
+
+        expected_idx = pd.date_range("1990-01-01 00:00", periods=3, freq="30min", tz="UTC")
+        pd.testing.assert_index_equal(result.index, expected_idx)
+        # The caller's frame must stay untouched; data values carry over unchanged.
+        pd.testing.assert_index_equal(df.index, idx)
+        assert list(result["ghi"].values) == list(df["ghi"].values)
+
+    def test_no_datetimeindex_raises(self):
+        """Should raise ValueError when DataFrame has no DatetimeIndex."""
+        pd = pytest.importorskip("pandas")
+        df = pd.DataFrame({"ghi": [0, 100, 200]})
+
+        with pytest.raises(ValueError, match="DatetimeIndex"):
+            shift_period_end_to_beginning(df)