Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion solarfarmer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,13 @@
TransformerSpecification,
ValidationMessage,
)
from .weather import TSV_COLUMNS, check_sequential_year_timestamps, from_dataframe, from_pvlib
from .weather import (
TSV_COLUMNS,
check_sequential_year_timestamps,
from_dataframe,
from_pvlib,
from_solcast,
)

__all__ = [
"__version__",
Expand Down Expand Up @@ -122,5 +128,6 @@
"ValidationMessage",
"from_dataframe",
"from_pvlib",
"from_solcast",
"check_sequential_year_timestamps",
]
178 changes: 178 additions & 0 deletions solarfarmer/weather.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,43 @@
)
df.index.name = "DateTime"
df.to_csv("weather.tsv", sep="\\t")

Solcast Column Mapping
~~~~~~~~~~~~~~~~~~~~~~
When converting a Solcast DataFrame to SolarFarmer TSV format, the following
column mapping and unit conversions are applied automatically by
:func:`from_solcast`.

Solcast ``period_end`` timestamps are shifted to period-beginning by
subtracting the inferred time resolution (e.g. −30 min for 30-min data).
``precipitable_water`` is in kg/m² (equivalent to mm) and is divided by 10
to obtain cm as required by SolarFarmer. ``surface_pressure`` is already in
hPa which equals mbar, so no pressure conversion is needed.

Only columns that are present in the DataFrame are mapped; ``period_end``,
``air_temp``, and ``ghi`` are the most commonly available columns but the
others are all optional. The ``gti`` (plane-of-array irradiance) column is
not mapped; SolarFarmer derives POA irradiance internally from GHI/DHI.

======================== ============ =====================================
Solcast column           SF column    Unit conversion
======================== ============ =====================================
``ghi``                  ``GHI``      W/m² → W/m² (none)
``dhi``                  ``DHI``      W/m² → W/m² (none)
``air_temp``             ``TAmb``     °C → °C (none)
``wind_speed_10m``       ``WS``       m/s → m/s (none)
``surface_pressure``     ``Pressure`` hPa → mbar (hPa = mbar, none)
``precipitable_water``   ``Water``    kg/m² → cm (÷ 10)
``relative_humidity``    ``RH``       % → % (none)
``albedo``               ``Albedo``   fraction → fraction (none)
``hsu_loss_fraction``    ``Soiling``  fraction → fraction (none)
``kimber_loss_fraction`` ``Soiling``  fraction → fraction (none)
``soiling``              ``Soiling``  fraction → fraction (none)
======================== ============ =====================================

.. note::
Only one soiling column should be present in the DataFrame. If multiple
are provided, the last one wins after column renaming.
"""

from __future__ import annotations
Expand All @@ -78,6 +115,8 @@
"check_sequential_year_timestamps",
"from_dataframe",
"from_pvlib",
"from_solcast",
"shift_period_end_to_beginning",
]


Expand Down Expand Up @@ -130,6 +169,20 @@ def check_sequential_year_timestamps(file_path: str | pathlib.Path) -> None:
"pressure": "Pressure",
}

# Maps Solcast column names (keys) to SolarFarmer TSV column names (values).
# from_solcast() keeps only the DataFrame columns listed here and passes this
# dict to from_dataframe() as ``column_map``.
SOLCAST_COLUMN_MAP: dict[str, str] = {
    "ghi": "GHI",
    "dhi": "DHI",
    "air_temp": "TAmb",
    "wind_speed_10m": "WS",
    "surface_pressure": "Pressure",
    # from_solcast() divides this column by 10 before writing.
    "precipitable_water": "Water",
    "relative_humidity": "RH",
    "albedo": "Albedo",
    # The three soiling inputs below share one target column, so a DataFrame
    # should carry at most one of them.
    "hsu_loss_fraction": "Soiling",
    "kimber_loss_fraction": "Soiling",
    "soiling": "Soiling",
}


def from_dataframe(
df: pd.DataFrame,
Expand Down Expand Up @@ -241,6 +294,131 @@ def from_pvlib(
)


def from_solcast(
    df: pd.DataFrame,
    output_path: str | pathlib.Path,
) -> pathlib.Path:
    """Convert a Solcast DataFrame to a SolarFarmer TSV weather file.

    Wrapper around :func:`from_dataframe` using ``SOLCAST_COLUMN_MAP``. The
    caller's DataFrame is not modified. Two conversions are applied before
    writing:

    * **Timestamp shift**: Solcast timestamps represent ``period_end``;
      SolarFarmer expects ``period_beginning``. The shift is delegated to
      :func:`shift_period_end_to_beginning`, which infers the time
      resolution from the median difference between consecutive timestamps
      and subtracts it from every timestamp.
    * **Precipitable water**: Solcast ``precipitable_water`` is in kg/m²
      (equivalent to mm); SolarFarmer expects cm, so the column is divided
      by 10.

    ``surface_pressure`` is already in hPa, which equals mbar, so
    :func:`from_dataframe` is called with ``pressure_pa_to_mbar=False``.

    Columns that are not keys of ``SOLCAST_COLUMN_MAP`` (e.g. ``gti``) are
    dropped before writing.

    .. note:: Requires ``pandas``. Install with ``pip install 'dnv-solarfarmer[all]'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Solcast-style DataFrame with a DatetimeIndex (``period_end``) and
        any subset of the columns ``ghi``, ``dhi``, ``air_temp``,
        ``wind_speed_10m``, ``surface_pressure``, ``precipitable_water``,
        ``relative_humidity``, ``albedo``, ``hsu_loss_fraction``,
        ``kimber_loss_fraction``, ``soiling``. Provide at most one of the
        three soiling columns: they all map to the same ``Soiling`` output
        column.
    output_path : str or Path
        Destination file path, forwarded to :func:`from_dataframe`.

    Returns
    -------
    pathlib.Path
        The value returned by :func:`from_dataframe`.

    Raises
    ------
    ValueError
        If the DataFrame has no DatetimeIndex.
    ImportError
        If pandas is not installed.
    """
    try:
        import pandas as pd
    except ImportError:
        raise ImportError(PANDAS_INSTALL_MSG) from None

    if not isinstance(df.index, pd.DatetimeIndex):
        raise ValueError(
            "DataFrame must have a DatetimeIndex. "
            "Use df.set_index(pd.to_datetime(df['period_end'])) or similar."
        )

    # Solcast timestamps are period_end; SolarFarmer expects period_beginning.
    # The helper returns a shifted copy, so `df` stays untouched and no extra
    # defensive copy is needed here.
    out = shift_period_end_to_beginning(df)

    # Drop columns that have no SolarFarmer equivalent (e.g. gti, custom
    # fields). drop() hands back an independent frame, so the column
    # assignment below is not a chained assignment on a slice (which makes
    # pandas without copy-on-write emit SettingWithCopyWarning).
    unmapped = [col for col in out.columns if col not in SOLCAST_COLUMN_MAP]
    out = out.drop(columns=unmapped)

    # precipitable_water: Solcast provides kg/m² (= mm); SolarFarmer expects cm.
    if "precipitable_water" in out.columns:
        out["precipitable_water"] = out["precipitable_water"] / 10

    return from_dataframe(
        out,
        output_path,
        column_map=SOLCAST_COLUMN_MAP,
        pressure_pa_to_mbar=False,  # Solcast surface_pressure is hPa = mbar
    )


def shift_period_end_to_beginning(df: pd.DataFrame) -> pd.DataFrame:
    """Shift a DatetimeIndex from period_end to period_beginning.

    The time resolution is inferred as the *median* difference between
    consecutive timestamps and subtracted from every timestamp. Using the
    median keeps the inferred step stable when the series contains an
    occasional gap. Useful for converters where the source data provides
    period_end timestamps but the target format expects period_beginning.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame with a DatetimeIndex representing period_end timestamps.
        It is not modified.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with its DatetimeIndex shifted to period_beginning.
        An empty DataFrame is returned as an unchanged copy.

    Raises
    ------
    ValueError
        If the DataFrame has no DatetimeIndex, or if it has rows but fewer
        than two usable timestamps, so no resolution can be inferred.
    ImportError
        If pandas is not installed.
    """
    try:
        import pandas as pd
    except ImportError:
        raise ImportError(PANDAS_INSTALL_MSG) from None

    if not isinstance(df.index, pd.DatetimeIndex):
        raise ValueError(
            "DataFrame must have a DatetimeIndex. "
            "Use df.set_index(pd.to_datetime(df['period_end'])) or similar."
        )

    out = df.copy()
    if len(out.index) == 0:
        # Nothing to shift.
        return out

    time_deltas = out.index.to_series().diff().dropna()
    if time_deltas.empty:
        # Without at least one interval the median would be NaT, and
        # subtracting NaT would silently turn every timestamp into NaT.
        raise ValueError(
            "Cannot infer the time resolution: at least two timestamps are "
            "required to shift period_end to period_beginning."
        )

    out.index = out.index - time_deltas.median()
    return out


TSV_COLUMNS: dict = {
"required": [
{
Expand Down
124 changes: 124 additions & 0 deletions tests/test_weather.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,12 @@

from solarfarmer.weather import (
PVLIB_COLUMN_MAP,
SOLCAST_COLUMN_MAP,
check_sequential_year_timestamps,
from_dataframe,
from_pvlib,
from_solcast,
shift_period_end_to_beginning,
)


Expand Down Expand Up @@ -186,3 +189,124 @@ def test_output_passes_validation(self, tmp_path, pvlib_df):
"""TSV written by from_pvlib should pass check_sequential_year_timestamps."""
out = from_pvlib(pvlib_df, tmp_path / "out.tsv")
check_sequential_year_timestamps(out) # should not raise


class TestFromSolcast:
    """Tests for the from_solcast() convenience wrapper."""

    @staticmethod
    def _read_tsv(path):
        """Return ``(header_cells, data_rows)`` parsed from a tab-separated file.

        Comparing whole cells rather than searching the raw header line keeps
        the assertions from passing on partial column-name matches.
        """
        lines = path.read_text().splitlines()
        header = lines[0].split("\t")
        rows = [line.split("\t") for line in lines[1:]]
        return header, rows

    @pytest.fixture
    def solcast_df(self):
        """Three 30-minute period_end records with the common Solcast columns."""
        pd = pytest.importorskip("pandas")
        idx = pd.date_range("1990-01-01 00:30", periods=3, freq="30min", tz="UTC")
        return pd.DataFrame(
            {
                "ghi": [0, 500, 800],
                "dhi": [0, 200, 300],
                "air_temp": [5.0, 15.0, 25.0],
                "wind_speed_10m": [2.0, 3.0, 4.0],
                "surface_pressure": [1013.25, 1013.25, 1013.25],
                "precipitable_water": [1.0, 2.0, 3.0],
            },
            index=idx,
        )

    def test_columns_renamed(self, tmp_path, solcast_df):
        """Every Solcast column present in the input appears under its SF name."""
        out = from_solcast(solcast_df, tmp_path / "out.tsv")
        header, _ = self._read_tsv(out)
        # Only columns present in the fixture are mapped
        for solcast_col, sf_col in SOLCAST_COLUMN_MAP.items():
            if solcast_col in solcast_df.columns:
                assert sf_col in header

    def test_pressure_not_converted(self, tmp_path, solcast_df):
        """surface_pressure is hPa = mbar; no conversion should be applied."""
        out = from_solcast(solcast_df, tmp_path / "out.tsv")
        header, rows = self._read_tsv(out)
        pressure_idx = header.index("Pressure")
        assert float(rows[0][pressure_idx]) == pytest.approx(1013.25)

    def test_precipitable_water_converted(self, tmp_path, solcast_df):
        """precipitable_water is kg/m² (= mm), must be divided by 10 to get cm."""
        out = from_solcast(solcast_df, tmp_path / "out.tsv")
        header, rows = self._read_tsv(out)
        water_idx = header.index("Water")
        assert float(rows[0][water_idx]) == pytest.approx(0.1)  # 1.0 / 10

    def test_timestamp_shifted_to_period_beginning(self, tmp_path, solcast_df):
        """Solcast period_end timestamps must be shifted back by the time resolution."""
        out = from_solcast(solcast_df, tmp_path / "out.tsv")
        first_data = out.read_text().splitlines()[1]
        # Original index starts at 00:30; shifted by -30 min → 00:00
        assert "T00:00" in first_data

    def test_unknown_columns_dropped(self, tmp_path):
        """Columns not in SOLCAST_COLUMN_MAP (e.g. gti) are dropped."""
        pd = pytest.importorskip("pandas")
        idx = pd.date_range("1990-01-01 01:00", periods=2, freq="h", tz="UTC")
        df = pd.DataFrame(
            {"ghi": [0, 500], "air_temp": [5.0, 15.0], "gti": [100.0, 200.0]},
            index=idx,
        )
        out = from_solcast(df, tmp_path / "out.tsv")
        header, _ = self._read_tsv(out)
        assert "gti" not in header

    def test_no_datetimeindex_raises(self, tmp_path):
        """A DataFrame without a DatetimeIndex is rejected with ValueError."""
        pd = pytest.importorskip("pandas")
        df = pd.DataFrame({"ghi": [0, 100], "air_temp": [5.0, 15.0]})
        with pytest.raises(ValueError, match="DatetimeIndex"):
            from_solcast(df, tmp_path / "out.tsv")

    def test_output_passes_validation(self, tmp_path, solcast_df):
        """TSV written by from_solcast should pass check_sequential_year_timestamps."""
        out = from_solcast(solcast_df, tmp_path / "out.tsv")
        check_sequential_year_timestamps(out)  # should not raise

    @pytest.mark.parametrize(
        "soiling_col", ["hsu_loss_fraction", "kimber_loss_fraction", "soiling"]
    )
    def test_soiling_columns_mapped(self, tmp_path, soiling_col):
        """hsu_loss_fraction, kimber_loss_fraction, and soiling all map to Soiling."""
        pd = pytest.importorskip("pandas")
        idx = pd.date_range("1990-01-01 00:30", periods=3, freq="30min", tz="UTC")
        df = pd.DataFrame(
            {
                "ghi": [0, 500, 800],
                "air_temp": [5.0, 15.0, 25.0],
                soiling_col: [0.01, 0.02, 0.03],
            },
            index=idx,
        )
        out = from_solcast(df, tmp_path / "out.tsv")
        header, rows = self._read_tsv(out)
        assert "Soiling" in header
        soiling_idx = header.index("Soiling")
        assert float(rows[0][soiling_idx]) == pytest.approx(0.01)


class TestShiftPeriodEndToBeginning:
    """Tests for shift_period_end_to_beginning()."""

    def test_shifts_timestamps_by_time_resolution(self):
        """Timestamps should be shifted back by the inferred time resolution."""
        pd = pytest.importorskip("pandas")
        # Create 30-minute resolution data starting at 00:30
        idx = pd.date_range("1990-01-01 00:30", periods=3, freq="30min", tz="UTC")
        df = pd.DataFrame({"ghi": [0, 100, 200]}, index=idx)

        result = shift_period_end_to_beginning(df)

        # Result should be shifted back by 30 minutes
        expected_idx = pd.date_range("1990-01-01 00:00", periods=3, freq="30min", tz="UTC")
        pd.testing.assert_index_equal(result.index, expected_idx)
        # Data values should be unchanged (indices differ, so compare values only)
        assert list(result["ghi"].values) == list(df["ghi"].values)

    def test_input_not_modified(self):
        """The caller's DataFrame must keep its original period_end index."""
        pd = pytest.importorskip("pandas")
        idx = pd.date_range("1990-01-01 00:30", periods=3, freq="30min", tz="UTC")
        df = pd.DataFrame({"ghi": [0, 100, 200]}, index=idx)

        shift_period_end_to_beginning(df)

        pd.testing.assert_index_equal(df.index, idx)

    def test_resolution_inferred_despite_gap(self):
        """A single missing record must not change the inferred step."""
        pd = pytest.importorskip("pandas")
        # 30-minute data with the 02:00 record missing: deltas are 30, 30, 60 min
        idx = pd.DatetimeIndex(
            [
                "1990-01-01 00:30",
                "1990-01-01 01:00",
                "1990-01-01 01:30",
                "1990-01-01 02:30",
            ]
        )
        df = pd.DataFrame({"ghi": [0, 100, 200, 300]}, index=idx)

        result = shift_period_end_to_beginning(df)

        assert result.index[0] == pd.Timestamp("1990-01-01 00:00")
        assert result.index[-1] == pd.Timestamp("1990-01-01 02:00")

    def test_no_datetimeindex_raises(self):
        """Should raise ValueError when DataFrame has no DatetimeIndex."""
        pd = pytest.importorskip("pandas")
        df = pd.DataFrame({"ghi": [0, 100, 200]})

        with pytest.raises(ValueError, match="DatetimeIndex"):
            shift_period_end_to_beginning(df)
Loading