diff --git a/imap_processing/glows/l1a/glows_l1a.py b/imap_processing/glows/l1a/glows_l1a.py
index 4314ce4a85..051b6f6667 100644
--- a/imap_processing/glows/l1a/glows_l1a.py
+++ b/imap_processing/glows/l1a/glows_l1a.py
@@ -338,6 +338,25 @@ def generate_histogram_dataset(
         )
     hist_l1a_list = valid_hists
 
+    # Deduplicate by (imap_start_time, imap_time_offset), keeping the first occurrence.
+    seen_times: dict = {}
+    for hist in hist_l1a_list:
+        key = (
+            hist.imap_start_time.seconds,
+            hist.imap_start_time.subseconds,
+            hist.imap_time_offset.seconds,
+            hist.imap_time_offset.subseconds,
+        )
+        if key not in seen_times:
+            seen_times[key] = hist
+    dedup_hists = list(seen_times.values())
+    if len(dedup_hists) < len(hist_l1a_list):
+        logger.warning(
+            f"GLOWS: Filtered out {len(hist_l1a_list) - len(dedup_hists)} "
+            f"duplicate histogram(s) by imap_start_time and imap_time_offset."
+        )
+    hist_l1a_list = dedup_hists
+
     # Store timestamps for each HistogramL1A object.
     time_data: np.ndarray = np.zeros(len(hist_l1a_list), dtype=np.int64)
     # Data in lists, for each of the 25 time varying datapoints in HistogramL1A
diff --git a/imap_processing/tests/glows/test_glows_l1a_cdf.py b/imap_processing/tests/glows/test_glows_l1a_cdf.py
index d559ae50a7..5e648436ac 100644
--- a/imap_processing/tests/glows/test_glows_l1a_cdf.py
+++ b/imap_processing/tests/glows/test_glows_l1a_cdf.py
@@ -83,6 +83,21 @@ def test_generate_histogram_dataset_filters_zero_imap_start_time(l1a_test_data):
     assert len(dataset["epoch"].values) == 2
 
 
+@pytest.mark.external_test_data
+def test_generate_histogram_dataset_deduplicates(in_flight_packet_path):
+    hist_l0, _ = decom_packets(in_flight_packet_path)
+    hist_l1a = [HistogramL1A(h) for h in hist_l0]
+    glows_attrs = create_glows_attr_obj()
+
+    # Feed every histogram twice so duplicates are guaranteed to be present;
+    # otherwise the uniqueness check could pass without exercising the filter.
+    dataset = generate_histogram_dataset(hist_l1a + hist_l1a, glows_attrs)
+
+    epochs = dataset["epoch"].values.tolist()
+    assert 0 < len(epochs) <= len(hist_l1a)
+    assert len(epochs) == len(set(epochs))
+
+
 def test_generate_de_dataset(l1a_test_data):
     _, de_l1a = l1a_test_data
     glows_attrs = create_glows_attr_obj()