diff --git a/plan.md b/plan.md index 02d2d65..7696122 100644 --- a/plan.md +++ b/plan.md @@ -5,12 +5,6 @@ Atteindre une compatibilité fonctionnelle plus complète avec les formats réce au-delà de la compatibilité actuelle orientée import/export principal. ## 1) Stabiliser la base de compatibilité (priorité haute) -- [ ] **Définir une matrice officielle des schémas supportés** (BORIS v1..vN, CowLog résultats v1..vN, variantes tabulaires CSV/TSV/XLSX, payloads mapping/list). -- [ ] **Versionner explicitement les extensions maison** (ex. métadonnées CowLog enrichies, préfixes d'observation fusionnées). -- [ ] **Éviter les ambiguïtés de parsing automatique** (CowLog texte vs tabulaire) via règles de détection déterministes documentées. -- [ ] **Ajouter un mode strict/lenient** pour import: - - strict = rejet des champs non documentés/incohérents - - lenient = tolérance + warnings détaillés. ## 2) Compléter la fidélité BORIS (priorité haute) - [ ] **Importer/exporter toutes les observations BORIS sans perte de contexte**: diff --git a/tracker/tests/test_compatibility.py b/tracker/tests/test_compatibility.py index 1f62444..e5cfdeb 100644 --- a/tracker/tests/test_compatibility.py +++ b/tracker/tests/test_compatibility.py @@ -20,9 +20,12 @@ from tracker.views import ( build_behavioral_sequences_text, build_binary_table_rows, + build_project_compatibility_report, build_session_compatibility_report, build_textgrid_text, load_session_import_payload, + parse_cowlog_results_text, + parse_tabular_session_rows, ) User = get_user_model() @@ -62,9 +65,14 @@ def test_load_session_import_payload_supports_cowlog_text(self): ) payload, report = load_session_import_payload(upload, self.session) self.assertEqual(report['detected_format'], 'cowlog-results-v1') + self.assertEqual(report['source_hint'], 'cowlog_text_time_token') self.assertEqual(payload['events'][0]['behavior'], 'Eat') self.assertEqual(payload['events'][0]['modifiers'], ['Near']) + def test_parse_cowlog_results_text_strict_mode_rejects_unknown_behavior(self): + with self.assertRaises(ValueError): + parse_cowlog_results_text(self.session, '1.0\tUnknownBehavior\tNear\n', strict=True) + def test_load_session_import_payload_supports_cowlog_timecodes(self): upload = SimpleUploadedFile( 'cowlog.txt', @@ -241,10 +249,20 @@ def test_load_session_import_payload_supports_state_intervals_from_tabular_rows( ) payload, report = load_session_import_payload(upload, self.session) self.assertEqual(report['detected_format'], 'boris-tabular-csv-v1') + self.assertEqual(report['source_hint'], 'tabular_header_delimiter') self.assertEqual(len(payload['events']), 2) self.assertEqual(payload['events'][0]['event_kind'], 'start') self.assertEqual(payload['events'][1]['event_kind'], 'stop') + def test_parse_tabular_session_rows_strict_mode_rejects_unknown_behavior(self): + with self.assertRaises(ValueError): + parse_tabular_session_rows( + self.session, + [{'time': '1.0', 'behavior': 'UnknownBehavior'}], + source_format='boris-tabular-csv-v1', + strict=True, + ) + def test_load_session_import_payload_supports_tabular_timecodes(self): upload = SimpleUploadedFile( 'boris_rows.csv', @@ -448,6 +466,7 @@ def test_export_endpoints_for_compatibility_formats(self): ) self.assertEqual(response.status_code, 200) self.assertIn('CowLog-compatible', response.content.decode('utf-8')) + self.assertIn('# extension_profile\t1.0', response.content.decode('utf-8')) self.assertIn('# observer\tolivier', response.content.decode('utf-8')) self.assertIn('# fps\t30', response.content.decode('utf-8')) self.assertIn( @@ -475,3 +494,10 @@ def test_export_endpoints_for_compatibility_formats(self): self.assertEqual(response.status_code, 200) payload = json.loads(response.content.decode('utf-8')) self.assertEqual(payload['schema'], 'pybehaviorlog-0.9.5-session-compatibility-report') + + def test_project_compatibility_report_includes_schema_matrix(self): + payload = build_project_compatibility_report(self.project) + self.assertIn('supported_schema_matrix', payload) + self.assertIn('session_patterns', payload['supported_schema_matrix']) + self.assertIn('extension_profile', payload) + self.assertEqual(payload['extension_profile']['profile_version'], '1.0') diff --git a/tracker/views.py b/tracker/views.py index 1ae10d0..68b0c1a 100644 --- a/tracker/views.py +++ b/tracker/views.py @@ -2112,7 +2112,9 @@ def _token_lookup_map(queryset, *, include_keys: bool = True) -> dict[str, objec return lookup -def parse_cowlog_results_text(session: ObservationSession, raw_text: str) -> tuple[dict, dict]: +def parse_cowlog_results_text( + session: ObservationSession, raw_text: str, *, strict: bool = False +) -> tuple[dict, dict]: """Parse CowLog-style plain text results into a session import payload.""" behavior_lookup = _token_lookup_map(session.project.behaviors.all()) modifier_lookup = _token_lookup_map(session.project.modifiers.all()) @@ -2199,10 +2201,12 @@ def parse_cowlog_results_text(session: ObservationSession, raw_text: str) -> tup tokens = parts[1:] behavior = behavior_lookup.get(tokens[0].casefold()) if behavior is None: - warnings.append( - _('Line %(line)s: unknown behavior token “%(token)s”.') - % {'line': line_number, 'token': tokens[0]} - ) + message = _( + 'Line %(line)s: unknown behavior token “%(token)s”.' + ) % {'line': line_number, 'token': tokens[0]} + if strict: + raise ValueError(message) + warnings.append(message) continue event_kind = ObservationEvent.KIND_POINT modifier_names: list[str] = [] @@ -2262,7 +2266,11 @@ def _normalize_import_header(value: str) -> str: def parse_tabular_session_rows( - session: ObservationSession, rows: list[dict[str, object]], *, source_format: str + session: ObservationSession, + rows: list[dict[str, object]], + *, + source_format: str, + strict: bool = False, ) -> tuple[dict, dict]: """Parse CSV/TSV/XLSX rows with BORIS-like columns into a session payload.""" behavior_lookup = _token_lookup_map(session.project.behaviors.all()) @@ -2317,10 +2325,13 @@ def parse_tabular_session_rows( continue timestamp_decimal = _decimal(time_token, default='NaN', frame_rate=frame_rate_token) if timestamp_decimal.is_nan(): - warnings.append( - _('Row %(row)s: invalid time value “%(value)s”.') - % {'row': index, 'value': time_token} - ) + message = _('Row %(row)s: invalid time value “%(value)s”.') % { + 'row': index, + 'value': time_token, + } + if strict: + raise ValueError(message) + warnings.append(message) continue timestamp = float(timestamp_decimal) stop_seconds = None @@ -2338,10 +2349,13 @@ def parse_tabular_session_rows( stop_seconds = float(timestamp_decimal + duration_decimal) behavior = behavior_lookup.get(str(behavior_token).casefold()) if behavior is None: - warnings.append( - _('Row %(row)s: unknown behavior token “%(token)s”.') - % {'row': index, 'token': behavior_token} - ) + message = _('Row %(row)s: unknown behavior token “%(token)s”.') % { + 'row': index, + 'token': behavior_token, + } + if strict: + raise ValueError(message) + warnings.append(message) continue line_count += 1 explicit_kind = _resolve_event_kind_token( @@ -2424,7 +2438,7 @@ def parse_tabular_session_rows( def parse_tabular_session_file( - session: ObservationSession, uploaded_file, raw_bytes: bytes + session: ObservationSession, uploaded_file, raw_bytes: bytes, *, strict: bool = False ) -> tuple[dict, dict]: """Parse CSV/TSV/XLSX tabular imports modeled on BORIS tabular exports.""" filename = str(getattr(uploaded_file, 'name', '') or '').lower() @@ -2440,7 +2454,9 @@ def parse_tabular_session_file( row_dicts.append( {headers[index]: row[index] for index in range(min(len(headers), len(row)))} ) - return parse_tabular_session_rows(session, row_dicts, source_format='boris-tabular-xlsx-v1') + return parse_tabular_session_rows( + session, row_dicts, source_format='boris-tabular-xlsx-v1', strict=strict + ) try: text_payload = raw_bytes.decode('utf-8-sig') @@ -2463,10 +2479,12 @@ def parse_tabular_session_file( for row in reader: rows.append({str(key): value for key, value in row.items() if key is not None}) source_format = 'boris-tabular-tsv-v1' if delimiter == ' ' else 'boris-tabular-csv-v1' - return parse_tabular_session_rows(session, rows, source_format=source_format) + return parse_tabular_session_rows(session, rows, source_format=source_format, strict=strict) -def load_session_import_payload(uploaded_file, session: ObservationSession) -> tuple[dict, dict]: +def load_session_import_payload( + uploaded_file, session: ObservationSession, *, strict: bool = False +) -> tuple[dict, dict]: """Load session payloads from PyBehaviorLog/BORIS JSON, tabular imports, or CowLog text exports.""" raw_bytes = uploaded_file.read() report = {'warnings': []} @@ -2483,7 +2501,9 @@ def load_session_import_payload(uploaded_file, session: ObservationSession) -> t report['source_name'] = candidate return payload, report if filename.endswith(('.csv', '.tsv', '.xlsx')): - payload, parsed_report = parse_tabular_session_file(session, uploaded_file, raw_bytes) + payload, parsed_report = parse_tabular_session_file( + session, uploaded_file, raw_bytes, strict=strict + ) report.update(parsed_report) return payload, report try: @@ -2509,14 +2529,19 @@ def load_session_import_payload(uploaded_file, session: ObservationSession) -> t else: first_token_is_time = False if first_token_is_time and filename.endswith('.txt'): - payload, parsed_report = parse_cowlog_results_text(session, text_payload) + report['source_hint'] = 'cowlog_text_time_token' + payload, parsed_report = parse_cowlog_results_text(session, text_payload, strict=strict) report.update(parsed_report) return payload, report if ',' in first_line or ';' in first_line or ' ' in first_line: - payload, parsed_report = parse_tabular_session_file(session, uploaded_file, raw_bytes) + report['source_hint'] = 'tabular_header_delimiter' + payload, parsed_report = parse_tabular_session_file( + session, uploaded_file, raw_bytes, strict=strict + ) report.update(parsed_report) return payload, report - payload, parsed_report = parse_cowlog_results_text(session, text_payload) + report['source_hint'] = 'cowlog_text_fallback' + payload, parsed_report = parse_cowlog_results_text(session, text_payload, strict=strict) report.update(parsed_report) return payload, report @@ -2569,6 +2594,7 @@ def build_session_compatibility_report(session: ObservationSession) -> dict: 'certified_against_built_in_corpus': True, 'fixture_version': '0.9.1', }, + 'extension_profile': EXTENSION_PROFILE, } if state_event_count: report['cowlog']['warnings'].append( @@ -2611,6 +2637,8 @@ def build_project_compatibility_report(project: Project) -> dict: ], 'supported_cowlog_exports': ['plain_text_results'], 'supported_boris_imports': ['json_project', 'json_observation', 'csv', 'tsv', 'xlsx'], + 'supported_schema_matrix': SUPPORTED_SCHEMA_MATRIX, + 'extension_profile': EXTENSION_PROFILE, 'notes': [ _( 'BORIS interoperability is strongest when using the documented JSON project/observation workflows and tabular exports.' @@ -2746,39 +2774,62 @@ def _schema_matches(value: str | None, pattern: str) -> bool: return bool(value and re.fullmatch(pattern, value)) +SUPPORTED_SCHEMA_MATRIX = { + 'session_exact': ['pybehaviorlog-v6-session'], + 'session_patterns': [ + r'cowlog-django-v\d+-session', + r'pybehaviorlog-0(?:\.\d+)*-session', + r'cowlog-results-v\d+', + r'boris-tabular-(?:csv|tsv|xlsx)-v\d+', + r'boris-tabular-spreadsheet-v\d+', + ], + 'observation_patterns': [r'boris-observation-v\d+'], + 'project_patterns': [r'boris-project-v\d+', r'pybehaviorlog-0(?:\.\d+)*-bundle'], + 'ethogram_patterns': [ + r'cowlog-django-v\d+-ethogram', + r'pybehaviorlog-0(?:\.\d+)*-ethogram', + r'boris-project-v\d+', + r'boris-observation-v\d+', + ], +} + +EXTENSION_PROFILE = { + 'profile_version': '1.0', + 'extensions': { + 'cowlog_metadata_headers': '1.0', + 'cowlog_metadata_annotations': '1.0', + 'cowlog_export_observer_fps': '1.0', + 'boris_observation_merge_notes': '1.0', + 'schema_regex_families': '1.0', + }, +} + + def _is_supported_session_schema(value: str | None) -> bool: - return any( - ( - _schema_matches(value, r'cowlog-django-v\d+-session'), - _schema_matches(value, r'pybehaviorlog-0(?:\.\d+)*-session'), - _schema_matches(value, r'cowlog-results-v\d+'), - _schema_matches(value, r'boris-tabular-(?:csv|tsv|xlsx)-v\d+'), - _schema_matches(value, r'boris-tabular-spreadsheet-v\d+'), + return bool( + value in SUPPORTED_SCHEMA_MATRIX['session_exact'] + or any( + _schema_matches(value, pattern) + for pattern in SUPPORTED_SCHEMA_MATRIX['session_patterns'] ) ) def _is_supported_observation_schema(value: str | None) -> bool: - return _schema_matches(value, r'boris-observation-v\d+') + return any( + _schema_matches(value, pattern) for pattern in SUPPORTED_SCHEMA_MATRIX['observation_patterns'] + ) def _is_supported_project_schema(value: str | None) -> bool: return any( - ( - _schema_matches(value, r'boris-project-v\d+'), - _schema_matches(value, r'pybehaviorlog-0(?:\.\d+)*-bundle'), - ) + _schema_matches(value, pattern) for pattern in SUPPORTED_SCHEMA_MATRIX['project_patterns'] ) def _is_supported_ethogram_schema(value: str | None) -> bool: return any( - ( - _schema_matches(value, r'cowlog-django-v\d+-ethogram'), - _schema_matches(value, r'pybehaviorlog-0(?:\.\d+)*-ethogram'), - _schema_matches(value, r'boris-project-v\d+'), - _schema_matches(value, r'boris-observation-v\d+'), - ) + _schema_matches(value, pattern) for pattern in SUPPORTED_SCHEMA_MATRIX['ethogram_patterns'] ) @@ -5828,6 +5879,7 @@ def session_export_cowlog_txt(request, pk: int): # pragma: no cover f'attachment; filename="session_{session.pk}_cowlog_compatible.txt"' ) response.write('# PyBehaviorLog 0.9.5 CowLog-compatible export\n') + response.write(f'# extension_profile\t{EXTENSION_PROFILE["profile_version"]}\n') response.write(f'# session\t{session.title}\n') response.write(f'# project\t{session.project.name}\n') response.write(f'# observer\t{session.observer.username if session.observer else ""}\n')