diff --git a/plan.md b/plan.md new file mode 100644 index 0000000..02d2d65 --- /dev/null +++ b/plan.md @@ -0,0 +1,94 @@ +# Plan d'implémentation restant BORIS/CowLog + +## Objectif +Atteindre une compatibilité fonctionnelle plus complète avec les formats récents BORIS/CowLog, +au-delà de la compatibilité actuelle orientée import/export principal. + +## 1) Stabiliser la base de compatibilité (priorité haute) +- [ ] **Définir une matrice officielle des schémas supportés** (BORIS v1..vN, CowLog résultats v1..vN, variantes tabulaires CSV/TSV/XLSX, payloads mapping/list). +- [ ] **Versionner explicitement les extensions maison** (ex. métadonnées CowLog enrichies, préfixes d'observation fusionnées). +- [ ] **Éviter les ambiguïtés de parsing automatique** (CowLog texte vs tabulaire) via règles de détection déterministes documentées. +- [ ] **Ajouter un mode strict/lenient** pour import: + - strict = rejet des champs non documentés/incohérents + - lenient = tolérance + warnings détaillés. + +## 2) Compléter la fidélité BORIS (priorité haute) +- [ ] **Importer/exporter toutes les observations BORIS sans perte de contexte**: + - identifiants d'observation + - médias synchronisés par observation + - variables par observation + - commentaires/notes d'observation. +- [ ] **Préserver la structure multi-observation lors de l'export** (pas uniquement fusionnée), avec option de fusion configurable. +- [ ] **Supporter entièrement les colonnes BORIS tabulaires avancées**: + - start/stop/duration/frame/fps + - colonnes alias documentées BORIS + - annotation rows enrichies. +- [ ] **Ajouter un validateur d'intégrité BORIS** dédié (state pairs, ordre temporel, overlap states, comportements inconnus). + +## 3) Compléter la fidélité CowLog (priorité haute) +- [ ] **Normaliser le profil CowLog “texte résultats”**: + - en-têtes standard (session/projet/observer/video/fps) + - annotations métadonnées + - variantes de séparateurs/tabulations. 
+- [ ] **Améliorer la reconstruction des états CowLog**: + - stratégie configurable pour point/start/stop implicites + - rapport explicite des pertes de fidélité. +- [ ] **Garantir le round-trip CowLog↔PyBehaviorLog↔CowLog** avec mêmes métadonnées clés (dont fps/observer). + +## 4) Timecodes et frame-rate (priorité moyenne) +- [ ] **Centraliser un parseur temporel unique** (décimal, ISO8601, SMPTE, frame). +- [ ] **Ajouter la gestion explicite du drop-frame SMPTE** (si nécessaire selon corpus cible). +- [ ] **Rendre la résolution FPS explicite et traçable**: + - priorité: row > metadata > variable > défaut + - écrire la source FPS utilisée dans le rapport d'import. + +## 5) Rapports et diagnostics (priorité moyenne) +- [ ] **Étendre les rapports de compatibilité** pour lister: + - champs ignorés + - conversions appliquées + - pertes potentielles de fidélité + - niveau de confiance du parsing. +- [ ] **Ajouter un export “diagnostic JSON”** par import, archivable en CI. + +## 6) Tests et certification (priorité haute) +- [ ] **Constituer un corpus de fixtures BORIS/CowLog réels** (versions et variantes récentes). +- [ ] **Ajouter des tests de non-régression paramétrés**: + - mapping vs list + - multi-observation + - timecodes exotiques + - séparateurs régionaux. +- [ ] **Mettre en place des tests round-trip sémantiques** (pas seulement structurels). +- [ ] **Ajouter un job CI “compatibility certification”** avec seuil de réussite. + +## 7) UX / produit (priorité moyenne) +- [ ] **Ajouter un écran de prévisualisation avant import**: + - format détecté + - fps détecté + - nombre d'événements/annotations + - warnings bloquants/non bloquants. +- [ ] **Permettre à l'utilisateur de corriger manuellement**: + - fps + - mapping des colonnes + - stratégie state reconstruction. + +## 8) Documentation (priorité haute) +- [ ] **Mettre à jour la documentation de compatibilité** avec un tableau clair: + - “supporté totalement” + - “supporté partiellement” + - “non supporté”. 
def test_load_session_import_payload_supports_cowlog_frame_rate_with_unit(self):
    """Import a CowLog text upload whose ``# fps`` header carries a unit suffix.

    The header value is ``29.97 fps`` and the single event row uses the
    four-field timecode ``00:00:10:15``. The test checks the detected format,
    that the report keeps the frame rate string as written, and that the
    first event time is 10.5005 to three decimal places.
    """
    cowlog_bytes = b'# fps\t29.97 fps\n00:00:10:15\tEat\tNear\n'
    cowlog_upload = SimpleUploadedFile(
        'cowlog.txt',
        cowlog_bytes,
        content_type='text/plain',
    )

    imported_payload, import_report = load_session_import_payload(
        cowlog_upload,
        self.session,
    )

    # Report-level expectations first, then the converted event time.
    self.assertEqual(import_report['detected_format'], 'cowlog-results-v1')
    self.assertEqual(import_report['frame_rate'], '29.97 fps')
    first_event_time = imported_payload['events'][0]['time']
    self.assertAlmostEqual(first_event_time, 10.5005, places=3)
def test_load_session_import_payload_parses_quoted_cowlog_metadata_annotations(self):
    """Import a CowLog text upload with a space-separated, quoted annotation line.

    The ``# annotation`` header has no tab separators and wraps its note in
    double quotes. The test checks the detected format, that exactly one
    annotation is reported, and that the title and the multi-word note are
    recovered without the surrounding quotes.
    """
    cowlog_bytes = (
        b'# annotation 3.0 Marker \"Interesting moment with spaces\"\n1.0\tEat\tNear\n'
    )
    cowlog_upload = SimpleUploadedFile(
        'cowlog.txt',
        cowlog_bytes,
        content_type='text/plain',
    )

    imported_payload, import_report = load_session_import_payload(
        cowlog_upload,
        self.session,
    )

    self.assertEqual(import_report['detected_format'], 'cowlog-results-v1')
    self.assertEqual(import_report['annotation_count'], 1)
    # The quoted note must come back as one field, not split on its spaces.
    self.assertEqual(imported_payload['annotations'][0]['title'], 'Marker')
    self.assertEqual(
        imported_payload['annotations'][0]['note'],
        'Interesting moment with spaces',
    )
self.assertAlmostEqual(payload['events'][1]['time'], 8.6, places=3) + def test_load_session_import_payload_supports_tabular_frame_timecodes(self): upload = SimpleUploadedFile( 'boris_rows.csv', @@ -245,7 +292,7 @@ def test_load_session_import_payload_supports_tabular_smpte_semicolon_timecodes( def test_load_session_import_payload_supports_tabular_custom_frame_rate(self): upload = SimpleUploadedFile( 'boris_rows.csv', - b'time,stop,behavior,frame_rate\n00:00:05:10,00:00:08:20,Stand,50\n', + b'time,stop,behavior,frame_rate\n00:00:05:10,00:00:08:20,Stand,50 fps\n', content_type='text/csv', ) payload, report = load_session_import_payload(upload, self.session) @@ -364,11 +411,23 @@ def test_export_endpoints_for_compatibility_formats(self): color='#f59e0b', created_by=self.user, ) + fps_definition = IndependentVariableDefinition.objects.create( + project=self.project, + label='fps', + value_type=IndependentVariableDefinition.TYPE_NUMERIC, + ) + ObservationVariableValue.objects.create( + session=self.session, + definition=fps_definition, + value='30', + ) response = self.client.get( reverse('tracker:session_export_cowlog_txt', args=[self.session.pk]) ) self.assertEqual(response.status_code, 200) self.assertIn('CowLog-compatible', response.content.decode('utf-8')) + self.assertIn('# observer\tolivier', response.content.decode('utf-8')) + self.assertIn('# fps\t30', response.content.decode('utf-8')) self.assertIn( '# annotation\t1.5\tMark\tCowLog annotation line', response.content.decode('utf-8'), diff --git a/tracker/tests/test_helpers.py b/tracker/tests/test_helpers.py index 84f0665..0a4e849 100644 --- a/tracker/tests/test_helpers.py +++ b/tracker/tests/test_helpers.py @@ -178,12 +178,18 @@ def test_import_session_payload_accepts_newer_cowlog_schema(self): self.assertEqual(annotation_count, 0) def test_import_session_payload_applies_cowlog_metadata_to_notes(self): + fps_definition = IndependentVariableDefinition.objects.create( + project=self.project, + label='fps', + 
value_type=IndependentVariableDefinition.TYPE_NUMERIC, + ) payload = { 'schema': 'cowlog-results-v2', 'metadata': { 'session': 'Imported header session', 'project': 'Imported header project', 'primary_video': 'clip.mp4', + 'fps': '29.97 fps', }, 'events': [{'behavior': 'Eat', 'event_kind': 'point', 'time': 1.5}], 'annotations': [], @@ -196,6 +202,11 @@ def test_import_session_payload_applies_cowlog_metadata_to_notes(self): self.assertIn('project: Imported header project', self.session.notes) self.assertIn('primary_video: clip.mp4', self.session.notes) self.assertEqual(self.session.notes.count('Imported CowLog metadata:'), 1) + fps_value = ObservationVariableValue.objects.get( + session=self.session, + definition=fps_definition, + ) + self.assertEqual(fps_value.value, '29.97 fps') def test_import_session_payload_accepts_mapping_event_rows(self): payload = { diff --git a/tracker/views.py b/tracker/views.py index 82cceb3..feaeaac 100644 --- a/tracker/views.py +++ b/tracker/views.py @@ -630,9 +630,7 @@ def _decimal( minutes = Decimal(parts[1]) seconds = Decimal(parts[2].replace(',', '.')) frames = Decimal(parts[3]) - fps = _decimal(frame_rate, default='25') if frame_rate is not None else Decimal('25') - if fps <= 0: - fps = Decimal('25') + fps = _normalize_frame_rate_token(frame_rate) return ( (hours * Decimal('3600')) + (minutes * Decimal('60')) @@ -688,6 +686,19 @@ def _append_note_line(existing: str | None, line: str, *, max_length: int = 2000 return '\n'.join(lines)[:max_length] +def _normalize_frame_rate_token(value: str | float | Decimal | None) -> Decimal: + """Return a positive frame rate value parsed from tokens like '29.97 fps'.""" + if value is None: + return Decimal('25') + if isinstance(value, Decimal): + return value if value > 0 else Decimal('25') + match = re.search(r'[-+]?\d+(?:[.,]\d+)?', str(value)) + if not match: + return Decimal('25') + parsed = _decimal(match.group(0), default='25') + return parsed if parsed > 0 else Decimal('25') + + def 
_resolve_storage_path(video: VideoAsset | None) -> Path | None: """Resolve a local filesystem path when the file is available on local storage.""" if video is None or not getattr(video, 'file', None): @@ -2109,15 +2120,17 @@ def parse_cowlog_results_text(session: ObservationSession, raw_text: str) -> tup if line.startswith('#'): metadata_line = line[1:].strip() if metadata_line: - metadata_parts = [ - part.strip() - for part in ( - metadata_line.split(' ') - if ' ' in metadata_line - else metadata_line.split() - ) - if part.strip() - ] + if ' ' in metadata_line: + raw_metadata_parts = metadata_line.split(' ') + else: + try: + raw_metadata_parts = shlex.split(metadata_line) + except ValueError: + raw_metadata_parts = metadata_line.split() + metadata_parts = [part.strip() for part in raw_metadata_parts if part.strip()] + if len(metadata_parts) == 1 and ':' in metadata_parts[0]: + key, value = metadata_parts[0].split(':', 1) + metadata_parts = [key.strip(), value.strip()] if metadata_parts and metadata_parts[0].casefold() in {'note', 'annotation'}: annotation_time = ( _decimal(metadata_parts[1], default='NaN') @@ -2428,6 +2441,10 @@ def parse_tabular_session_file( or filename.endswith('.tsv') else ',' ) + if delimiter == ',' and filename.endswith('.csv'): + first_line = text_payload.splitlines()[0] if text_payload.splitlines() else '' + if ';' in first_line and first_line.count(';') >= first_line.count(','): + delimiter = ';' reader = csv.DictReader(io.StringIO(text_payload), delimiter=delimiter) if not reader.fieldnames: raise ValueError(_('The uploaded tabular file does not contain a header row.')) @@ -2484,7 +2501,7 @@ def load_session_import_payload(uploaded_file, session: ObservationSession) -> t payload, parsed_report = parse_cowlog_results_text(session, text_payload) report.update(parsed_report) return payload, report - if ',' in first_line or ' ' in first_line: + if ',' in first_line or ';' in first_line or ' ' in first_line: payload, parsed_report = 
parse_tabular_session_file(session, uploaded_file, raw_bytes) report.update(parsed_report) return payload, report @@ -3594,6 +3611,27 @@ def import_session_payload( value = str(metadata_items.get(key, '')).strip() if value: metadata_notes.append(f'{key}: {value}') + fps_metadata = None + for key in ('fps', 'frame_rate', 'framerate'): + value = str(metadata_items.get(key, '')).strip() + if value: + fps_metadata = value + break + if fps_metadata: + fps_definition = next( + ( + definition + for label, definition in variable_map.items() + if label.casefold().replace(' ', '_') in {'fps', 'frame_rate', 'framerate'} + ), + None, + ) + if fps_definition is not None: + ObservationVariableValue.objects.update_or_create( + session=session, + definition=fps_definition, + defaults={'value': fps_metadata}, + ) if metadata_notes: session.notes = _append_note_line( session.notes, @@ -5781,7 +5819,19 @@ def session_export_cowlog_txt(request, pk: int): # pragma: no cover response.write('# PyBehaviorLog 0.9.5 CowLog-compatible export\n') response.write(f'# session\t{session.title}\n') response.write(f'# project\t{session.project.name}\n') + response.write(f'# observer\t{session.observer.username if session.observer else ""}\n') response.write(f'# primary_video\t{session.primary_label}\n') + fps_value = ( + ObservationVariableValue.objects.filter( + session=session, + definition__label__iregex=r'^(fps|frame[_ ]?rate|framerate)$', + ) + .order_by('definition__sort_order', 'definition__label') + .values_list('value', flat=True) + .first() + ) + if fps_value: + response.write(f'# fps\t{fps_value}\n') report = build_session_compatibility_report(session) for warning in report['cowlog']['warnings']: response.write(f'# warning\t{warning}\n')