Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions plan.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,6 @@ Atteindre une compatibilité fonctionnelle plus complète avec les formats réce
au-delà de la compatibilité actuelle orientée import/export principal.

## 1) Stabiliser la base de compatibilité (priorité haute)
- [ ] **Définir une matrice officielle des schémas supportés** (BORIS v1..vN, CowLog résultats v1..vN, variantes tabulaires CSV/TSV/XLSX, payloads mapping/list).
- [ ] **Versionner explicitement les extensions maison** (ex. métadonnées CowLog enrichies, préfixes d'observation fusionnées).
- [ ] **Éviter les ambiguïtés de parsing automatique** (CowLog texte vs tabulaire) via règles de détection déterministes documentées.
- [ ] **Ajouter un mode strict/lenient** pour import:
- strict = rejet des champs non documentés/incohérents
- lenient = tolérance + warnings détaillés.

## 2) Compléter la fidélité BORIS (priorité haute)
- [ ] **Importer/exporter toutes les observations BORIS sans perte de contexte**:
Expand Down
26 changes: 26 additions & 0 deletions tracker/tests/test_compatibility.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,12 @@
from tracker.views import (
build_behavioral_sequences_text,
build_binary_table_rows,
build_project_compatibility_report,
build_session_compatibility_report,
build_textgrid_text,
load_session_import_payload,
parse_cowlog_results_text,
parse_tabular_session_rows,
)

User = get_user_model()
Expand Down Expand Up @@ -62,9 +65,14 @@ def test_load_session_import_payload_supports_cowlog_text(self):
)
payload, report = load_session_import_payload(upload, self.session)
self.assertEqual(report['detected_format'], 'cowlog-results-v1')
self.assertEqual(report['source_hint'], 'cowlog_text_time_token')
self.assertEqual(payload['events'][0]['behavior'], 'Eat')
self.assertEqual(payload['events'][0]['modifiers'], ['Near'])

def test_parse_cowlog_results_text_strict_mode_rejects_unknown_behavior(self):
    """Strict CowLog text parsing must raise ``ValueError`` for an unmapped behavior.

    The sample line uses a behavior token that the fixture project is not
    expected to define (assumption: verify against ``setUp``).
    """
    cowlog_text = '1.0\tUnknownBehavior\tNear\n'
    with self.assertRaises(ValueError):
        parse_cowlog_results_text(self.session, cowlog_text, strict=True)

def test_load_session_import_payload_supports_cowlog_timecodes(self):
upload = SimpleUploadedFile(
'cowlog.txt',
Expand Down Expand Up @@ -241,10 +249,20 @@ def test_load_session_import_payload_supports_state_intervals_from_tabular_rows(
)
payload, report = load_session_import_payload(upload, self.session)
self.assertEqual(report['detected_format'], 'boris-tabular-csv-v1')
self.assertEqual(report['source_hint'], 'tabular_header_delimiter')
self.assertEqual(len(payload['events']), 2)
self.assertEqual(payload['events'][0]['event_kind'], 'start')
self.assertEqual(payload['events'][1]['event_kind'], 'stop')

def test_parse_tabular_session_rows_strict_mode_rejects_unknown_behavior(self):
    """Strict tabular parsing must raise ``ValueError`` for an unmapped behavior.

    A single row with a valid time but a behavior token the fixture project
    is not expected to define (assumption: verify against ``setUp``) is fed
    to the parser with ``strict=True``.
    """
    rows = [{'time': '1.0', 'behavior': 'UnknownBehavior'}]
    with self.assertRaises(ValueError):
        parse_tabular_session_rows(
            self.session,
            rows,
            source_format='boris-tabular-csv-v1',
            strict=True,
        )

def test_load_session_import_payload_supports_tabular_timecodes(self):
upload = SimpleUploadedFile(
'boris_rows.csv',
Expand Down Expand Up @@ -448,6 +466,7 @@ def test_export_endpoints_for_compatibility_formats(self):
)
self.assertEqual(response.status_code, 200)
self.assertIn('CowLog-compatible', response.content.decode('utf-8'))
self.assertIn('# extension_profile\t1.0', response.content.decode('utf-8'))
self.assertIn('# observer\tolivier', response.content.decode('utf-8'))
self.assertIn('# fps\t30', response.content.decode('utf-8'))
self.assertIn(
Expand Down Expand Up @@ -475,3 +494,10 @@ def test_export_endpoints_for_compatibility_formats(self):
self.assertEqual(response.status_code, 200)
payload = json.loads(response.content.decode('utf-8'))
self.assertEqual(payload['schema'], 'pybehaviorlog-0.9.5-session-compatibility-report')

def test_project_compatibility_report_includes_schema_matrix(self):
    """The project report exposes the schema matrix and the extension profile."""
    report = build_project_compatibility_report(self.project)

    # The schema matrix must be present and list the session regex families.
    self.assertIn('supported_schema_matrix', report)
    schema_matrix = report['supported_schema_matrix']
    self.assertIn('session_patterns', schema_matrix)

    # The extension profile must be present and pinned to version 1.0.
    self.assertIn('extension_profile', report)
    extension_profile = report['extension_profile']
    self.assertEqual(extension_profile['profile_version'], '1.0')
132 changes: 92 additions & 40 deletions tracker/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -2112,7 +2112,9 @@
return lookup


def parse_cowlog_results_text(session: ObservationSession, raw_text: str) -> tuple[dict, dict]:
def parse_cowlog_results_text(

Check failure on line 2115 in tracker/views.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Refactor this function to reduce its Cognitive Complexity from 103 to the 15 allowed.

See more on https://sonarcloud.io/project/issues?id=Smartappli_PyBehaviorLog&issues=AZ3jYQaYgcaH22CkagCA&open=AZ3jYQaYgcaH22CkagCA&pullRequest=41
session: ObservationSession, raw_text: str, *, strict: bool = False
) -> tuple[dict, dict]:
"""Parse CowLog-style plain text results into a session import payload."""
behavior_lookup = _token_lookup_map(session.project.behaviors.all())
modifier_lookup = _token_lookup_map(session.project.modifiers.all())
Expand Down Expand Up @@ -2199,10 +2201,12 @@
tokens = parts[1:]
behavior = behavior_lookup.get(tokens[0].casefold())
if behavior is None:
warnings.append(
_('Line %(line)s: unknown behavior token “%(token)s”.')
% {'line': line_number, 'token': tokens[0]}
)
message = _(
'Line %(line)s: unknown behavior token “%(token)s”.'
) % {'line': line_number, 'token': tokens[0]}
if strict:
raise ValueError(message)
warnings.append(message)
continue
event_kind = ObservationEvent.KIND_POINT
modifier_names: list[str] = []
Expand Down Expand Up @@ -2262,7 +2266,11 @@


def parse_tabular_session_rows(
session: ObservationSession, rows: list[dict[str, object]], *, source_format: str
session: ObservationSession,
rows: list[dict[str, object]],
*,
source_format: str,
strict: bool = False,
) -> tuple[dict, dict]:
"""Parse CSV/TSV/XLSX rows with BORIS-like columns into a session payload."""
behavior_lookup = _token_lookup_map(session.project.behaviors.all())
Expand Down Expand Up @@ -2317,10 +2325,13 @@
continue
timestamp_decimal = _decimal(time_token, default='NaN', frame_rate=frame_rate_token)
if timestamp_decimal.is_nan():
warnings.append(
_('Row %(row)s: invalid time value “%(value)s”.')
% {'row': index, 'value': time_token}
)
message = _('Row %(row)s: invalid time value “%(value)s”.') % {
'row': index,
'value': time_token,
}
if strict:
raise ValueError(message)
warnings.append(message)
continue
timestamp = float(timestamp_decimal)
stop_seconds = None
Expand All @@ -2338,10 +2349,13 @@
stop_seconds = float(timestamp_decimal + duration_decimal)
behavior = behavior_lookup.get(str(behavior_token).casefold())
if behavior is None:
warnings.append(
_('Row %(row)s: unknown behavior token “%(token)s”.')
% {'row': index, 'token': behavior_token}
)
message = _('Row %(row)s: unknown behavior token “%(token)s”.') % {
'row': index,
'token': behavior_token,
}
if strict:
raise ValueError(message)
warnings.append(message)
continue
line_count += 1
explicit_kind = _resolve_event_kind_token(
Expand Down Expand Up @@ -2424,7 +2438,7 @@


def parse_tabular_session_file(
session: ObservationSession, uploaded_file, raw_bytes: bytes
session: ObservationSession, uploaded_file, raw_bytes: bytes, *, strict: bool = False
) -> tuple[dict, dict]:
"""Parse CSV/TSV/XLSX tabular imports modeled on BORIS tabular exports."""
filename = str(getattr(uploaded_file, 'name', '') or '').lower()
Expand All @@ -2440,7 +2454,9 @@
row_dicts.append(
{headers[index]: row[index] for index in range(min(len(headers), len(row)))}
)
return parse_tabular_session_rows(session, row_dicts, source_format='boris-tabular-xlsx-v1')
return parse_tabular_session_rows(
session, row_dicts, source_format='boris-tabular-xlsx-v1', strict=strict
)

try:
text_payload = raw_bytes.decode('utf-8-sig')
Expand All @@ -2463,10 +2479,12 @@
for row in reader:
rows.append({str(key): value for key, value in row.items() if key is not None})
source_format = 'boris-tabular-tsv-v1' if delimiter == ' ' else 'boris-tabular-csv-v1'
return parse_tabular_session_rows(session, rows, source_format=source_format)
return parse_tabular_session_rows(session, rows, source_format=source_format, strict=strict)


def load_session_import_payload(uploaded_file, session: ObservationSession) -> tuple[dict, dict]:
def load_session_import_payload(

Check failure on line 2485 in tracker/views.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Refactor this function to reduce its Cognitive Complexity from 18 to the 15 allowed.

See more on https://sonarcloud.io/project/issues?id=Smartappli_PyBehaviorLog&issues=AZ3jYQaZgcaH22CkagCB&open=AZ3jYQaZgcaH22CkagCB&pullRequest=41
uploaded_file, session: ObservationSession, *, strict: bool = False
) -> tuple[dict, dict]:
"""Load session payloads from PyBehaviorLog/BORIS JSON, tabular imports, or CowLog text exports."""
raw_bytes = uploaded_file.read()
report = {'warnings': []}
Expand All @@ -2483,7 +2501,9 @@
report['source_name'] = candidate
return payload, report
if filename.endswith(('.csv', '.tsv', '.xlsx')):
payload, parsed_report = parse_tabular_session_file(session, uploaded_file, raw_bytes)
payload, parsed_report = parse_tabular_session_file(
session, uploaded_file, raw_bytes, strict=strict
)
report.update(parsed_report)
return payload, report
try:
Expand All @@ -2509,14 +2529,19 @@
else:
first_token_is_time = False
if first_token_is_time and filename.endswith('.txt'):
payload, parsed_report = parse_cowlog_results_text(session, text_payload)
report['source_hint'] = 'cowlog_text_time_token'
payload, parsed_report = parse_cowlog_results_text(session, text_payload, strict=strict)
report.update(parsed_report)
return payload, report
if ',' in first_line or ';' in first_line or ' ' in first_line:
payload, parsed_report = parse_tabular_session_file(session, uploaded_file, raw_bytes)
report['source_hint'] = 'tabular_header_delimiter'
payload, parsed_report = parse_tabular_session_file(
session, uploaded_file, raw_bytes, strict=strict
)
report.update(parsed_report)
return payload, report
payload, parsed_report = parse_cowlog_results_text(session, text_payload)
report['source_hint'] = 'cowlog_text_fallback'
payload, parsed_report = parse_cowlog_results_text(session, text_payload, strict=strict)
report.update(parsed_report)
return payload, report

Expand Down Expand Up @@ -2569,6 +2594,7 @@
'certified_against_built_in_corpus': True,
'fixture_version': '0.9.1',
},
'extension_profile': EXTENSION_PROFILE,
}
if state_event_count:
report['cowlog']['warnings'].append(
Expand Down Expand Up @@ -2611,6 +2637,8 @@
],
'supported_cowlog_exports': ['plain_text_results'],
'supported_boris_imports': ['json_project', 'json_observation', 'csv', 'tsv', 'xlsx'],
'supported_schema_matrix': SUPPORTED_SCHEMA_MATRIX,
'extension_profile': EXTENSION_PROFILE,
'notes': [
_(
'BORIS interoperability is strongest when using the documented JSON project/observation workflows and tabular exports.'
Expand Down Expand Up @@ -2746,39 +2774,62 @@
return bool(value and re.fullmatch(pattern, value))


# Registry of schema identifiers recognised by the ``_is_supported_*_schema``
# helpers. ``session_exact`` holds identifiers compared verbatim; every
# ``*_patterns`` list holds regular expressions that are handed to
# ``_schema_matches`` one at a time.
SUPPORTED_SCHEMA_MATRIX = {
    # Session identifiers accepted only as an exact string.
    'session_exact': ['pybehaviorlog-v6-session'],
    # Session identifier families (versioned names expressed as regexes).
    'session_patterns': [
        r'cowlog-django-v\d+-session',
        r'pybehaviorlog-0(?:\.\d+)*-session',
        r'cowlog-results-v\d+',
        r'boris-tabular-(?:csv|tsv|xlsx)-v\d+',
        r'boris-tabular-spreadsheet-v\d+',
    ],
    # Observation identifier families.
    'observation_patterns': [r'boris-observation-v\d+'],
    # Project / bundle identifier families.
    'project_patterns': [
        r'boris-project-v\d+',
        r'pybehaviorlog-0(?:\.\d+)*-bundle',
    ],
    # Ethogram identifier families; BORIS project and observation identifiers
    # are listed here as well.
    'ethogram_patterns': [
        r'cowlog-django-v\d+-ethogram',
        r'pybehaviorlog-0(?:\.\d+)*-ethogram',
        r'boris-project-v\d+',
        r'boris-observation-v\d+',
    ],
}

# Version declaration for the project-specific format extensions. The overall
# ``profile_version`` is what the CowLog-compatible export writes in its
# ``# extension_profile`` header line; ``extensions`` versions each individual
# extension by name.
EXTENSION_PROFILE = {
    'profile_version': '1.0',
    'extensions': {
        'cowlog_metadata_headers': '1.0',
        'cowlog_metadata_annotations': '1.0',
        'cowlog_export_observer_fps': '1.0',
        'boris_observation_merge_notes': '1.0',
        'schema_regex_families': '1.0',
    },
}


def _is_supported_session_schema(value: str | None) -> bool:
    """Return whether ``value`` names a supported session schema.

    A value is accepted when it appears verbatim in
    ``SUPPORTED_SCHEMA_MATRIX['session_exact']`` or when ``_schema_matches``
    accepts it for any regex in ``SUPPORTED_SCHEMA_MATRIX['session_patterns']``.
    """
    # Exact identifiers are checked first; they are not covered by the regexes.
    if value in SUPPORTED_SCHEMA_MATRIX['session_exact']:
        return True
    return any(
        _schema_matches(value, pattern)
        for pattern in SUPPORTED_SCHEMA_MATRIX['session_patterns']
    )


def _is_supported_observation_schema(value: str | None) -> bool:
    """Return whether ``value`` names a supported observation schema.

    The accepted families are the regexes listed under
    ``SUPPORTED_SCHEMA_MATRIX['observation_patterns']``; each one is tested
    with ``_schema_matches``.
    """
    return any(
        _schema_matches(value, pattern)
        for pattern in SUPPORTED_SCHEMA_MATRIX['observation_patterns']
    )


def _is_supported_project_schema(value: str | None) -> bool:
    """Return whether ``value`` names a supported project or bundle schema.

    The accepted families are the regexes listed under
    ``SUPPORTED_SCHEMA_MATRIX['project_patterns']``; each one is tested with
    ``_schema_matches``.
    """
    return any(
        _schema_matches(value, pattern)
        for pattern in SUPPORTED_SCHEMA_MATRIX['project_patterns']
    )


def _is_supported_ethogram_schema(value: str | None) -> bool:
    """Return whether ``value`` names a supported ethogram schema.

    The accepted families are the regexes listed under
    ``SUPPORTED_SCHEMA_MATRIX['ethogram_patterns']``; each one is tested with
    ``_schema_matches``.
    """
    return any(
        _schema_matches(value, pattern)
        for pattern in SUPPORTED_SCHEMA_MATRIX['ethogram_patterns']
    )


Expand Down Expand Up @@ -5828,6 +5879,7 @@
f'attachment; filename="session_{session.pk}_cowlog_compatible.txt"'
)
response.write('# PyBehaviorLog 0.9.5 CowLog-compatible export\n')
response.write(f'# extension_profile\t{EXTENSION_PROFILE["profile_version"]}\n')
response.write(f'# session\t{session.title}\n')
response.write(f'# project\t{session.project.name}\n')
response.write(f'# observer\t{session.observer.username if session.observer else ""}\n')
Expand Down
Loading