-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
61 lines (50 loc) · 2.14 KB
/
utils.py
File metadata and controls
61 lines (50 loc) · 2.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from datetime import datetime
from typing import Dict
import hashlib
from dateutil.parser import parse as parse_date  # third-party (python-dateutil); used by enrich_message to parse string timestamps
def generate_message_id(message: Dict) -> str:
    """Build a message ID of the form ``<username>_<timestamp>_<text-hash>``.

    The timestamp part comes from the first of these that applies:

    1. ``message['timestamp']`` when it is a ``datetime`` (ISO formatted),
    2. a non-empty ``message['timestamp_str']``,
    3. ``message['timestamp']`` when it is a string,
    4. the current local time. In this case the ID differs between calls.

    The hash part is the first 8 hex digits of the MD5 digest of the UTF-8
    encoded ``message['text']``. A missing or ``None`` text is hashed as the
    empty string, and any other non-string value is hashed via ``str()``.
    MD5 is used here only as a short fingerprint, not for security, and a
    truncated digest can collide.

    The username is stripped and lower-cased; when it is empty or one of
    ``'unknown'``, ``'none'``, ``'null'`` the placeholder ``'bot_or_unknown'``
    is used instead.

    Args:
        message: Mapping describing the message. Only the ``timestamp``,
            ``timestamp_str``, ``text`` and ``username`` keys are read.

    Returns:
        The composed identifier string.
    """
    # Resolve the timestamp component.
    timestamp = message.get('timestamp')
    if isinstance(timestamp, datetime):
        timestamp_str = timestamp.isoformat()
    elif message.get('timestamp_str'):
        timestamp_str = message['timestamp_str']
    elif isinstance(timestamp, str):
        timestamp_str = timestamp
    else:
        timestamp_str = datetime.now().isoformat()

    # Normalise the text before hashing: ``dict.get`` only applies its
    # default when the key is absent, so an explicit ``None`` (or any other
    # non-string value) would otherwise fail on ``.encode``.
    text = message.get('text')
    if text is None:
        text = ''
    elif not isinstance(text, str):
        text = str(text)
    text_hash = hashlib.md5(text.encode('utf-8')).hexdigest()[:8]

    # ``str(None)`` yields 'None', which lower-cases into the placeholder
    # list below, so a ``None`` username is treated as unknown too.
    raw_username = str(message.get('username', '')).strip().lower()
    if not raw_username or raw_username in ('unknown', 'none', 'null'):
        username = "bot_or_unknown"
    else:
        username = raw_username

    return f"{username}_{timestamp_str}_{text_hash}"
def enrich_message(message: Dict, file_source: str = "unknown") -> Dict:
    """Ensure a message carries all expected fields, modifying it in place.

    After the call the message has:

    * ``timestamp``: the existing value when it is date-like, the parsed
      value when it was a string, or the current local time when it was
      missing, falsy, unparseable, or of an unsupported type;
    * ``timestamp_str``: ``timestamp.isoformat()``;
    * ``username``: the existing value, or ``"unknown"`` when missing/falsy;
    * ``file_source``: ``file_source``, or ``"unknown"`` when falsy;
    * ``message_id``: the result of ``generate_message_id(message)``.

    Args:
        message: The message dict to complete. It is mutated.
        file_source: Label stored under ``file_source``.

    Returns:
        The same ``message`` object that was passed in.
    """
    # Normalise the timestamp.
    timestamp = message.get('timestamp')
    if not timestamp:
        timestamp = datetime.now()
    elif isinstance(timestamp, str):
        try:
            timestamp = parse_date(timestamp)
        except Exception:
            # Deliberate best-effort: any parse failure falls back to "now"
            # rather than aborting enrichment of the message.
            timestamp = datetime.now()

    # A truthy value that is neither a string nor date-like (for example an
    # integer epoch) has no ``isoformat``; treat it like an unparseable
    # string instead of raising AttributeError below.
    if not callable(getattr(timestamp, 'isoformat', None)):
        timestamp = datetime.now()

    message['timestamp'] = timestamp
    # Human-readable timestamp string.
    message['timestamp_str'] = timestamp.isoformat()

    # Username
    if not message.get('username'):
        message['username'] = "unknown"

    # File source
    message['file_source'] = file_source or "unknown"

    # Generate the ID last so it sees the normalised fields set above.
    message['message_id'] = generate_message_id(message)
    return message