Skip to content

Commit 7a8572d

Browse files
authored
test: add no-network and install profile gates (#137)
* test: add no-network and install profile gates
* fix: avoid importing optional modules in telemetry probes
* fix: make telemetry init fully non-blocking
* test: harden telemetry nonblocking assertions
* test: cover optional dependency fallback paths
1 parent 8bdd56e commit 7a8572d

10 files changed

Lines changed: 655 additions & 113 deletions

File tree

.github/workflows/ci.yml

Lines changed: 41 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -154,6 +154,47 @@ jobs:
154154
flags: ${{ matrix.install-profile }}-py${{ matrix.python-version }}
155155
token: ${{ secrets.CODECOV_TOKEN }}
156156

157+
profile-smoke:
158+
runs-on: ubuntu-latest
159+
strategy:
160+
fail-fast: false
161+
matrix:
162+
install-profile:
163+
- core
164+
- cli
165+
- nlp
166+
- nlp-advanced
167+
- ocr
168+
- distributed
169+
- web
170+
steps:
171+
- uses: actions/checkout@v4
172+
- name: Set up Python
173+
uses: actions/setup-python@v5
174+
with:
175+
python-version: "3.11"
176+
cache: "pip"
177+
178+
- name: Upgrade pip
179+
run: |
180+
python -m pip install --upgrade pip
181+
182+
- name: Install dependencies (core)
183+
if: matrix.install-profile == 'core'
184+
run: |
185+
pip install -e ".[test]"
186+
187+
- name: Install dependencies (profile)
188+
if: matrix.install-profile != 'core'
189+
run: |
190+
pip install -e ".[test,${{ matrix.install-profile }}]"
191+
192+
- name: Run install profile smoke test
193+
env:
194+
DATAFOG_INSTALL_PROFILE: ${{ matrix.install-profile }}
195+
run: |
196+
pytest tests/test_install_profiles.py -q
197+
157198
wheel-size:
158199
runs-on: ubuntu-latest
159200
steps:

datafog/engine.py

Lines changed: 8 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -171,17 +171,13 @@ def _gliner_entities(text: str) -> list[Entity]:
171171
def _get_spacy_annotator():
172172
try:
173173
from .processing.text_processing.spacy_pii_annotator import SpacyPIIAnnotator
174-
except ImportError:
175-
return _UnavailableAnnotator(
176-
"SpaCy engine requires the nlp extra. Install with: pip install datafog[nlp]"
177-
)
174+
except ImportError as exc:
175+
return _UnavailableAnnotator(str(exc))
178176

179177
try:
180178
return SpacyPIIAnnotator.create()
181-
except ImportError:
182-
return _UnavailableAnnotator(
183-
"SpaCy engine requires the nlp extra. Install with: pip install datafog[nlp]"
184-
)
179+
except ImportError as exc:
180+
return _UnavailableAnnotator(str(exc))
185181
except Exception as exc:
186182
return _UnavailableAnnotator(
187183
f"SpaCy engine initialization failed: {type(exc).__name__}: {exc}"
@@ -192,19 +188,13 @@ def _get_spacy_annotator():
192188
def _get_gliner_annotator():
193189
try:
194190
from .processing.text_processing.gliner_annotator import GLiNERAnnotator
195-
except ImportError:
196-
return _UnavailableAnnotator(
197-
"GLiNER engine requires the nlp-advanced extra. "
198-
"Install with: pip install datafog[nlp-advanced]"
199-
)
191+
except ImportError as exc:
192+
return _UnavailableAnnotator(str(exc))
200193

201194
try:
202195
annotator = GLiNERAnnotator.create()
203-
except ImportError:
204-
return _UnavailableAnnotator(
205-
"GLiNER engine requires the nlp-advanced extra. "
206-
"Install with: pip install datafog[nlp-advanced]"
207-
)
196+
except ImportError as exc:
197+
return _UnavailableAnnotator(str(exc))
208198
except Exception as exc:
209199
return _UnavailableAnnotator(
210200
f"GLiNER engine initialization failed: {type(exc).__name__}: {exc}"

datafog/models/spacy_nlp.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,6 @@
99
from uuid import uuid4
1010

1111
import spacy
12-
from rich.progress import track
1312

1413
from .annotator import AnnotationResult, AnnotatorRequest
1514

@@ -53,7 +52,7 @@ def annotate_text(self, text: str, language: str = "en") -> List[AnnotationResul
5352
)
5453
doc = self.nlp(annotator_request.text)
5554
results = []
56-
for ent in track(doc.ents, description="Processing entities"):
55+
for ent in doc.ents:
5756
result = AnnotationResult(
5857
start=ent.start_char,
5958
end=ent.end_char,

datafog/telemetry.py

Lines changed: 80 additions & 81 deletions
Original file line number | Diff line number | Diff line change
@@ -13,9 +13,11 @@
1313
"""
1414

1515
import hashlib
16+
import importlib.util
1617
import json
1718
import os
1819
import platform
20+
import sys
1921
import threading
2022
import time
2123
import urllib.request
@@ -114,44 +116,28 @@ def _get_duration_bucket(duration_ms: float) -> str:
114116

115117
def _detect_installed_extras() -> list:
116118
"""Probe which optional extras are installed."""
117-
extras = []
118119

119-
try:
120-
import spacy # noqa: F401
121-
122-
extras.append("nlp")
123-
except ImportError:
124-
pass
125-
126-
try:
127-
import gliner # noqa: F401
128-
129-
extras.append("nlp-advanced")
130-
except ImportError:
131-
pass
132-
133-
try:
134-
import pytesseract # noqa: F401
135-
136-
extras.append("ocr")
137-
except ImportError:
138-
pass
139-
140-
try:
141-
import typer # noqa: F401
142-
143-
extras.append("cli")
144-
except ImportError:
145-
pass
146-
147-
try:
148-
import pyspark # noqa: F401
149-
150-
extras.append("distributed")
151-
except ImportError:
152-
pass
153-
154-
return extras
120+
def _module_available(module_name: str) -> bool:
121+
module = sys.modules.get(module_name)
122+
if module is not None and getattr(module, "__spec__", None) is None:
123+
return True
124+
try:
125+
return importlib.util.find_spec(module_name) is not None
126+
except (ImportError, ValueError):
127+
return False
128+
129+
module_to_extra = {
130+
"spacy": "nlp",
131+
"gliner": "nlp-advanced",
132+
"pytesseract": "ocr",
133+
"typer": "cli",
134+
"pyspark": "distributed",
135+
}
136+
return [
137+
extra
138+
for module_name, extra in module_to_extra.items()
139+
if _module_available(module_name)
140+
]
155141

156142

157143
def _detect_ci() -> bool:
@@ -170,39 +156,69 @@ def _detect_ci() -> bool:
170156
return any(os.environ.get(v) for v in ci_vars)
171157

172158

173-
def _send_event(event_name: str, properties: dict) -> None:
174-
"""POST event to PostHog /capture/ endpoint in a daemon thread.
159+
def _post_event(event_name: str, properties: dict) -> None:
160+
"""POST event to PostHog /capture/ endpoint.
175161
176-
Fire-and-forget: failures are silently ignored.
162+
Fire-and-forget callers run this in daemon threads. Failures are silently
163+
ignored so telemetry can never affect SDK behavior.
177164
"""
165+
try:
166+
payload = json.dumps(
167+
{
168+
"api_key": _POSTHOG_API_KEY,
169+
"event": event_name,
170+
"properties": {
171+
"distinct_id": _get_anonymous_id(),
172+
**properties,
173+
},
174+
"timestamp": time.strftime("%Y-%m-%dT%H:%M:%S.000Z", time.gmtime()),
175+
}
176+
).encode("utf-8")
177+
178+
req = urllib.request.Request(
179+
f"{_POSTHOG_HOST}/capture/",
180+
data=payload,
181+
headers={"Content-Type": "application/json"},
182+
method="POST",
183+
)
184+
urllib.request.urlopen(req, timeout=5)
185+
except Exception:
186+
pass
187+
188+
189+
def _send_event(event_name: str, properties: dict) -> None:
190+
"""POST event to PostHog /capture/ endpoint in a daemon thread."""
178191
if not _is_telemetry_enabled():
179192
return
180193

181-
def _post():
194+
t = threading.Thread(target=_post_event, args=(event_name, properties), daemon=True)
195+
t.start()
196+
197+
198+
def _send_init_event() -> None:
199+
"""Build and send the process init event without blocking API calls."""
200+
201+
def _post_init():
182202
try:
183-
payload = json.dumps(
184-
{
185-
"api_key": _POSTHOG_API_KEY,
186-
"event": event_name,
187-
"properties": {
188-
"distinct_id": _get_anonymous_id(),
189-
**properties,
190-
},
191-
"timestamp": time.strftime("%Y-%m-%dT%H:%M:%S.000Z", time.gmtime()),
192-
}
193-
).encode("utf-8")
194-
195-
req = urllib.request.Request(
196-
f"{_POSTHOG_HOST}/capture/",
197-
data=payload,
198-
headers={"Content-Type": "application/json"},
199-
method="POST",
200-
)
201-
urllib.request.urlopen(req, timeout=5)
203+
from .__about__ import __version__
202204
except Exception:
203-
pass
204-
205-
t = threading.Thread(target=_post, daemon=True)
205+
__version__ = "unknown"
206+
207+
uname = platform.uname()
208+
_post_event(
209+
"datafog_init",
210+
{
211+
"package_version": __version__,
212+
"python_version": platform.python_version(),
213+
"os": uname.system,
214+
"os_version": uname.release,
215+
"arch": uname.machine,
216+
"installed_extras": _detect_installed_extras(),
217+
"is_ci": _detect_ci(),
218+
},
219+
)
220+
221+
t = threading.Thread(target=_post_init, daemon=True)
206222
t.start()
207223

208224

@@ -220,24 +236,7 @@ def _ensure_initialized() -> None:
220236
if not _is_telemetry_enabled():
221237
return
222238

223-
try:
224-
from .__about__ import __version__
225-
except Exception:
226-
__version__ = "unknown"
227-
228-
uname = platform.uname()
229-
_send_event(
230-
"datafog_init",
231-
{
232-
"package_version": __version__,
233-
"python_version": platform.python_version(),
234-
"os": uname.system,
235-
"os_version": uname.release,
236-
"arch": uname.machine,
237-
"installed_extras": _detect_installed_extras(),
238-
"is_ci": _detect_ci(),
239-
},
240-
)
239+
_send_init_event()
241240

242241

243242
def track_function_call(function_name: str, module: str, **kwargs) -> None:

0 commit comments

Comments
 (0)