Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
d3bd048
feat!: migrate python SDK to v2 API surface
VinciGit00 Mar 30, 2026
556060f
feat: add v2 examples for all endpoints
VinciGit00 Mar 30, 2026
d48fcf2
feat: rewrite all examples for v2 API surface
VinciGit00 Mar 30, 2026
1bcd2f6
docs: add v1 to v2 migration guide
VinciGit00 Mar 30, 2026
844bb59
fix: update API base URL to /api/v2
VinciGit00 Mar 30, 2026
9c4c499
refactor: remove schema endpoint
VinciGit00 Mar 30, 2026
d435e7a
ci: reduce test matrix to Python 3.12 only and fix CI failures
VinciGit00 Apr 7, 2026
d4a67e4
Revert "ci: reduce test matrix to Python 3.12 only and fix CI failures"
VinciGit00 Apr 7, 2026
5d4e372
ci: fix CI failures — single Python 3.12 test, fix lint and deps
VinciGit00 Apr 7, 2026
82b126a
ci: consolidate to single test workflow
VinciGit00 Apr 7, 2026
9d2db25
fix: resolve merge conflict in test workflow
VinciGit00 Apr 7, 2026
573d036
feat: replace stealth/render_js booleans with FetchMode enum in Fetch…
VinciGit00 Apr 9, 2026
6355efd
chore: remove FetchConfig/LlmConfig extract examples
VinciGit00 Apr 9, 2026
b27a124
feat: add location_geo_code param to search endpoint and camelCase se…
VinciGit00 Apr 10, 2026
4555959
fix: rename monitor 'cron' field to 'interval' to match API contract
VinciGit00 Apr 10, 2026
51d44c8
style: fix black formatting in shared.py
VinciGit00 Apr 10, 2026
2a73ae9
feat(api): align python sdk with sgai v2
lurenss Apr 13, 2026
331b86e
refactor(api): align python sdk with v2 schema surface
lurenss Apr 13, 2026
f246c76
refactor(api): split fetch mode into mode + stealth toggle
VinciGit00 Apr 14, 2026
5b62652
style: fix black formatting in test files
VinciGit00 Apr 14, 2026
8e2a7fc
fix: align search defaults and migration doc with v2 API contract
VinciGit00 Apr 14, 2026
41aff0f
refactor: rename location_geo_code to country in search
VinciGit00 Apr 14, 2026
2540a1d
fix: remove locationGeoCode alias, send country directly on wire
VinciGit00 Apr 14, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 0 additions & 63 deletions .github/workflows/codeql.yml

This file was deleted.

39 changes: 0 additions & 39 deletions .github/workflows/dependency-review.yml

This file was deleted.

31 changes: 0 additions & 31 deletions .github/workflows/pylint.yml

This file was deleted.

60 changes: 5 additions & 55 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,68 +29,18 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pytest pytest-asyncio responses aioresponses
pip install pytest pytest-asyncio responses aioresponses flake8 black isort
cd scrapegraph-py
pip install -e ".[html]"

- name: Run tests
run: |
cd scrapegraph-py
pytest tests/ -v --ignore=tests/test_integration_v2.py
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
with:
file: ./scrapegraph-py/coverage.xml
flags: unittests
name: codecov-umbrella
fail_ci_if_error: false

lint:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.11"

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 black isort mypy
cd scrapegraph-py
pip install -e .

- name: Run linting
- name: Lint
run: |
cd scrapegraph-py
flake8 scrapegraph_py/ tests/ --max-line-length=120 --extend-ignore=E203,W503,E501,F401,F841
black --check scrapegraph_py/ tests/
isort --check-only scrapegraph_py/ tests/
mypy scrapegraph_py/ --ignore-missing-imports

security:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.11"

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install bandit safety
cd scrapegraph-py
pip install -e .
flake8 scrapegraph_py/ tests/ --max-line-length=120 --extend-ignore=E203,W503,E501,F401,F841

- name: Run security checks
- name: Run tests
run: |
cd scrapegraph-py
bandit -r scrapegraph_py/ -f json -o bandit-report.json || true
safety check --json --output safety-report.json || true
pytest tests/ -v --ignore=tests/test_integration_v2.py
33 changes: 33 additions & 0 deletions examples/async_crawl_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""
Async crawl example.
"""

import asyncio
import json

from scrapegraph_py import AsyncClient


async def main():
    """Start a crawl of https://example.com and poll it until it finishes.

    Prints the job payload returned by ``client.crawl.start``, the status
    seen on every poll, and finally the last status payload received.
    """
    async with AsyncClient() as client:
        # Start crawl
        job = await client.crawl.start(
            "https://example.com",
            depth=2,
            max_pages=5,
        )
        print("Crawl started:", json.dumps(job, indent=2))

        # Poll for completion
        # NOTE(review): the loop exits only on "completed" or "failed" and has
        # no timeout; any other final status would keep it polling forever.
        crawl_id = job["id"]
        while True:
            status = await client.crawl.status(crawl_id)
            print(f"Status: {status.get('status')}")
            if status.get("status") in ("completed", "failed"):
                break
            await asyncio.sleep(2)

        print("\nResult:", json.dumps(status, indent=2))


# Run only when executed as a script, so importing this module does not
# start a crawl as a side effect.
if __name__ == "__main__":
    asyncio.run(main())
17 changes: 17 additions & 0 deletions examples/async_credits_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
"""
Async credits check.
"""

import asyncio
import json

from scrapegraph_py import AsyncClient


async def main():
    """Fetch the credits payload from the API and print it as indented JSON."""
    async with AsyncClient() as client:
        credits = await client.credits()
        print(json.dumps(credits, indent=2))


# Run only when executed as a script, so importing this module does not
# trigger an API call as a side effect.
if __name__ == "__main__":
    asyncio.run(main())
40 changes: 40 additions & 0 deletions examples/async_extract_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""
Async extract example - extract data from multiple pages concurrently.
"""

import asyncio
import json

from pydantic import BaseModel, Field

from scrapegraph_py import AsyncClient


class PageInfo(BaseModel):
    # Shape of the data requested from each page; this class is what the
    # example passes to client.extract as output_schema.
    title: str = Field(
        description="Page title",
    )
    description: str = Field(
        description="Brief description of the page content",
    )


async def main():
    """Extract a ``PageInfo`` from several URLs concurrently and print each result.

    One ``client.extract`` call is created per URL and all of them are
    awaited together with ``asyncio.gather``, which returns the results in
    the same order as ``urls``.
    """
    async with AsyncClient() as client:
        urls = [
            "https://example.com",
            "https://httpbin.org/html",
        ]

        tasks = [
            client.extract(
                url=url,
                prompt="Extract the page title and a brief description",
                output_schema=PageInfo,
            )
            for url in urls
        ]
        results = await asyncio.gather(*tasks)

        for url, result in zip(urls, results):
            print(f"\n=== {url} ===")
            print(json.dumps(result, indent=2))


# Run only when executed as a script, so importing this module does not
# trigger API calls as a side effect.
if __name__ == "__main__":
    asyncio.run(main())
27 changes: 27 additions & 0 deletions examples/async_monitor_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""
Async monitor example.
"""

import asyncio
import json

from scrapegraph_py import AsyncClient


async def main():
    """Create a monitor, then list all monitors, printing both responses as JSON."""
    async with AsyncClient() as client:
        # Create a monitor
        monitor = await client.monitor.create(
            name="Async Price Tracker",
            url="https://example.com/products",
            prompt="Extract product prices",
            interval="0 12 * * *",  # Every day at noon
        )
        print("Created:", json.dumps(monitor, indent=2))

        # List all monitors
        all_monitors = await client.monitor.list()
        print("\nAll monitors:", json.dumps(all_monitors, indent=2))


# Run only when executed as a script, so importing this module does not
# create a monitor as a side effect.
if __name__ == "__main__":
    asyncio.run(main())
27 changes: 27 additions & 0 deletions examples/async_scrape_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""
Async scrape example - scrape multiple pages concurrently.
"""

import asyncio
import json

from scrapegraph_py import AsyncClient


async def main():
    """Scrape several URLs concurrently and print each result as JSON.

    One ``client.scrape`` call is created per URL and all of them are
    awaited together with ``asyncio.gather``, which returns the results in
    the same order as ``urls``.
    """
    async with AsyncClient() as client:
        # Scrape multiple pages concurrently
        urls = [
            "https://example.com",
            "https://httpbin.org/html",
        ]

        tasks = [client.scrape(url) for url in urls]
        results = await asyncio.gather(*tasks)

        for url, result in zip(urls, results):
            print(f"\n=== {url} ===")
            print(json.dumps(result, indent=2))


# Run only when executed as a script, so importing this module does not
# trigger API calls as a side effect.
if __name__ == "__main__":
    asyncio.run(main())
26 changes: 26 additions & 0 deletions examples/async_search_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""
Async search example - run multiple searches concurrently.
"""

import asyncio
import json

from scrapegraph_py import AsyncClient


async def main():
    """Run several searches concurrently and print each result as JSON.

    One ``client.search`` call (with ``num_results=3``) is created per query
    and all of them are awaited together with ``asyncio.gather``, which
    returns the results in the same order as ``queries``.
    """
    async with AsyncClient() as client:
        queries = [
            "best python frameworks 2025",
            "top javascript libraries 2025",
        ]

        tasks = [client.search(q, num_results=3) for q in queries]
        results = await asyncio.gather(*tasks)

        for query, result in zip(queries, results):
            print(f"\n=== {query} ===")
            print(json.dumps(result, indent=2))


# Run only when executed as a script, so importing this module does not
# trigger API calls as a side effect.
if __name__ == "__main__":
    asyncio.run(main())
Loading
Loading