From b22052d888b5978123aaa914acb30f6a6be0bb3a Mon Sep 17 00:00:00 2001 From: myst25 <83259411+myst-25@users.noreply.github.com> Date: Wed, 22 Apr 2026 22:14:21 +0530 Subject: [PATCH 01/24] Initial commit --- README.md | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..f2f0331 --- /dev/null +++ b/README.md @@ -0,0 +1,2 @@ +# morphe-apk-scraper +Auto-scrapes exact APK versions needed by Morphe patches and uploads to releases From 53fe68170443de873787441634fc1f5c9f6cb1fa Mon Sep 17 00:00:00 2001 From: myst25 <83259411+myst-25@users.noreply.github.com> Date: Wed, 22 Apr 2026 22:19:59 +0530 Subject: [PATCH 02/24] init: add scraper.py, scrape.yml and apps.json --- .github/workflows/scrape.yml | 46 ++++++ README.md | 33 +++- apps.json | 156 +++++++++++++++++++ requirements.txt | 3 + scraper.py | 284 +++++++++++++++++++++++++++++++++++ 5 files changed, 521 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/scrape.yml create mode 100644 apps.json create mode 100644 requirements.txt create mode 100644 scraper.py diff --git a/.github/workflows/scrape.yml b/.github/workflows/scrape.yml new file mode 100644 index 0000000..4a37670 --- /dev/null +++ b/.github/workflows/scrape.yml @@ -0,0 +1,46 @@ +name: Scrape APKs + +on: + schedule: + - cron: '0 2 * * *' # Daily at 2 AM UTC + workflow_dispatch: # Manual trigger + +jobs: + scrape: + runs-on: ubuntu-latest + permissions: write-all + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install dependencies + run: pip install requests beautifulsoup4 + + - name: Run scraper + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_REPOSITORY: ${{ github.repository }} + run: python scraper.py + + - name: Upload scrape results artifact + if: always() + uses: actions/upload-artifact@v4 + with: + name: scrape-results + path: 
scrape_results.json + retention-days: 7 + + - name: Trigger Morphed-apps build + if: success() + env: + GH_TOKEN: ${{ secrets.MORPHE_PAT }} + run: | + gh workflow run build.yml \ + --repo myst-25/Morphed-apps \ + --ref main diff --git a/README.md b/README.md index f2f0331..779c700 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,33 @@ # morphe-apk-scraper -Auto-scrapes exact APK versions needed by Morphe patches and uploads to releases + +Auto-scrapes exact APK versions needed by [Morphe patches](https://github.com/MorpheApp/morphe-patches) and uploads them to GitHub Releases so the [Morphed-apps](https://github.com/myst-25/Morphed-apps) build workflow can use them as a reliable APK source. + +## How it works + +1. `apps.json` — list of all apps with their exact version required by the patches +2. `scraper.py` — scrapes APKMirror for each app, downloads the exact APK, uploads to this repo's GitHub Releases under the tag `apks` +3. `scrape.yml` — runs daily at 2 AM UTC, triggers `Morphed-apps` build workflow after scraping + +## Setup + +### Required Secrets + +Add these secrets to this repo (`Settings → Secrets → Actions`): + +| Secret | Description | +|--------|-------------| +| `GITHUB_TOKEN` | Auto-provided by GitHub Actions | +| `MORPHE_PAT` | Personal Access Token with `workflow` scope — needed to trigger the build in `Morphed-apps` | + +### How to get MORPHE_PAT + +1. Go to GitHub → Settings → Developer Settings → Personal Access Tokens → Fine-grained tokens +2. Create token with access to `myst-25/Morphed-apps` +3. Grant **Actions: Read & Write** permission +4. Copy the token and add it as `MORPHE_PAT` secret in this repo + +## APK Release + +All scraped APKs are uploaded to the [`apks` release](https://github.com/myst-25/morphe-apk-scraper/releases/tag/apks) of this repo. + +The `config.toml` in `Morphed-apps` points `archive-dlurl` to these assets. 
diff --git a/apps.json b/apps.json new file mode 100644 index 0000000..8339991 --- /dev/null +++ b/apps.json @@ -0,0 +1,156 @@ +[ + { + "name": "YouTube", + "package": "com.google.android.youtube", + "version": "20.47.62", + "apkmirror_url": "https://www.apkmirror.com/apk/google-inc/youtube/youtube-20-47-62-release/", + "arch": "nodpi" + }, + { + "name": "YouTube Music", + "package": "com.google.android.apps.youtube.music", + "version": "8.47.56", + "apkmirror_url": "https://www.apkmirror.com/apk/google-inc/youtube-music/youtube-music-8-47-56-release/", + "arch": "arm64-v8a" + }, + { + "name": "Reddit", + "package": "com.reddit.frontpage", + "version": "2026.10.0", + "apkmirror_url": "https://www.apkmirror.com/apk/redditinc/reddit/reddit-2026-10-0-release/", + "arch": "nodpi" + }, + { + "name": "Amazon Prime Video", + "package": "com.amazon.avod.thirdpartyclient", + "version": "3.0.447.757", + "apkmirror_url": "https://www.apkmirror.com/apk/amazon-mobile-llc/amazon-prime-video/amazon-prime-video-3-0-447-757-release/", + "arch": "nodpi" + }, + { + "name": "Duolingo", + "package": "com.duolingo", + "version": "6.74.4", + "apkmirror_url": "https://www.apkmirror.com/apk/duolingo/duolingo-duolingo/duolingo-duolingo-6-74-4-release/", + "arch": "nodpi" + }, + { + "name": "Proton VPN", + "package": "ch.protonvpn.android", + "version": "5.16.83.0", + "apkmirror_url": "https://www.apkmirror.com/apk/proton-technologies-ag/proton-vpn-fast-secure/proton-vpn-fast-secure-5-16-83-0-release/", + "arch": "nodpi" + }, + { + "name": "SoundCloud", + "package": "com.soundcloud.android", + "version": "2026.03.20-release", + "apkmirror_url": "https://www.apkmirror.com/apk/soundcloud/soundcloud-play-music-songs/soundcloud-play-music-songs-2026-03-20-release/", + "arch": "nodpi" + }, + { + "name": "Pandora", + "package": "com.pandora.android", + "version": null, + "apkmirror_url": "https://www.apkmirror.com/apk/pandora/pandora-radio-music-podcasts/", + "arch": "nodpi" + }, + { + "name": 
"WPS Office", + "package": "cn.wps.moffice_eng", + "version": "18.24", + "apkmirror_url": "https://www.apkmirror.com/apk/kingsoft-office-software-corporation-limited/wps-office-free-office-suite-for-word-pdf-excel/wps-office-free-office-suite-for-word-pdf-excel-18-24-release/", + "arch": "nodpi" + }, + { + "name": "MyFitnessPal", + "package": "com.myfitnesspal.android", + "version": "25.50.0", + "apkmirror_url": "https://www.apkmirror.com/apk/under-armour/myfitnesspal/myfitnesspal-25-50-0-release/", + "arch": "nodpi" + }, + { + "name": "AdGuard", + "package": "com.adguard.android", + "version": "4.12.81", + "apkmirror_url": "https://www.apkmirror.com/apk/adguard-software-limited/adguard-ad-blocker/adguard-ad-blocker-4-12-81-release/", + "arch": "nodpi" + }, + { + "name": "SofaScore", + "package": "com.sofascore.results", + "version": "25.12.17", + "apkmirror_url": "https://www.apkmirror.com/apk/sofascore/sofascore-sports-scores/sofascore-sports-scores-25-12-17-release/", + "arch": "nodpi" + }, + { + "name": "Nova Launcher", + "package": "com.teslacoilsw.launcher", + "version": "81042", + "apkmirror_url": "https://www.apkmirror.com/apk/teslacoil-software/nova-launcher/nova-launcher-8-5-1-release/", + "arch": "nodpi" + }, + { + "name": "Podcast Addict", + "package": "com.bambuna.podcastaddict", + "version": "2026.4", + "apkmirror_url": "https://www.apkmirror.com/apk/xavier-michiels/podcast-addict/podcast-addict-2026-4-release/", + "arch": "nodpi" + }, + { + "name": "Xodo PDF", + "package": "com.xodo.pdf.reader", + "version": "10.13.0", + "apkmirror_url": "https://www.apkmirror.com/apk/xodo-technologies-inc/xodo-pdf-reader-editor/xodo-pdf-reader-editor-10-13-0-release/", + "arch": "nodpi" + }, + { + "name": "Solid Explorer", + "package": "pl.solidexplorer2", + "version": "3.4.1", + "apkmirror_url": "https://www.apkmirror.com/apk/neatbytes/solid-explorer-file-manager/solid-explorer-file-manager-3-4-1-release/", + "arch": "nodpi" + }, + { + "name": "ibis Paint X", + 
"package": "jp.ne.ibis.ibispaintx.app", + "version": "14.0.0", + "apkmirror_url": "https://www.apkmirror.com/apk/ibis-inc/ibis-paint-x/ibis-paint-x-14-0-0-release/", + "arch": "nodpi" + }, + { + "name": "Windy", + "package": "com.windyty.android", + "version": "49.0.1", + "apkmirror_url": "https://www.apkmirror.com/apk/windyty-se/windy-wind-weather-forecast/windy-wind-weather-forecast-49-0-1-release/", + "arch": "nodpi" + }, + { + "name": "Merriam-Webster", + "package": "com.merriamwebster", + "version": null, + "apkmirror_url": "https://www.apkmirror.com/apk/merriam-webster-inc/merriam-webster-dictionary/", + "arch": "nodpi" + }, + { + "name": "Mimo", + "package": "com.getmimo", + "version": "9.0", + "apkmirror_url": "https://www.apkmirror.com/apk/mimo-learn-coding-programming/mimo-learn-coding-programming/mimo-learn-coding-programming-9-0-release/", + "arch": "nodpi" + }, + { + "name": "Wallpaper Craft", + "package": "com.wallpaperscraft.wallpaper", + "version": "3.61.01", + "apkmirror_url": "https://www.apkmirror.com/apk/wallpaperscraft/wallpaper-engine-live-wallpaper/wallpaper-engine-live-wallpaper-3-61-01-release/", + "arch": "nodpi" + }, + { + "name": "FotMob", + "package": "com.mobilefootie.wc2010", + "version": "226.16092.20260302", + "apkmirror_url": "https://www.apkmirror.com/apk/fotmob-as/fotmob-live-soccer-scores/fotmob-live-soccer-scores-226-16092-20260302-release/", + "arch": "nodpi" + } +] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..02fb52c --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +requests>=2.31.0 +beautifulsoup4>=4.12.0 +lxml>=5.0.0 diff --git a/scraper.py b/scraper.py new file mode 100644 index 0000000..e8ed129 --- /dev/null +++ b/scraper.py @@ -0,0 +1,284 @@ +#!/usr/bin/env python3 +""" +Morphe APK Scraper +Scrapes APKMirror for exact APK versions needed by Morphe patches +and uploads them to GitHub Releases. 
+""" + +import json +import os +import re +import sys +import time +import subprocess +from pathlib import Path + +import requests +from bs4 import BeautifulSoup + +HEADERS = { + "User-Agent": "Mozilla/5.0 (Linux; Android 12; Pixel 6) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36", + "Accept-Language": "en-US,en;q=0.9", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", +} + +APKMIRROR_BASE = "https://www.apkmirror.com" +DOWNLOAD_DIR = Path("apks") +APPS_FILE = Path("apps.json") +GITHUB_REPO = os.environ.get("GITHUB_REPOSITORY", "myst-25/morphe-apk-scraper") +GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "") +RELEASE_TAG = "apks" + + +def load_apps(): + with open(APPS_FILE) as f: + return json.load(f) + + +def get_soup(url, retries=3): + for attempt in range(retries): + try: + resp = requests.get(url, headers=HEADERS, timeout=30) + resp.raise_for_status() + return BeautifulSoup(resp.text, "html.parser") + except Exception as e: + print(f" [!] 
Attempt {attempt+1} failed for {url}: {e}") + time.sleep(5 * (attempt + 1)) + return None + + +def find_latest_version_url(base_url): + """When no version is pinned, find the latest APK page URL from listing.""" + soup = get_soup(base_url) + if not soup: + return None + link = soup.select_one(".appRowVariantTag~ .appRowVariantTag+ .table-cell a") + if not link: + # fallback: grab first release link + link = soup.select_one('a[href*="-release/"]') + if link: + return APKMIRROR_BASE + link["href"] + return None + + +def find_apk_download_page(release_url, arch): + """From a release page, find the direct APK variant download page link.""" + soup = get_soup(release_url) + if not soup: + return None + + # Look for APK (not APKM/XAPK) download links + for row in soup.select(".table-cell.rowheight"): + text = row.get_text() + # Prefer matching arch or nodpi / universal + if arch and arch != "nodpi": + if arch not in text and "universal" not in text.lower(): + continue + # Skip bundles + if "BUNDLE" in text.upper() or "APKS" in text.upper(): + continue + link = row.find("a", href=re.compile(r"/apk/.+download/")) + if link: + return APKMIRROR_BASE + link["href"] + + # Fallback: find any APK download variant link on page + link = soup.find("a", href=re.compile(r"/apk/.+download/"), string=re.compile(r"APK", re.I)) + if link: + return APKMIRROR_BASE + link["href"] + return None + + +def get_final_download_url(download_page_url): + """From APKMirror download page, extract the final direct download URL.""" + soup = get_soup(download_page_url) + if not soup: + return None + # The actual download button + btn = soup.select_one("a[href*='?key=']") or soup.select_one(".downloadButton a") or \ + soup.find("a", href=re.compile(r"download\.php\?key=")) + if btn: + href = btn.get("href", "") + if href.startswith("/"): + return APKMIRROR_BASE + href + return href + return None + + +def download_apk(url, dest_path, retries=3): + """Download APK from APKMirror with retry.""" + for attempt 
in range(retries): + try: + with requests.get(url, headers=HEADERS, stream=True, timeout=120) as r: + r.raise_for_status() + with open(dest_path, "wb") as f: + for chunk in r.iter_content(chunk_size=8192): + f.write(chunk) + size = dest_path.stat().st_size + if size < 100_000: # suspiciously small = likely error page + print(f" [!] Downloaded file too small ({size} bytes), may be error page") + dest_path.unlink(missing_ok=True) + return False + print(f" [+] Downloaded {dest_path.name} ({size // 1024 // 1024} MB)") + return True + except Exception as e: + print(f" [!] Download attempt {attempt+1} failed: {e}") + time.sleep(5 * (attempt + 1)) + return False + + +def get_or_create_release(): + """Get existing 'apks' release or create it. Returns release id.""" + api = f"https://api.github.com/repos/{GITHUB_REPO}/releases/tags/{RELEASE_TAG}" + headers = {"Authorization": f"Bearer {GITHUB_TOKEN}", "Accept": "application/vnd.github+json"} + r = requests.get(api, headers=headers) + if r.status_code == 200: + return r.json()["id"], r.json()["upload_url"] + # Create it + api = f"https://api.github.com/repos/{GITHUB_REPO}/releases" + payload = { + "tag_name": RELEASE_TAG, + "name": "APK Mirror", + "body": "Auto-scraped APKs for Morphe patching", + "prerelease": False + } + r = requests.post(api, headers=headers, json=payload) + r.raise_for_status() + return r.json()["id"], r.json()["upload_url"] + + +def list_release_assets(release_id): + """Returns dict of {filename: asset_id} already in the release.""" + api = f"https://api.github.com/repos/{GITHUB_REPO}/releases/{release_id}/assets" + headers = {"Authorization": f"Bearer {GITHUB_TOKEN}", "Accept": "application/vnd.github+json"} + r = requests.get(api, headers=headers) + r.raise_for_status() + return {a["name"]: a["id"] for a in r.json()} + + +def delete_asset(asset_id): + api = f"https://api.github.com/repos/{GITHUB_REPO}/releases/assets/{asset_id}" + headers = {"Authorization": f"Bearer {GITHUB_TOKEN}", "Accept": 
"application/vnd.github+json"} + requests.delete(api, headers=headers) + + +def upload_asset(upload_url, file_path): + """Upload APK file to GitHub release.""" + upload_url = upload_url.split("{")[0] # strip template part + headers = { + "Authorization": f"Bearer {GITHUB_TOKEN}", + "Content-Type": "application/vnd.android.package-archive", + } + params = {"name": file_path.name} + with open(file_path, "rb") as f: + r = requests.post(upload_url, headers=headers, params=params, data=f, timeout=300) + if r.status_code in (200, 201): + print(f" [+] Uploaded {file_path.name} to GitHub release") + return r.json().get("browser_download_url", "") + else: + print(f" [!] Upload failed ({r.status_code}): {r.text[:200]}") + return "" + + +def scrape_and_upload(app): + name = app["name"] + package = app["package"] + version = app.get("version") + base_url = app["apkmirror_url"] + arch = app.get("arch", "nodpi") + + print(f"\n[>] {name} ({package}) version={version or 'latest'}") + + # Determine release page URL + if version: + release_url = base_url + else: + print(f" [~] No version pinned, finding latest...") + release_url = find_latest_version_url(base_url) + if not release_url: + print(f" [!] Could not find latest version URL for {name}") + return None + print(f" [~] Latest release page: {release_url}") + + # Find APK variant download page + dl_page = find_apk_download_page(release_url, arch) + if not dl_page: + print(f" [!] Could not find APK download page for {name}") + return None + print(f" [~] Download page: {dl_page}") + + # Get final download URL + final_url = get_final_download_url(dl_page) + if not final_url: + print(f" [!] 
Could not extract final download URL for {name}") + return None + print(f" [~] Final URL: {final_url}") + + # Download APK + safe_name = re.sub(r"[^a-zA-Z0-9._-]", "_", name) + ver_tag = version.replace(" ", "_") if version else "latest" + filename = f"{package}-{ver_tag}.apk" + dest = DOWNLOAD_DIR / filename + DOWNLOAD_DIR.mkdir(exist_ok=True) + + if not download_apk(final_url, dest): + print(f" [!] Failed to download APK for {name}") + return None + + return dest + + +def main(): + if not GITHUB_TOKEN: + print("[!] GITHUB_TOKEN not set, cannot upload to releases") + sys.exit(1) + + apps = load_apps() + print(f"[*] Loaded {len(apps)} apps from apps.json") + + release_id, upload_url = get_or_create_release() + print(f"[*] Using release id={release_id}") + existing_assets = list_release_assets(release_id) + print(f"[*] Existing assets: {list(existing_assets.keys())}") + + results = [] + for app in apps: + apk_path = scrape_and_upload(app) + if not apk_path: + results.append({"name": app["name"], "status": "FAILED", "url": ""}) + continue + + # Delete old asset with same name if exists + if apk_path.name in existing_assets: + print(f" [~] Replacing existing asset {apk_path.name}") + delete_asset(existing_assets[apk_path.name]) + + dl_url = upload_asset(upload_url, apk_path) + results.append({ + "name": app["name"], + "package": app["package"], + "version": app.get("version", "latest"), + "status": "OK" if dl_url else "UPLOAD_FAILED", + "url": dl_url + }) + + # Clean up local file after upload + apk_path.unlink(missing_ok=True) + time.sleep(2) # be polite to APKMirror + + # Write results summary + with open("scrape_results.json", "w") as f: + json.dump(results, f, indent=2) + + print("\n=== Scrape Summary ===") + ok = sum(1 for r in results if r["status"] == "OK") + failed = [r["name"] for r in results if r["status"] != "OK"] + print(f"Success: {ok}/{len(results)}") + if failed: + print(f"Failed: {', '.join(failed)}") + sys.exit(1) + print("All APKs scraped and 
uploaded successfully!") + + +if __name__ == "__main__": + main() From dd7d246a23a30e1f033d1ca21f2c0e8f5e7f3b26 Mon Sep 17 00:00:00 2001 From: myst25 <83259411+myst-25@users.noreply.github.com> Date: Wed, 22 Apr 2026 22:27:30 +0530 Subject: [PATCH 03/24] fix: rewrite scraper with correct APKMirror selectors and download flow --- scraper.py | 390 +++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 271 insertions(+), 119 deletions(-) diff --git a/scraper.py b/scraper.py index e8ed129..e381c43 100644 --- a/scraper.py +++ b/scraper.py @@ -10,17 +10,17 @@ import re import sys import time -import subprocess from pathlib import Path import requests from bs4 import BeautifulSoup HEADERS = { - "User-Agent": "Mozilla/5.0 (Linux; Android 12; Pixel 6) AppleWebKit/537.36 " - "(KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36", + "User-Agent": "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/124.0.0.0 Mobile Safari/537.36", "Accept-Language": "en-US,en;q=0.9", - "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", + "Referer": "https://www.apkmirror.com/", } APKMIRROR_BASE = "https://www.apkmirror.com" @@ -36,193 +36,346 @@ def load_apps(): return json.load(f) -def get_soup(url, retries=3): +def get_soup(url, retries=3, delay=4): for attempt in range(retries): try: resp = requests.get(url, headers=HEADERS, timeout=30) + if resp.status_code == 429: + wait = int(resp.headers.get("Retry-After", 30)) + print(f" [!] Rate limited, waiting {wait}s...") + time.sleep(wait) + continue resp.raise_for_status() return BeautifulSoup(resp.text, "html.parser") except Exception as e: print(f" [!] 
Attempt {attempt+1} failed for {url}: {e}") - time.sleep(5 * (attempt + 1)) + time.sleep(delay * (attempt + 1)) + return None + + +def version_to_url_slug(version): + """Convert version string like '20.47.62' -> '20-47-62'""" + return re.sub(r"[^a-zA-Z0-9]+", "-", version).strip("-").lower() + + +def find_release_page(base_url, version): + """ + Given the APKMirror listing URL and a version string, + find the correct release page URL. + Strategy: + 1. Load listing page and search for a link containing the version slug. + 2. Fallback: construct URL from base_url + version slug pattern. + """ + soup = get_soup(base_url) + if not soup: + return None + + ver_slug = version_to_url_slug(version) + # Search all links on the page for one matching the version + for a in soup.find_all("a", href=True): + href = a["href"] + if ver_slug in href and "/apk/" in href and href.endswith("/"): + # Make sure it's a release page (not download page) + if "download" not in href: + full = href if href.startswith("http") else APKMIRROR_BASE + href + print(f" [~] Found release page via listing: {full}") + return full + + # Fallback: try to construct the URL directly + # APKMirror pattern: {base_url}{app-slug}-{version-slug}-release/ + # Extract app slug from base_url + base_clean = base_url.rstrip("/") + app_slug = base_clean.split("/")[-1] + constructed = f"{base_clean}/{app_slug}-{ver_slug}-release/" + print(f" [~] Trying constructed URL: {constructed}") + resp = requests.get(constructed, headers=HEADERS, timeout=20) + if resp.status_code == 200 and "apkmirror" in resp.url: + return constructed + + print(f" [!] 
Could not find release page for version {version}") return None -def find_latest_version_url(base_url): - """When no version is pinned, find the latest APK page URL from listing.""" +def find_latest_release_page(base_url): + """When no version is pinned, get the first/latest release page URL.""" soup = get_soup(base_url) if not soup: return None - link = soup.select_one(".appRowVariantTag~ .appRowVariantTag+ .table-cell a") - if not link: - # fallback: grab first release link - link = soup.select_one('a[href*="-release/"]') - if link: - return APKMIRROR_BASE + link["href"] + # APKMirror listing: release rows have class 'appRow' + for a in soup.select(".appRow a[href]"): + href = a["href"] + if "-release/" in href and "download" not in href: + full = href if href.startswith("http") else APKMIRROR_BASE + href + return full + # Fallback: any link with -release/ + a = soup.find("a", href=re.compile(r"-release/$")) + if a: + href = a["href"] + return href if href.startswith("http") else APKMIRROR_BASE + href return None -def find_apk_download_page(release_url, arch): - """From a release page, find the direct APK variant download page link.""" +def find_apk_variant_page(release_url, arch): + """ + From a release page (e.g. /apk/google-inc/youtube/youtube-20-47-62-release/), + find the individual APK variant download info page. + APKMirror shows a table of variants; we pick APK (not APKM/bundle), matching arch. 
+ """ soup = get_soup(release_url) if not soup: return None - # Look for APK (not APKM/XAPK) download links - for row in soup.select(".table-cell.rowheight"): - text = row.get_text() - # Prefer matching arch or nodpi / universal - if arch and arch != "nodpi": - if arch not in text and "universal" not in text.lower(): - continue - # Skip bundles - if "BUNDLE" in text.upper() or "APKS" in text.upper(): + # The variants table rows — each has a link to the variant info page + # Real APKMirror selector: div.table-cell > span contains arch info, + # and the row has a link to /apk/.../download-variant/ + rows = soup.select("div.variants-table .table-row") + if not rows: + # fallback selector used on some pages + rows = soup.select(".apkm-badge") + + # Strategy: collect all APK (non-bundle) variant links + candidates = [] + for a in soup.find_all("a", href=re.compile(r"/apk/.+/\d+/$")): + href = a["href"] + # Get surrounding text to check arch and type + parent_text = a.find_parent().get_text(" ", strip=True) if a.find_parent() else "" + # Skip APKM bundles + if "BUNDLE" in parent_text.upper() or "APKM" in parent_text.upper(): continue - link = row.find("a", href=re.compile(r"/apk/.+download/")) - if link: - return APKMIRROR_BASE + link["href"] - - # Fallback: find any APK download variant link on page - link = soup.find("a", href=re.compile(r"/apk/.+download/"), string=re.compile(r"APK", re.I)) - if link: - return APKMIRROR_BASE + link["href"] + candidates.append((href, parent_text)) + + if not candidates: + # Try broader: any link ending with digit/ + for a in soup.find_all("a", href=re.compile(r"/apk/")): + href = a["href"] + if re.search(r"/\d+/$", href): + parent_text = a.find_parent().get_text(" ", strip=True) if a.find_parent() else "" + if "BUNDLE" not in parent_text.upper(): + candidates.append((href, parent_text)) + + if not candidates: + print(f" [!] 
No variant links found on {release_url}") + return None + + # Prefer arch match, then nodpi/universal, then first + def score(item): + href, text = item + t = text.lower() + if arch and arch != "nodpi" and arch.lower() in t: + return 0 + if "nodpi" in t or "universal" in t or "all" in t: + return 1 + return 2 + + candidates.sort(key=score) + best_href = candidates[0][0] + full = best_href if best_href.startswith("http") else APKMIRROR_BASE + best_href + print(f" [~] Variant page: {full}") + return full + + +def get_download_page_url(variant_page_url): + """ + From a variant info page (/apk/.../{id}/), + find the 'Download APK' button link which leads to the interstitial download page. + """ + soup = get_soup(variant_page_url) + if not soup: + return None + + # APKMirror: the green download button links to a page like + # /apk/.../download/?key=... + btn = soup.find("a", href=re.compile(r"download/\?key=")) + if btn: + href = btn["href"] + full = href if href.startswith("http") else APKMIRROR_BASE + href + print(f" [~] Interstitial page: {full}") + return full + + # Fallback: look for any download link + btn = soup.find("a", string=re.compile(r"download", re.I), href=re.compile(r"download")) + if btn: + href = btn["href"] + full = href if href.startswith("http") else APKMIRROR_BASE + href + return full + + print(f" [!] No download button found on {variant_page_url}") return None -def get_final_download_url(download_page_url): - """From APKMirror download page, extract the final direct download URL.""" - soup = get_soup(download_page_url) +def get_final_apk_url(interstitial_url): + """ + APKMirror interstitial page has a 'Click here to download' link + that is the actual APK file URL (via their CDN/redirect). 
+ The real link is in: a[rel='nofollow'] or href containing 'cdn.apkmirror.com' + """ + soup = get_soup(interstitial_url) if not soup: return None - # The actual download button - btn = soup.select_one("a[href*='?key=']") or soup.select_one(".downloadButton a") or \ - soup.find("a", href=re.compile(r"download\.php\?key=")) - if btn: - href = btn.get("href", "") - if href.startswith("/"): - return APKMIRROR_BASE + href - return href + + # Direct CDN link + a = soup.find("a", href=re.compile(r"cdn\.apkmirror\.com")) + if a: + return a["href"] + + # Fallback: nofollow download link + a = soup.find("a", rel="nofollow", href=re.compile(r"\.apk")) + if a: + href = a["href"] + return href if href.startswith("http") else APKMIRROR_BASE + href + + # Last resort: any .apk link + a = soup.find("a", href=re.compile(r"\.apk")) + if a: + href = a["href"] + return href if href.startswith("http") else APKMIRROR_BASE + href + + # Try extracting from onclick / data attrs + for tag in soup.find_all(attrs={"data-google-interstitial": True}): + href = tag.get("href", "") + if href: + return href if href.startswith("http") else APKMIRROR_BASE + href + + print(f" [!] Could not find final APK URL on {interstitial_url}") return None def download_apk(url, dest_path, retries=3): - """Download APK from APKMirror with retry.""" + """Download APK with retry and size validation.""" for attempt in range(retries): try: - with requests.get(url, headers=HEADERS, stream=True, timeout=120) as r: + with requests.get(url, headers=HEADERS, stream=True, + timeout=180, allow_redirects=True) as r: r.raise_for_status() + content_type = r.headers.get("Content-Type", "") + if "text/html" in content_type: + print(f" [!] 
Got HTML instead of APK (blocked/captcha?)") + return False with open(dest_path, "wb") as f: - for chunk in r.iter_content(chunk_size=8192): + for chunk in r.iter_content(chunk_size=65536): f.write(chunk) size = dest_path.stat().st_size - if size < 100_000: # suspiciously small = likely error page - print(f" [!] Downloaded file too small ({size} bytes), may be error page") + if size < 500_000: + print(f" [!] File too small ({size} bytes), likely not a valid APK") dest_path.unlink(missing_ok=True) return False print(f" [+] Downloaded {dest_path.name} ({size // 1024 // 1024} MB)") return True except Exception as e: print(f" [!] Download attempt {attempt+1} failed: {e}") - time.sleep(5 * (attempt + 1)) + time.sleep(8 * (attempt + 1)) return False def get_or_create_release(): - """Get existing 'apks' release or create it. Returns release id.""" - api = f"https://api.github.com/repos/{GITHUB_REPO}/releases/tags/{RELEASE_TAG}" - headers = {"Authorization": f"Bearer {GITHUB_TOKEN}", "Accept": "application/vnd.github+json"} - r = requests.get(api, headers=headers) + api_base = f"https://api.github.com/repos/{GITHUB_REPO}" + gh_headers = { + "Authorization": f"Bearer {GITHUB_TOKEN}", + "Accept": "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28" + } + r = requests.get(f"{api_base}/releases/tags/{RELEASE_TAG}", headers=gh_headers) if r.status_code == 200: - return r.json()["id"], r.json()["upload_url"] - # Create it - api = f"https://api.github.com/repos/{GITHUB_REPO}/releases" + data = r.json() + return data["id"], data["upload_url"] + # Create release + tag payload = { "tag_name": RELEASE_TAG, "name": "APK Mirror", - "body": "Auto-scraped APKs for Morphe patching", + "body": "Auto-scraped APKs for Morphe patching. 
Do not edit manually.", "prerelease": False } - r = requests.post(api, headers=headers, json=payload) + r = requests.post(f"{api_base}/releases", headers=gh_headers, json=payload) r.raise_for_status() - return r.json()["id"], r.json()["upload_url"] + data = r.json() + return data["id"], data["upload_url"] def list_release_assets(release_id): - """Returns dict of {filename: asset_id} already in the release.""" api = f"https://api.github.com/repos/{GITHUB_REPO}/releases/{release_id}/assets" - headers = {"Authorization": f"Bearer {GITHUB_TOKEN}", "Accept": "application/vnd.github+json"} - r = requests.get(api, headers=headers) + gh_headers = { + "Authorization": f"Bearer {GITHUB_TOKEN}", + "Accept": "application/vnd.github+json" + } + r = requests.get(api, headers=gh_headers) r.raise_for_status() return {a["name"]: a["id"] for a in r.json()} def delete_asset(asset_id): api = f"https://api.github.com/repos/{GITHUB_REPO}/releases/assets/{asset_id}" - headers = {"Authorization": f"Bearer {GITHUB_TOKEN}", "Accept": "application/vnd.github+json"} - requests.delete(api, headers=headers) + gh_headers = { + "Authorization": f"Bearer {GITHUB_TOKEN}", + "Accept": "application/vnd.github+json" + } + requests.delete(api, headers=gh_headers) def upload_asset(upload_url, file_path): - """Upload APK file to GitHub release.""" - upload_url = upload_url.split("{")[0] # strip template part - headers = { + upload_url = re.sub(r"\{.*\}", "", upload_url) # strip {?name,label} template + gh_headers = { "Authorization": f"Bearer {GITHUB_TOKEN}", "Content-Type": "application/vnd.android.package-archive", + "Accept": "application/vnd.github+json" } - params = {"name": file_path.name} + params = {"name": file_path.name, "label": file_path.name} with open(file_path, "rb") as f: - r = requests.post(upload_url, headers=headers, params=params, data=f, timeout=300) + r = requests.post(upload_url, headers=gh_headers, + params=params, data=f, timeout=600) if r.status_code in (200, 201): - print(f" 
[+] Uploaded {file_path.name} to GitHub release") - return r.json().get("browser_download_url", "") - else: - print(f" [!] Upload failed ({r.status_code}): {r.text[:200]}") - return "" + url = r.json().get("browser_download_url", "") + print(f" [+] Uploaded to release: {url}") + return url + print(f" [!] Upload failed ({r.status_code}): {r.text[:300]}") + return "" -def scrape_and_upload(app): +def scrape_app(app): name = app["name"] package = app["package"] version = app.get("version") - base_url = app["apkmirror_url"] + base_url = app["apkmirror_url"].rstrip("/") + "/" arch = app.get("arch", "nodpi") - print(f"\n[>] {name} ({package}) version={version or 'latest'}") + print(f"\n[>] {name} | pkg={package} | ver={version or 'latest'} | arch={arch}") - # Determine release page URL + # Step 1: Find release page if version: - release_url = base_url + release_page = find_release_page(base_url, version) else: - print(f" [~] No version pinned, finding latest...") - release_url = find_latest_version_url(base_url) - if not release_url: - print(f" [!] Could not find latest version URL for {name}") - return None - print(f" [~] Latest release page: {release_url}") - - # Find APK variant download page - dl_page = find_apk_download_page(release_url, arch) - if not dl_page: - print(f" [!] 
Could not find APK download page for {name}") + release_page = find_latest_release_page(base_url) + + if not release_page: + print(f" [FAIL] Could not find release page") + return None + + # Step 2: Find APK variant page + variant_page = find_apk_variant_page(release_page, arch) + if not variant_page: + print(f" [FAIL] Could not find variant page") return None - print(f" [~] Download page: {dl_page}") - # Get final download URL - final_url = get_final_download_url(dl_page) + # Step 3: Get interstitial download page + interstitial = get_download_page_url(variant_page) + if not interstitial: + print(f" [FAIL] Could not find download page") + return None + + # Step 4: Get final APK CDN URL + final_url = get_final_apk_url(interstitial) if not final_url: - print(f" [!] Could not extract final download URL for {name}") + print(f" [FAIL] Could not get final APK URL") return None - print(f" [~] Final URL: {final_url}") + print(f" [~] Final APK URL: {final_url}") - # Download APK - safe_name = re.sub(r"[^a-zA-Z0-9._-]", "_", name) - ver_tag = version.replace(" ", "_") if version else "latest" + # Step 5: Download + DOWNLOAD_DIR.mkdir(exist_ok=True) + ver_tag = (version or "latest").replace(" ", "_") filename = f"{package}-{ver_tag}.apk" dest = DOWNLOAD_DIR / filename - DOWNLOAD_DIR.mkdir(exist_ok=True) if not download_apk(final_url, dest): - print(f" [!] Failed to download APK for {name}") + print(f" [FAIL] Download failed") return None return dest @@ -230,28 +383,29 @@ def scrape_and_upload(app): def main(): if not GITHUB_TOKEN: - print("[!] GITHUB_TOKEN not set, cannot upload to releases") + print("[!] 
GITHUB_TOKEN not set") sys.exit(1) apps = load_apps() - print(f"[*] Loaded {len(apps)} apps from apps.json") + print(f"[*] Loaded {len(apps)} apps") release_id, upload_url = get_or_create_release() - print(f"[*] Using release id={release_id}") - existing_assets = list_release_assets(release_id) - print(f"[*] Existing assets: {list(existing_assets.keys())}") + print(f"[*] Release id={release_id}") + existing = list_release_assets(release_id) + print(f"[*] Existing assets: {len(existing)}") results = [] for app in apps: - apk_path = scrape_and_upload(app) + apk_path = scrape_app(app) if not apk_path: results.append({"name": app["name"], "status": "FAILED", "url": ""}) + time.sleep(3) continue - # Delete old asset with same name if exists - if apk_path.name in existing_assets: - print(f" [~] Replacing existing asset {apk_path.name}") - delete_asset(existing_assets[apk_path.name]) + # Replace old asset if exists + if apk_path.name in existing: + print(f" [~] Deleting old asset: {apk_path.name}") + delete_asset(existing[apk_path.name]) dl_url = upload_asset(upload_url, apk_path) results.append({ @@ -262,22 +416,20 @@ def main(): "url": dl_url }) - # Clean up local file after upload apk_path.unlink(missing_ok=True) - time.sleep(2) # be polite to APKMirror + time.sleep(3) # polite delay between apps - # Write results summary with open("scrape_results.json", "w") as f: json.dump(results, f, indent=2) - print("\n=== Scrape Summary ===") - ok = sum(1 for r in results if r["status"] == "OK") + print("\n=== Summary ===") + ok = [r for r in results if r["status"] == "OK"] failed = [r["name"] for r in results if r["status"] != "OK"] - print(f"Success: {ok}/{len(results)}") + print(f"OK: {len(ok)}/{len(results)}") if failed: - print(f"Failed: {', '.join(failed)}") + print(f"FAILED: {', '.join(failed)}") sys.exit(1) - print("All APKs scraped and uploaded successfully!") + print("Done!") if __name__ == "__main__": From e23b038b6ccad3d60a6d21bedb0994344818d2b0 Mon Sep 17 00:00:00 
2001 From: myst25 <83259411+myst-25@users.noreply.github.com> Date: Wed, 22 Apr 2026 22:38:49 +0530 Subject: [PATCH 04/24] feat: add Uptodown, APKPure, APKCombo as fallback sources alongside APKMirror --- scraper.py | 632 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 357 insertions(+), 275 deletions(-) diff --git a/scraper.py b/scraper.py index e381c43..7ffd3e4 100644 --- a/scraper.py +++ b/scraper.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 """ Morphe APK Scraper -Scrapes APKMirror for exact APK versions needed by Morphe patches -and uploads them to GitHub Releases. +Tries multiple sources in order: APKMirror -> Uptodown -> APKPure -> APKCombo +Uploads successfully downloaded APKs to GitHub Releases. """ import json @@ -16,14 +16,18 @@ from bs4 import BeautifulSoup HEADERS = { - "User-Agent": "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 " + "User-Agent": "Mozilla/5.0 (Linux; Android 12; Pixel 6) AppleWebKit/537.36 " "(KHTML, like Gecko) Chrome/124.0.0.0 Mobile Safari/537.36", "Accept-Language": "en-US,en;q=0.9", - "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", - "Referer": "https://www.apkmirror.com/", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Referer": "https://www.google.com/", } APKMIRROR_BASE = "https://www.apkmirror.com" +UPTODOWN_BASE = "https://uptodown.com" +APKPURE_BASE = "https://apkpure.net" +APKCOMBO_BASE = "https://apkcombo.com" + DOWNLOAD_DIR = Path("apks") APPS_FILE = Path("apps.json") GITHUB_REPO = os.environ.get("GITHUB_REPOSITORY", "myst-25/morphe-apk-scraper") @@ -31,354 +35,425 @@ RELEASE_TAG = "apks" +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + def load_apps(): with open(APPS_FILE) as f: return json.load(f) -def get_soup(url, retries=3, delay=4): +def get_soup(url, retries=3, delay=5, extra_headers=None): + h 
= {**HEADERS, **(extra_headers or {})} for attempt in range(retries): try: - resp = requests.get(url, headers=HEADERS, timeout=30) + resp = requests.get(url, headers=h, timeout=30, allow_redirects=True) if resp.status_code == 429: wait = int(resp.headers.get("Retry-After", 30)) - print(f" [!] Rate limited, waiting {wait}s...") + print(f" [rate-limit] waiting {wait}s") time.sleep(wait) continue + if resp.status_code == 404: + return None resp.raise_for_status() return BeautifulSoup(resp.text, "html.parser") except Exception as e: - print(f" [!] Attempt {attempt+1} failed for {url}: {e}") + print(f" [!] attempt {attempt+1} failed: {e}") time.sleep(delay * (attempt + 1)) return None -def version_to_url_slug(version): - """Convert version string like '20.47.62' -> '20-47-62'""" +def ver_slug(version): + """'20.47.62' -> '20-47-62'""" return re.sub(r"[^a-zA-Z0-9]+", "-", version).strip("-").lower() -def find_release_page(base_url, version): - """ - Given the APKMirror listing URL and a version string, - find the correct release page URL. - Strategy: - 1. Load listing page and search for a link containing the version slug. - 2. Fallback: construct URL from base_url + version slug pattern. - """ - soup = get_soup(base_url) - if not soup: - return None +def download_file(url, dest_path, retries=3, extra_headers=None): + h = {**HEADERS, **(extra_headers or {})} + for attempt in range(retries): + try: + with requests.get(url, headers=h, stream=True, + timeout=180, allow_redirects=True) as r: + r.raise_for_status() + ct = r.headers.get("Content-Type", "") + if "text/html" in ct: + print(f" [!] Got HTML page instead of APK — likely blocked") + return False + with open(dest_path, "wb") as f: + for chunk in r.iter_content(65536): + f.write(chunk) + size = dest_path.stat().st_size + if size < 500_000: + print(f" [!] 
File too small ({size} bytes), not a valid APK") + dest_path.unlink(missing_ok=True) + return False + print(f" [+] Downloaded {dest_path.name} ({size // 1024 // 1024} MB)") + return True + except Exception as e: + print(f" [!] Download attempt {attempt+1} failed: {e}") + time.sleep(8 * (attempt + 1)) + return False - ver_slug = version_to_url_slug(version) - # Search all links on the page for one matching the version - for a in soup.find_all("a", href=True): - href = a["href"] - if ver_slug in href and "/apk/" in href and href.endswith("/"): - # Make sure it's a release page (not download page) - if "download" not in href: - full = href if href.startswith("http") else APKMIRROR_BASE + href - print(f" [~] Found release page via listing: {full}") - return full - - # Fallback: try to construct the URL directly - # APKMirror pattern: {base_url}{app-slug}-{version-slug}-release/ - # Extract app slug from base_url - base_clean = base_url.rstrip("/") - app_slug = base_clean.split("/")[-1] - constructed = f"{base_clean}/{app_slug}-{ver_slug}-release/" - print(f" [~] Trying constructed URL: {constructed}") - resp = requests.get(constructed, headers=HEADERS, timeout=20) - if resp.status_code == 200 and "apkmirror" in resp.url: - return constructed - - print(f" [!] 
Could not find release page for version {version}") - return None +# --------------------------------------------------------------------------- +# Source 1: APKMirror +# --------------------------------------------------------------------------- + +def apkmirror_get(app): + base = app["apkmirror_url"].rstrip("/") + "/" + version = app.get("version") + arch = app.get("arch", "nodpi") + package = app["package"] -def find_latest_release_page(base_url): - """When no version is pinned, get the first/latest release page URL.""" - soup = get_soup(base_url) + print(" [APKMirror] trying...") + + # Step 1: Find release page + soup = get_soup(base) if not soup: return None - # APKMirror listing: release rows have class 'appRow' - for a in soup.select(".appRow a[href]"): - href = a["href"] - if "-release/" in href and "download" not in href: - full = href if href.startswith("http") else APKMIRROR_BASE + href - return full - # Fallback: any link with -release/ - a = soup.find("a", href=re.compile(r"-release/$")) - if a: - href = a["href"] - return href if href.startswith("http") else APKMIRROR_BASE + href - return None + release_page = None + if version: + slug = ver_slug(version) + for a in soup.find_all("a", href=True): + if slug in a["href"] and "/apk/" in a["href"] and "download" not in a["href"]: + release_page = APKMIRROR_BASE + a["href"] if a["href"].startswith("/") else a["href"] + break + if not release_page: + # construct directly + app_slug = base.rstrip("/").split("/")[-1] + release_page = f"{base}{app_slug}-{slug}-release/" + else: + for a in soup.find_all("a", href=re.compile(r"-release/$")): + release_page = APKMIRROR_BASE + a["href"] if a["href"].startswith("/") else a["href"] + break -def find_apk_variant_page(release_url, arch): - """ - From a release page (e.g. /apk/google-inc/youtube/youtube-20-47-62-release/), - find the individual APK variant download info page. - APKMirror shows a table of variants; we pick APK (not APKM/bundle), matching arch. 
- """ - soup = get_soup(release_url) - if not soup: + if not release_page: + print(" [!] Could not find release page") return None + print(f" release_page={release_page}") - # The variants table rows — each has a link to the variant info page - # Real APKMirror selector: div.table-cell > span contains arch info, - # and the row has a link to /apk/.../download-variant/ - rows = soup.select("div.variants-table .table-row") - if not rows: - # fallback selector used on some pages - rows = soup.select(".apkm-badge") + # Step 2: Find variant page (individual APK) + soup2 = get_soup(release_page) + if not soup2: + return None - # Strategy: collect all APK (non-bundle) variant links + variant_page = None candidates = [] - for a in soup.find_all("a", href=re.compile(r"/apk/.+/\d+/$")): - href = a["href"] - # Get surrounding text to check arch and type - parent_text = a.find_parent().get_text(" ", strip=True) if a.find_parent() else "" - # Skip APKM bundles - if "BUNDLE" in parent_text.upper() or "APKM" in parent_text.upper(): + for a in soup2.find_all("a", href=re.compile(r"/apk/.+/\d+/$")): + parent_text = (a.find_parent() or a).get_text(" ", strip=True).upper() + if "BUNDLE" in parent_text or "APKM" in parent_text: continue - candidates.append((href, parent_text)) - - if not candidates: - # Try broader: any link ending with digit/ - for a in soup.find_all("a", href=re.compile(r"/apk/")): - href = a["href"] - if re.search(r"/\d+/$", href): - parent_text = a.find_parent().get_text(" ", strip=True) if a.find_parent() else "" - if "BUNDLE" not in parent_text.upper(): - candidates.append((href, parent_text)) - - if not candidates: - print(f" [!] 
No variant links found on {release_url}") - return None + candidates.append((a["href"], parent_text)) - # Prefer arch match, then nodpi/universal, then first def score(item): href, text = item t = text.lower() if arch and arch != "nodpi" and arch.lower() in t: return 0 - if "nodpi" in t or "universal" in t or "all" in t: + if "nodpi" in t or "universal" in t: return 1 return 2 - candidates.sort(key=score) - best_href = candidates[0][0] - full = best_href if best_href.startswith("http") else APKMIRROR_BASE + best_href - print(f" [~] Variant page: {full}") - return full + if candidates: + candidates.sort(key=score) + href = candidates[0][0] + variant_page = APKMIRROR_BASE + href if href.startswith("/") else href + if not variant_page: + print(" [!] No variant page found") + return None + print(f" variant_page={variant_page}") + # Step 3: Interstitial download page + soup3 = get_soup(variant_page) + if not soup3: + return None + btn = soup3.find("a", href=re.compile(r"download/\?key=")) + if not btn: + print(" [!] No download button found") + return None + interstitial = APKMIRROR_BASE + btn["href"] if btn["href"].startswith("/") else btn["href"] + print(f" interstitial={interstitial}") -def get_download_page_url(variant_page_url): - """ - From a variant info page (/apk/.../{id}/), - find the 'Download APK' button link which leads to the interstitial download page. - """ - soup = get_soup(variant_page_url) + # Step 4: Final CDN URL + soup4 = get_soup(interstitial) + if not soup4: + return None + final = None + for a in soup4.find_all("a", href=True): + if "cdn.apkmirror.com" in a["href"] or re.search(r"\.apk(\?|$)", a["href"]): + final = a["href"] + break + if not final: + print(" [!] 
Final URL not found") + return None + print(f" final_url={final}") + + dest = DOWNLOAD_DIR / f"{package}-{(version or 'latest').replace(' ', '_')}.apk" + DOWNLOAD_DIR.mkdir(exist_ok=True) + return dest if download_file(final, dest) else None + + +# --------------------------------------------------------------------------- +# Source 2: Uptodown +# --------------------------------------------------------------------------- + +def uptodown_get(app): + uptodown_url = app.get("uptodown_dlurl") + version = app.get("version") + package = app["package"] + + if not uptodown_url: + return None + + print(" [Uptodown] trying...") + base = uptodown_url.rstrip("/") + + # Uptodown version page pattern: {base}/versions + versions_url = f"{base}/versions" + soup = get_soup(versions_url) if not soup: + # try direct download page + soup = get_soup(base) + if not soup: + return None + + # Find the version download link + dl_url = None + if version: + # Look for link containing exact version text + for a in soup.find_all("a", href=True): + if version in a.get_text() or version in a["href"]: + dl_url = a["href"] + break + if not dl_url: + # Latest: find first .apk or /download/ link + for a in soup.find_all("a", href=re.compile(r"/(download|post-download)/")): + dl_url = a["href"] + break + + if not dl_url: + print(" [!] No download link found on Uptodown") return None - # APKMirror: the green download button links to a page like - # /apk/.../download/?key=... - btn = soup.find("a", href=re.compile(r"download/\?key=")) - if btn: - href = btn["href"] - full = href if href.startswith("http") else APKMIRROR_BASE + href - print(f" [~] Interstitial page: {full}") - return full - - # Fallback: look for any download link - btn = soup.find("a", string=re.compile(r"download", re.I), href=re.compile(r"download")) - if btn: - href = btn["href"] - full = href if href.startswith("http") else APKMIRROR_BASE + href - return full - - print(f" [!] 
No download button found on {variant_page_url}") - return None + if not dl_url.startswith("http"): + from urllib.parse import urljoin + dl_url = urljoin(base, dl_url) + print(f" dl_url={dl_url}") + + # Navigate to download page to get direct link + soup2 = get_soup(dl_url) + final = None + if soup2: + btn = soup2.find("a", id="detail-download-button") or \ + soup2.find("a", href=re.compile(r"\.apk")) + if btn: + final = btn["href"] + if not final.startswith("http"): + from urllib.parse import urljoin + final = urljoin(dl_url, final) + if not final: + final = dl_url # try directly + + print(f" final_url={final}") + dest = DOWNLOAD_DIR / f"{package}-{(version or 'latest').replace(' ', '_')}.apk" + DOWNLOAD_DIR.mkdir(exist_ok=True) + return dest if download_file(final, dest) else None -def get_final_apk_url(interstitial_url): - """ - APKMirror interstitial page has a 'Click here to download' link - that is the actual APK file URL (via their CDN/redirect). - The real link is in: a[rel='nofollow'] or href containing 'cdn.apkmirror.com' - """ - soup = get_soup(interstitial_url) +# --------------------------------------------------------------------------- +# Source 3: APKPure +# --------------------------------------------------------------------------- + +def apkpure_get(app): + package = app["package"] + version = app.get("version") + print(" [APKPure] trying...") + + # APKPure search URL + search_url = f"https://apkpure.net/search?q={package}" + soup = get_soup(search_url) if not soup: return None - # Direct CDN link - a = soup.find("a", href=re.compile(r"cdn\.apkmirror\.com")) - if a: - return a["href"] + # Find app page link + app_link = None + for a in soup.find_all("a", href=re.compile(r"/" + re.escape(package.split(".")[-1].lower()))): + app_link = a["href"] + break + if not app_link: + # Direct URL guess + app_name_slug = package.replace(".", "-").lower() + app_link = f"https://apkpure.net/{app_name_slug}/{package}" + elif not app_link.startswith("http"): + 
app_link = "https://apkpure.net" + app_link + + print(f" app_page={app_link}") + + # Get download page + dl_page = f"{app_link}/download" + if version: + dl_page = f"{app_link}/{version}/download" - # Fallback: nofollow download link - a = soup.find("a", rel="nofollow", href=re.compile(r"\.apk")) - if a: - href = a["href"] - return href if href.startswith("http") else APKMIRROR_BASE + href + soup2 = get_soup(dl_page) + if not soup2: + soup2 = get_soup(app_link) + if not soup2: + return None - # Last resort: any .apk link - a = soup.find("a", href=re.compile(r"\.apk")) - if a: + # Find APK download link + final = None + for a in soup2.find_all("a", href=True): href = a["href"] - return href if href.startswith("http") else APKMIRROR_BASE + href + if ".apk" in href and ("download" in href or "dw.apkpure" in href): + final = href + break + if not final: + btn = soup2.find("a", id="download_link") or soup2.find("a", class_=re.compile(r"download")) + if btn: + final = btn.get("href", "") + + if not final: + print(" [!] No download link found on APKPure") + return None - # Try extracting from onclick / data attrs - for tag in soup.find_all(attrs={"data-google-interstitial": True}): - href = tag.get("href", "") - if href: - return href if href.startswith("http") else APKMIRROR_BASE + href + if not final.startswith("http"): + final = "https://apkpure.net" + final + print(f" final_url={final}") - print(f" [!] 
Could not find final APK URL on {interstitial_url}") - return None + dest = DOWNLOAD_DIR / f"{package}-{(version or 'latest').replace(' ', '_')}.apk" + DOWNLOAD_DIR.mkdir(exist_ok=True) + return dest if download_file(final, dest) else None -def download_apk(url, dest_path, retries=3): - """Download APK with retry and size validation.""" - for attempt in range(retries): - try: - with requests.get(url, headers=HEADERS, stream=True, - timeout=180, allow_redirects=True) as r: - r.raise_for_status() - content_type = r.headers.get("Content-Type", "") - if "text/html" in content_type: - print(f" [!] Got HTML instead of APK (blocked/captcha?)") - return False - with open(dest_path, "wb") as f: - for chunk in r.iter_content(chunk_size=65536): - f.write(chunk) - size = dest_path.stat().st_size - if size < 500_000: - print(f" [!] File too small ({size} bytes), likely not a valid APK") - dest_path.unlink(missing_ok=True) - return False - print(f" [+] Downloaded {dest_path.name} ({size // 1024 // 1024} MB)") - return True - except Exception as e: - print(f" [!] 
Download attempt {attempt+1} failed: {e}") - time.sleep(8 * (attempt + 1)) - return False +# --------------------------------------------------------------------------- +# Source 4: APKCombo +# --------------------------------------------------------------------------- + +def apkcombo_get(app): + package = app["package"] + version = app.get("version") + print(" [APKCombo] trying...") + + app_url = f"https://apkcombo.com/apk/{package}" + if version: + app_url = f"https://apkcombo.com/apk/{package}/{version}" + + soup = get_soup(app_url) + if not soup: + return None + + # Find direct APK download link + final = None + for a in soup.find_all("a", href=True): + href = a["href"] + if ".apk" in href and ("download" in href or "apkcombo" in href or "filedownload" in href): + final = href + break + if not final: + a = soup.find("a", class_=re.compile(r"download", re.I)) + if a: + final = a.get("href", "") + + if not final: + print(" [!] No download link found on APKCombo") + return None + + if not final.startswith("http"): + final = "https://apkcombo.com" + final + print(f" final_url={final}") + + dest = DOWNLOAD_DIR / f"{package}-{(version or 'latest').replace(' ', '_')}.apk" + DOWNLOAD_DIR.mkdir(exist_ok=True) + return dest if download_file(final, dest) else None +# --------------------------------------------------------------------------- +# GitHub Release helpers +# --------------------------------------------------------------------------- + def get_or_create_release(): - api_base = f"https://api.github.com/repos/{GITHUB_REPO}" - gh_headers = { + gh = { "Authorization": f"Bearer {GITHUB_TOKEN}", "Accept": "application/vnd.github+json", "X-GitHub-Api-Version": "2022-11-28" } - r = requests.get(f"{api_base}/releases/tags/{RELEASE_TAG}", headers=gh_headers) + base = f"https://api.github.com/repos/{GITHUB_REPO}" + r = requests.get(f"{base}/releases/tags/{RELEASE_TAG}", headers=gh) if r.status_code == 200: - data = r.json() - return data["id"], data["upload_url"] - # 
Create release + tag - payload = { + d = r.json() + return d["id"], d["upload_url"] + r = requests.post(f"{base}/releases", headers=gh, json={ "tag_name": RELEASE_TAG, "name": "APK Mirror", "body": "Auto-scraped APKs for Morphe patching. Do not edit manually.", "prerelease": False - } - r = requests.post(f"{api_base}/releases", headers=gh_headers, json=payload) + }) r.raise_for_status() - data = r.json() - return data["id"], data["upload_url"] + d = r.json() + return d["id"], d["upload_url"] -def list_release_assets(release_id): - api = f"https://api.github.com/repos/{GITHUB_REPO}/releases/{release_id}/assets" - gh_headers = { - "Authorization": f"Bearer {GITHUB_TOKEN}", - "Accept": "application/vnd.github+json" - } - r = requests.get(api, headers=gh_headers) +def list_assets(release_id): + gh = {"Authorization": f"Bearer {GITHUB_TOKEN}", "Accept": "application/vnd.github+json"} + r = requests.get( + f"https://api.github.com/repos/{GITHUB_REPO}/releases/{release_id}/assets", + headers=gh + ) r.raise_for_status() return {a["name"]: a["id"] for a in r.json()} def delete_asset(asset_id): - api = f"https://api.github.com/repos/{GITHUB_REPO}/releases/assets/{asset_id}" - gh_headers = { - "Authorization": f"Bearer {GITHUB_TOKEN}", - "Accept": "application/vnd.github+json" - } - requests.delete(api, headers=gh_headers) + gh = {"Authorization": f"Bearer {GITHUB_TOKEN}", "Accept": "application/vnd.github+json"} + requests.delete( + f"https://api.github.com/repos/{GITHUB_REPO}/releases/assets/{asset_id}", + headers=gh + ) def upload_asset(upload_url, file_path): - upload_url = re.sub(r"\{.*\}", "", upload_url) # strip {?name,label} template - gh_headers = { + url = re.sub(r"\{.*?\}", "", upload_url) + gh = { "Authorization": f"Bearer {GITHUB_TOKEN}", "Content-Type": "application/vnd.android.package-archive", "Accept": "application/vnd.github+json" } - params = {"name": file_path.name, "label": file_path.name} with open(file_path, "rb") as f: - r = requests.post(upload_url, 
headers=gh_headers, - params=params, data=f, timeout=600) + r = requests.post(url, headers=gh, + params={"name": file_path.name}, + data=f, timeout=600) if r.status_code in (200, 201): - url = r.json().get("browser_download_url", "") - print(f" [+] Uploaded to release: {url}") - return url - print(f" [!] Upload failed ({r.status_code}): {r.text[:300]}") + dl = r.json().get("browser_download_url", "") + print(f" [+] Uploaded: {dl}") + return dl + print(f" [!] Upload failed ({r.status_code}): {r.text[:300]}") return "" -def scrape_app(app): - name = app["name"] - package = app["package"] - version = app.get("version") - base_url = app["apkmirror_url"].rstrip("/") + "/" - arch = app.get("arch", "nodpi") - - print(f"\n[>] {name} | pkg={package} | ver={version or 'latest'} | arch={arch}") - - # Step 1: Find release page - if version: - release_page = find_release_page(base_url, version) - else: - release_page = find_latest_release_page(base_url) - - if not release_page: - print(f" [FAIL] Could not find release page") - return None +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- - # Step 2: Find APK variant page - variant_page = find_apk_variant_page(release_page, arch) - if not variant_page: - print(f" [FAIL] Could not find variant page") - return None - - # Step 3: Get interstitial download page - interstitial = get_download_page_url(variant_page) - if not interstitial: - print(f" [FAIL] Could not find download page") - return None - - # Step 4: Get final APK CDN URL - final_url = get_final_apk_url(interstitial) - if not final_url: - print(f" [FAIL] Could not get final APK URL") - return None - print(f" [~] Final APK URL: {final_url}") - - # Step 5: Download - DOWNLOAD_DIR.mkdir(exist_ok=True) - ver_tag = (version or "latest").replace(" ", "_") - filename = f"{package}-{ver_tag}.apk" - dest = DOWNLOAD_DIR / filename - - if not download_apk(final_url, 
dest): - print(f" [FAIL] Download failed") - return None - - return dest +def try_all_sources(app): + """Try APKMirror -> Uptodown -> APKPure -> APKCombo in order.""" + sources = [ + ("APKMirror", apkmirror_get), + ("Uptodown", uptodown_get), + ("APKPure", apkpure_get), + ("APKCombo", apkcombo_get), + ] + for source_name, fn in sources: + try: + result = fn(app) + if result and result.exists(): + print(f" [OK] Got APK from {source_name}") + return result, source_name + except Exception as e: + print(f" [!] {source_name} threw exception: {e}") + time.sleep(2) + return None, None def main(): @@ -387,37 +462,44 @@ def main(): sys.exit(1) apps = load_apps() - print(f"[*] Loaded {len(apps)} apps") + print(f"[*] {len(apps)} apps to process") release_id, upload_url = get_or_create_release() print(f"[*] Release id={release_id}") - existing = list_release_assets(release_id) - print(f"[*] Existing assets: {len(existing)}") + existing = list_assets(release_id) + print(f"[*] Existing assets in release: {len(existing)}") results = [] for app in apps: - apk_path = scrape_app(app) + name = app["name"] + package = app["package"] + version = app.get("version", "latest") + print(f"\n[>>>] {name} | {package} | v{version}") + + apk_path, source = try_all_sources(app) + if not apk_path: - results.append({"name": app["name"], "status": "FAILED", "url": ""}) - time.sleep(3) + print(f" [FAIL] All sources failed for {name}") + results.append({"name": name, "status": "FAILED", "source": None, "url": ""}) + time.sleep(2) continue - # Replace old asset if exists + # Replace old asset if apk_path.name in existing: - print(f" [~] Deleting old asset: {apk_path.name}") + print(f" [~] Replacing existing asset {apk_path.name}") delete_asset(existing[apk_path.name]) dl_url = upload_asset(upload_url, apk_path) results.append({ - "name": app["name"], - "package": app["package"], - "version": app.get("version", "latest"), + "name": name, + "package": package, + "version": version, + "source": source, 
"status": "OK" if dl_url else "UPLOAD_FAILED", "url": dl_url }) - apk_path.unlink(missing_ok=True) - time.sleep(3) # polite delay between apps + time.sleep(3) with open("scrape_results.json", "w") as f: json.dump(results, f, indent=2) @@ -429,7 +511,7 @@ def main(): if failed: print(f"FAILED: {', '.join(failed)}") sys.exit(1) - print("Done!") + print("All done!") if __name__ == "__main__": From 8554a5806b0d47849ccb38f4b9507f824347499a Mon Sep 17 00:00:00 2001 From: myst25 <83259411+myst-25@users.noreply.github.com> Date: Wed, 22 Apr 2026 22:43:38 +0530 Subject: [PATCH 05/24] feat: add uptodown, apkpure, apkcombo urls to all apps for multi-source fallback --- apps.json | 110 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 88 insertions(+), 22 deletions(-) diff --git a/apps.json b/apps.json index 8339991..505c477 100644 --- a/apps.json +++ b/apps.json @@ -3,154 +3,220 @@ "name": "YouTube", "package": "com.google.android.youtube", "version": "20.47.62", + "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/google-inc/youtube/youtube-20-47-62-release/", - "arch": "nodpi" + "uptodown_dlurl": "https://youtube.en.uptodown.com/android", + "apkpure_url": "https://apkpure.net/youtube/com.google.android.youtube", + "apkcombo_url": "https://apkcombo.com/apk/com.google.android.youtube" }, { "name": "YouTube Music", "package": "com.google.android.apps.youtube.music", "version": "8.47.56", + "arch": "arm64-v8a", "apkmirror_url": "https://www.apkmirror.com/apk/google-inc/youtube-music/youtube-music-8-47-56-release/", - "arch": "arm64-v8a" + "uptodown_dlurl": "https://youtube-music.en.uptodown.com/android", + "apkpure_url": "https://apkpure.net/youtube-music/com.google.android.apps.youtube.music", + "apkcombo_url": "https://apkcombo.com/apk/com.google.android.apps.youtube.music" }, { "name": "Reddit", "package": "com.reddit.frontpage", "version": "2026.10.0", + "arch": "nodpi", "apkmirror_url": 
"https://www.apkmirror.com/apk/redditinc/reddit/reddit-2026-10-0-release/", - "arch": "nodpi" + "uptodown_dlurl": "https://reddit-official-app.en.uptodown.com/android", + "apkpure_url": "https://apkpure.net/reddit/com.reddit.frontpage", + "apkcombo_url": "https://apkcombo.com/apk/com.reddit.frontpage" }, { "name": "Amazon Prime Video", "package": "com.amazon.avod.thirdpartyclient", "version": "3.0.447.757", + "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/amazon-mobile-llc/amazon-prime-video/amazon-prime-video-3-0-447-757-release/", - "arch": "nodpi" + "uptodown_dlurl": "https://amazon-prime-video.en.uptodown.com/android", + "apkpure_url": "https://apkpure.net/amazon-prime-video/com.amazon.avod.thirdpartyclient", + "apkcombo_url": "https://apkcombo.com/apk/com.amazon.avod.thirdpartyclient" }, { "name": "Duolingo", "package": "com.duolingo", "version": "6.74.4", + "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/duolingo/duolingo-duolingo/duolingo-duolingo-6-74-4-release/", - "arch": "nodpi" + "uptodown_dlurl": "https://duolingo.en.uptodown.com/android", + "apkpure_url": "https://apkpure.net/duolingo/com.duolingo", + "apkcombo_url": "https://apkcombo.com/apk/com.duolingo" }, { "name": "Proton VPN", "package": "ch.protonvpn.android", "version": "5.16.83.0", + "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/proton-technologies-ag/proton-vpn-fast-secure/proton-vpn-fast-secure-5-16-83-0-release/", - "arch": "nodpi" + "uptodown_dlurl": "https://proton-vpn.en.uptodown.com/android", + "apkpure_url": "https://apkpure.net/proton-vpn/ch.protonvpn.android", + "apkcombo_url": "https://apkcombo.com/apk/ch.protonvpn.android" }, { "name": "SoundCloud", "package": "com.soundcloud.android", "version": "2026.03.20-release", + "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/soundcloud/soundcloud-play-music-songs/soundcloud-play-music-songs-2026-03-20-release/", - "arch": "nodpi" + "uptodown_dlurl": 
"https://soundcloud.en.uptodown.com/android", + "apkpure_url": "https://apkpure.net/soundcloud/com.soundcloud.android", + "apkcombo_url": "https://apkcombo.com/apk/com.soundcloud.android" }, { "name": "Pandora", "package": "com.pandora.android", "version": null, + "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/pandora/pandora-radio-music-podcasts/", - "arch": "nodpi" + "uptodown_dlurl": "https://pandora.en.uptodown.com/android", + "apkpure_url": "https://apkpure.net/pandora/com.pandora.android", + "apkcombo_url": "https://apkcombo.com/apk/com.pandora.android" }, { "name": "WPS Office", "package": "cn.wps.moffice_eng", "version": "18.24", + "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/kingsoft-office-software-corporation-limited/wps-office-free-office-suite-for-word-pdf-excel/wps-office-free-office-suite-for-word-pdf-excel-18-24-release/", - "arch": "nodpi" + "uptodown_dlurl": "https://wps-office.en.uptodown.com/android", + "apkpure_url": "https://apkpure.net/wps-office/cn.wps.moffice_eng", + "apkcombo_url": "https://apkcombo.com/apk/cn.wps.moffice_eng" }, { "name": "MyFitnessPal", "package": "com.myfitnesspal.android", "version": "25.50.0", + "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/under-armour/myfitnesspal/myfitnesspal-25-50-0-release/", - "arch": "nodpi" + "uptodown_dlurl": "https://myfitnesspal.en.uptodown.com/android", + "apkpure_url": "https://apkpure.net/myfitnesspal/com.myfitnesspal.android", + "apkcombo_url": "https://apkcombo.com/apk/com.myfitnesspal.android" }, { "name": "AdGuard", "package": "com.adguard.android", "version": "4.12.81", + "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/adguard-software-limited/adguard-ad-blocker/adguard-ad-blocker-4-12-81-release/", - "arch": "nodpi" + "uptodown_dlurl": "https://adguard-content-blocker.en.uptodown.com/android", + "apkpure_url": "https://apkpure.net/adguard/com.adguard.android", + "apkcombo_url": 
"https://apkcombo.com/apk/com.adguard.android" }, { "name": "SofaScore", "package": "com.sofascore.results", "version": "25.12.17", + "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/sofascore/sofascore-sports-scores/sofascore-sports-scores-25-12-17-release/", - "arch": "nodpi" + "uptodown_dlurl": "https://sofascore.en.uptodown.com/android", + "apkpure_url": "https://apkpure.net/sofascore/com.sofascore.results", + "apkcombo_url": "https://apkcombo.com/apk/com.sofascore.results" }, { "name": "Nova Launcher", "package": "com.teslacoilsw.launcher", "version": "81042", + "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/teslacoil-software/nova-launcher/nova-launcher-8-5-1-release/", - "arch": "nodpi" + "uptodown_dlurl": "https://nova-launcher.en.uptodown.com/android", + "apkpure_url": "https://apkpure.net/nova-launcher/com.teslacoilsw.launcher", + "apkcombo_url": "https://apkcombo.com/apk/com.teslacoilsw.launcher" }, { "name": "Podcast Addict", "package": "com.bambuna.podcastaddict", "version": "2026.4", + "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/xavier-michiels/podcast-addict/podcast-addict-2026-4-release/", - "arch": "nodpi" + "uptodown_dlurl": "https://podcast-addict.en.uptodown.com/android", + "apkpure_url": "https://apkpure.net/podcast-addict/com.bambuna.podcastaddict", + "apkcombo_url": "https://apkcombo.com/apk/com.bambuna.podcastaddict" }, { "name": "Xodo PDF", "package": "com.xodo.pdf.reader", "version": "10.13.0", + "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/xodo-technologies-inc/xodo-pdf-reader-editor/xodo-pdf-reader-editor-10-13-0-release/", - "arch": "nodpi" + "uptodown_dlurl": "https://xodo-pdf-reader-editor.en.uptodown.com/android", + "apkpure_url": "https://apkpure.net/xodo-pdf/com.xodo.pdf.reader", + "apkcombo_url": "https://apkcombo.com/apk/com.xodo.pdf.reader" }, { "name": "Solid Explorer", "package": "pl.solidexplorer2", "version": "3.4.1", + "arch": "nodpi", 
"apkmirror_url": "https://www.apkmirror.com/apk/neatbytes/solid-explorer-file-manager/solid-explorer-file-manager-3-4-1-release/", - "arch": "nodpi" + "uptodown_dlurl": "https://solid-explorer.en.uptodown.com/android", + "apkpure_url": "https://apkpure.net/solid-explorer/pl.solidexplorer2", + "apkcombo_url": "https://apkcombo.com/apk/pl.solidexplorer2" }, { "name": "ibis Paint X", "package": "jp.ne.ibis.ibispaintx.app", "version": "14.0.0", + "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/ibis-inc/ibis-paint-x/ibis-paint-x-14-0-0-release/", - "arch": "nodpi" + "uptodown_dlurl": "https://ibis-paint-x.en.uptodown.com/android", + "apkpure_url": "https://apkpure.net/ibis-paint-x/jp.ne.ibis.ibispaintx.app", + "apkcombo_url": "https://apkcombo.com/apk/jp.ne.ibis.ibispaintx.app" }, { "name": "Windy", "package": "com.windyty.android", "version": "49.0.1", + "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/windyty-se/windy-wind-weather-forecast/windy-wind-weather-forecast-49-0-1-release/", - "arch": "nodpi" + "uptodown_dlurl": "https://windy.en.uptodown.com/android", + "apkpure_url": "https://apkpure.net/windy/com.windyty.android", + "apkcombo_url": "https://apkcombo.com/apk/com.windyty.android" }, { "name": "Merriam-Webster", "package": "com.merriamwebster", "version": null, + "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/merriam-webster-inc/merriam-webster-dictionary/", - "arch": "nodpi" + "uptodown_dlurl": "https://merriam-webster.en.uptodown.com/android", + "apkpure_url": "https://apkpure.net/merriam-webster/com.merriamwebster", + "apkcombo_url": "https://apkcombo.com/apk/com.merriamwebster" }, { "name": "Mimo", "package": "com.getmimo", "version": "9.0", + "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/mimo-learn-coding-programming/mimo-learn-coding-programming/mimo-learn-coding-programming-9-0-release/", - "arch": "nodpi" + "uptodown_dlurl": "https://mimo.en.uptodown.com/android", + "apkpure_url": 
"https://apkpure.net/mimo/com.getmimo", + "apkcombo_url": "https://apkcombo.com/apk/com.getmimo" }, { "name": "Wallpaper Craft", "package": "com.wallpaperscraft.wallpaper", "version": "3.61.01", + "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/wallpaperscraft/wallpaper-engine-live-wallpaper/wallpaper-engine-live-wallpaper-3-61-01-release/", - "arch": "nodpi" + "uptodown_dlurl": "https://wallpaperscraft.en.uptodown.com/android", + "apkpure_url": "https://apkpure.net/wallpaperscraft/com.wallpaperscraft.wallpaper", + "apkcombo_url": "https://apkcombo.com/apk/com.wallpaperscraft.wallpaper" }, { "name": "FotMob", "package": "com.mobilefootie.wc2010", "version": "226.16092.20260302", + "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/fotmob-as/fotmob-live-soccer-scores/fotmob-live-soccer-scores-226-16092-20260302-release/", - "arch": "nodpi" + "uptodown_dlurl": "https://fotmob.en.uptodown.com/android", + "apkpure_url": "https://apkpure.net/fotmob/com.mobilefootie.wc2010", + "apkcombo_url": "https://apkcombo.com/apk/com.mobilefootie.wc2010" } ] From 5526d9152ee7d375d7e4abb7d74147f8f4cfabd3 Mon Sep 17 00:00:00 2001 From: myst25 <83259411+myst-25@users.noreply.github.com> Date: Wed, 22 Apr 2026 23:22:29 +0530 Subject: [PATCH 06/24] fix: remove APKPure, improve Uptodown + APKCombo scrapers, add better debug logging --- apps.json | 22 --- scraper.py | 527 +++++++++++++++++++++++------------------------------ 2 files changed, 233 insertions(+), 316 deletions(-) diff --git a/apps.json b/apps.json index 505c477..6d7c3be 100644 --- a/apps.json +++ b/apps.json @@ -6,7 +6,6 @@ "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/google-inc/youtube/youtube-20-47-62-release/", "uptodown_dlurl": "https://youtube.en.uptodown.com/android", - "apkpure_url": "https://apkpure.net/youtube/com.google.android.youtube", "apkcombo_url": "https://apkcombo.com/apk/com.google.android.youtube" }, { @@ -16,7 +15,6 @@ "arch": "arm64-v8a", "apkmirror_url": 
"https://www.apkmirror.com/apk/google-inc/youtube-music/youtube-music-8-47-56-release/", "uptodown_dlurl": "https://youtube-music.en.uptodown.com/android", - "apkpure_url": "https://apkpure.net/youtube-music/com.google.android.apps.youtube.music", "apkcombo_url": "https://apkcombo.com/apk/com.google.android.apps.youtube.music" }, { @@ -26,7 +24,6 @@ "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/redditinc/reddit/reddit-2026-10-0-release/", "uptodown_dlurl": "https://reddit-official-app.en.uptodown.com/android", - "apkpure_url": "https://apkpure.net/reddit/com.reddit.frontpage", "apkcombo_url": "https://apkcombo.com/apk/com.reddit.frontpage" }, { @@ -36,7 +33,6 @@ "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/amazon-mobile-llc/amazon-prime-video/amazon-prime-video-3-0-447-757-release/", "uptodown_dlurl": "https://amazon-prime-video.en.uptodown.com/android", - "apkpure_url": "https://apkpure.net/amazon-prime-video/com.amazon.avod.thirdpartyclient", "apkcombo_url": "https://apkcombo.com/apk/com.amazon.avod.thirdpartyclient" }, { @@ -46,7 +42,6 @@ "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/duolingo/duolingo-duolingo/duolingo-duolingo-6-74-4-release/", "uptodown_dlurl": "https://duolingo.en.uptodown.com/android", - "apkpure_url": "https://apkpure.net/duolingo/com.duolingo", "apkcombo_url": "https://apkcombo.com/apk/com.duolingo" }, { @@ -56,7 +51,6 @@ "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/proton-technologies-ag/proton-vpn-fast-secure/proton-vpn-fast-secure-5-16-83-0-release/", "uptodown_dlurl": "https://proton-vpn.en.uptodown.com/android", - "apkpure_url": "https://apkpure.net/proton-vpn/ch.protonvpn.android", "apkcombo_url": "https://apkcombo.com/apk/ch.protonvpn.android" }, { @@ -66,7 +60,6 @@ "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/soundcloud/soundcloud-play-music-songs/soundcloud-play-music-songs-2026-03-20-release/", "uptodown_dlurl": 
"https://soundcloud.en.uptodown.com/android", - "apkpure_url": "https://apkpure.net/soundcloud/com.soundcloud.android", "apkcombo_url": "https://apkcombo.com/apk/com.soundcloud.android" }, { @@ -76,7 +69,6 @@ "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/pandora/pandora-radio-music-podcasts/", "uptodown_dlurl": "https://pandora.en.uptodown.com/android", - "apkpure_url": "https://apkpure.net/pandora/com.pandora.android", "apkcombo_url": "https://apkcombo.com/apk/com.pandora.android" }, { @@ -86,7 +78,6 @@ "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/kingsoft-office-software-corporation-limited/wps-office-free-office-suite-for-word-pdf-excel/wps-office-free-office-suite-for-word-pdf-excel-18-24-release/", "uptodown_dlurl": "https://wps-office.en.uptodown.com/android", - "apkpure_url": "https://apkpure.net/wps-office/cn.wps.moffice_eng", "apkcombo_url": "https://apkcombo.com/apk/cn.wps.moffice_eng" }, { @@ -96,7 +87,6 @@ "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/under-armour/myfitnesspal/myfitnesspal-25-50-0-release/", "uptodown_dlurl": "https://myfitnesspal.en.uptodown.com/android", - "apkpure_url": "https://apkpure.net/myfitnesspal/com.myfitnesspal.android", "apkcombo_url": "https://apkcombo.com/apk/com.myfitnesspal.android" }, { @@ -106,7 +96,6 @@ "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/adguard-software-limited/adguard-ad-blocker/adguard-ad-blocker-4-12-81-release/", "uptodown_dlurl": "https://adguard-content-blocker.en.uptodown.com/android", - "apkpure_url": "https://apkpure.net/adguard/com.adguard.android", "apkcombo_url": "https://apkcombo.com/apk/com.adguard.android" }, { @@ -116,7 +105,6 @@ "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/sofascore/sofascore-sports-scores/sofascore-sports-scores-25-12-17-release/", "uptodown_dlurl": "https://sofascore.en.uptodown.com/android", - "apkpure_url": "https://apkpure.net/sofascore/com.sofascore.results", 
"apkcombo_url": "https://apkcombo.com/apk/com.sofascore.results" }, { @@ -126,7 +114,6 @@ "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/teslacoil-software/nova-launcher/nova-launcher-8-5-1-release/", "uptodown_dlurl": "https://nova-launcher.en.uptodown.com/android", - "apkpure_url": "https://apkpure.net/nova-launcher/com.teslacoilsw.launcher", "apkcombo_url": "https://apkcombo.com/apk/com.teslacoilsw.launcher" }, { @@ -136,7 +123,6 @@ "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/xavier-michiels/podcast-addict/podcast-addict-2026-4-release/", "uptodown_dlurl": "https://podcast-addict.en.uptodown.com/android", - "apkpure_url": "https://apkpure.net/podcast-addict/com.bambuna.podcastaddict", "apkcombo_url": "https://apkcombo.com/apk/com.bambuna.podcastaddict" }, { @@ -146,7 +132,6 @@ "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/xodo-technologies-inc/xodo-pdf-reader-editor/xodo-pdf-reader-editor-10-13-0-release/", "uptodown_dlurl": "https://xodo-pdf-reader-editor.en.uptodown.com/android", - "apkpure_url": "https://apkpure.net/xodo-pdf/com.xodo.pdf.reader", "apkcombo_url": "https://apkcombo.com/apk/com.xodo.pdf.reader" }, { @@ -156,7 +141,6 @@ "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/neatbytes/solid-explorer-file-manager/solid-explorer-file-manager-3-4-1-release/", "uptodown_dlurl": "https://solid-explorer.en.uptodown.com/android", - "apkpure_url": "https://apkpure.net/solid-explorer/pl.solidexplorer2", "apkcombo_url": "https://apkcombo.com/apk/pl.solidexplorer2" }, { @@ -166,7 +150,6 @@ "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/ibis-inc/ibis-paint-x/ibis-paint-x-14-0-0-release/", "uptodown_dlurl": "https://ibis-paint-x.en.uptodown.com/android", - "apkpure_url": "https://apkpure.net/ibis-paint-x/jp.ne.ibis.ibispaintx.app", "apkcombo_url": "https://apkcombo.com/apk/jp.ne.ibis.ibispaintx.app" }, { @@ -176,7 +159,6 @@ "arch": "nodpi", "apkmirror_url": 
"https://www.apkmirror.com/apk/windyty-se/windy-wind-weather-forecast/windy-wind-weather-forecast-49-0-1-release/", "uptodown_dlurl": "https://windy.en.uptodown.com/android", - "apkpure_url": "https://apkpure.net/windy/com.windyty.android", "apkcombo_url": "https://apkcombo.com/apk/com.windyty.android" }, { @@ -186,7 +168,6 @@ "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/merriam-webster-inc/merriam-webster-dictionary/", "uptodown_dlurl": "https://merriam-webster.en.uptodown.com/android", - "apkpure_url": "https://apkpure.net/merriam-webster/com.merriamwebster", "apkcombo_url": "https://apkcombo.com/apk/com.merriamwebster" }, { @@ -196,7 +177,6 @@ "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/mimo-learn-coding-programming/mimo-learn-coding-programming/mimo-learn-coding-programming-9-0-release/", "uptodown_dlurl": "https://mimo.en.uptodown.com/android", - "apkpure_url": "https://apkpure.net/mimo/com.getmimo", "apkcombo_url": "https://apkcombo.com/apk/com.getmimo" }, { @@ -206,7 +186,6 @@ "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/wallpaperscraft/wallpaper-engine-live-wallpaper/wallpaper-engine-live-wallpaper-3-61-01-release/", "uptodown_dlurl": "https://wallpaperscraft.en.uptodown.com/android", - "apkpure_url": "https://apkpure.net/wallpaperscraft/com.wallpaperscraft.wallpaper", "apkcombo_url": "https://apkcombo.com/apk/com.wallpaperscraft.wallpaper" }, { @@ -216,7 +195,6 @@ "arch": "nodpi", "apkmirror_url": "https://www.apkmirror.com/apk/fotmob-as/fotmob-live-soccer-scores/fotmob-live-soccer-scores-226-16092-20260302-release/", "uptodown_dlurl": "https://fotmob.en.uptodown.com/android", - "apkpure_url": "https://apkpure.net/fotmob/com.mobilefootie.wc2010", "apkcombo_url": "https://apkcombo.com/apk/com.mobilefootie.wc2010" } ] diff --git a/scraper.py b/scraper.py index 7ffd3e4..b4c1d4a 100644 --- a/scraper.py +++ b/scraper.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 """ Morphe APK Scraper -Tries multiple 
sources in order: APKMirror -> Uptodown -> APKPure -> APKCombo -Uploads successfully downloaded APKs to GitHub Releases. +Sources (in order): APKMirror -> Uptodown -> APKCombo +Uploads APKs to GitHub Releases tag 'apks'. """ import json @@ -11,6 +11,7 @@ import sys import time from pathlib import Path +from urllib.parse import urljoin import requests from bs4 import BeautifulSoup @@ -23,495 +24,433 @@ "Referer": "https://www.google.com/", } -APKMIRROR_BASE = "https://www.apkmirror.com" -UPTODOWN_BASE = "https://uptodown.com" -APKPURE_BASE = "https://apkpure.net" -APKCOMBO_BASE = "https://apkcombo.com" - DOWNLOAD_DIR = Path("apks") -APPS_FILE = Path("apps.json") -GITHUB_REPO = os.environ.get("GITHUB_REPOSITORY", "myst-25/morphe-apk-scraper") +APPS_FILE = Path("apps.json") +GITHUB_REPO = os.environ.get("GITHUB_REPOSITORY", "myst-25/morphe-apk-scraper") GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "") -RELEASE_TAG = "apks" +RELEASE_TAG = "apks" + +SESSION = requests.Session() +SESSION.headers.update(HEADERS) -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- +# ── helpers ──────────────────────────────────────────────────────────────── def load_apps(): with open(APPS_FILE) as f: return json.load(f) -def get_soup(url, retries=3, delay=5, extra_headers=None): - h = {**HEADERS, **(extra_headers or {})} - for attempt in range(retries): +def get(url, retries=3, delay=5): + for i in range(retries): try: - resp = requests.get(url, headers=h, timeout=30, allow_redirects=True) - if resp.status_code == 429: - wait = int(resp.headers.get("Retry-After", 30)) - print(f" [rate-limit] waiting {wait}s") + r = SESSION.get(url, timeout=30, allow_redirects=True) + print(f" GET {url} -> HTTP {r.status_code}") + if r.status_code == 429: + wait = int(r.headers.get("Retry-After", 30)) + print(f" rate-limited, sleeping {wait}s") time.sleep(wait) continue - if 
resp.status_code == 404: + if r.status_code == 404: return None - resp.raise_for_status() - return BeautifulSoup(resp.text, "html.parser") + r.raise_for_status() + return r except Exception as e: - print(f" [!] attempt {attempt+1} failed: {e}") - time.sleep(delay * (attempt + 1)) + print(f" attempt {i+1} error: {e}") + time.sleep(delay * (i + 1)) return None -def ver_slug(version): - """'20.47.62' -> '20-47-62'""" - return re.sub(r"[^a-zA-Z0-9]+", "-", version).strip("-").lower() +def soup(url): + r = get(url) + return BeautifulSoup(r.text, "html.parser") if r else None + + +def ver_slug(v): + return re.sub(r"[^a-zA-Z0-9]+", "-", v).strip("-").lower() -def download_file(url, dest_path, retries=3, extra_headers=None): - h = {**HEADERS, **(extra_headers or {})} - for attempt in range(retries): +def download(url, dest, retries=3): + for i in range(retries): try: - with requests.get(url, headers=h, stream=True, - timeout=180, allow_redirects=True) as r: - r.raise_for_status() + with SESSION.get(url, stream=True, timeout=180, + allow_redirects=True) as r: ct = r.headers.get("Content-Type", "") - if "text/html" in ct: - print(f" [!] Got HTML page instead of APK — likely blocked") + print(f" DL {url[:80]} ct={ct} status={r.status_code}") + if "text/html" in ct or r.status_code >= 400: + print(f" blocked or error, skipping") return False - with open(dest_path, "wb") as f: + dest.parent.mkdir(parents=True, exist_ok=True) + with open(dest, "wb") as f: for chunk in r.iter_content(65536): f.write(chunk) - size = dest_path.stat().st_size + size = dest.stat().st_size if size < 500_000: - print(f" [!] File too small ({size} bytes), not a valid APK") - dest_path.unlink(missing_ok=True) + print(f" too small ({size} B), not valid APK") + dest.unlink(missing_ok=True) return False - print(f" [+] Downloaded {dest_path.name} ({size // 1024 // 1024} MB)") + print(f" saved {dest.name} ({size//1024//1024} MB)") return True except Exception as e: - print(f" [!] 
Download attempt {attempt+1} failed: {e}") - time.sleep(8 * (attempt + 1)) + print(f" download attempt {i+1} error: {e}") + time.sleep(8 * (i + 1)) return False -# --------------------------------------------------------------------------- -# Source 1: APKMirror -# --------------------------------------------------------------------------- +# ── Source 1: APKMirror ──────────────────────────────────────────────────── -def apkmirror_get(app): - base = app["apkmirror_url"].rstrip("/") + "/" +def apkmirror(app): + print(" [APKMirror]") + base = app["apkmirror_url"].rstrip("/") + "/" version = app.get("version") - arch = app.get("arch", "nodpi") + arch = app.get("arch", "nodpi") package = app["package"] - print(" [APKMirror] trying...") - - # Step 1: Find release page - soup = get_soup(base) - if not soup: - return None - + # 1. release page release_page = None if version: + s = soup(base) + if not s: + return None slug = ver_slug(version) - for a in soup.find_all("a", href=True): - if slug in a["href"] and "/apk/" in a["href"] and "download" not in a["href"]: - release_page = APKMIRROR_BASE + a["href"] if a["href"].startswith("/") else a["href"] + for a in s.find_all("a", href=True): + h = a["href"] + if slug in h and "/apk/" in h and "download" not in h: + release_page = ("https://www.apkmirror.com" + h + if h.startswith("/") else h) break if not release_page: - # construct directly app_slug = base.rstrip("/").split("/")[-1] release_page = f"{base}{app_slug}-{slug}-release/" else: - for a in soup.find_all("a", href=re.compile(r"-release/$")): - release_page = APKMIRROR_BASE + a["href"] if a["href"].startswith("/") else a["href"] - break - + s = soup(base) + if not s: + return None + a = s.find("a", href=re.compile(r"-release/$")) + if a: + h = a["href"] + release_page = ("https://www.apkmirror.com" + h + if h.startswith("/") else h) if not release_page: - print(" [!] 
Could not find release page") + print(" no release page found") return None print(f" release_page={release_page}") - # Step 2: Find variant page (individual APK) - soup2 = get_soup(release_page) - if not soup2: + # 2. variant page + s2 = soup(release_page) + if not s2: return None - - variant_page = None candidates = [] - for a in soup2.find_all("a", href=re.compile(r"/apk/.+/\d+/$")): - parent_text = (a.find_parent() or a).get_text(" ", strip=True).upper() - if "BUNDLE" in parent_text or "APKM" in parent_text: + for a in s2.find_all("a", href=re.compile(r"/apk/.+/\d+/$")): + pt = (a.find_parent() or a).get_text(" ", strip=True).upper() + if "BUNDLE" in pt or "APKM" in pt: continue - candidates.append((a["href"], parent_text)) + candidates.append((a["href"], pt)) def score(item): - href, text = item - t = text.lower() + h, t = item + t = t.lower() if arch and arch != "nodpi" and arch.lower() in t: return 0 if "nodpi" in t or "universal" in t: return 1 return 2 - if candidates: - candidates.sort(key=score) - href = candidates[0][0] - variant_page = APKMIRROR_BASE + href if href.startswith("/") else href - if not variant_page: - print(" [!] No variant page found") + if not candidates: + print(" no variant candidates") return None + candidates.sort(key=score) + vh = candidates[0][0] + variant_page = ("https://www.apkmirror.com" + vh + if vh.startswith("/") else vh) print(f" variant_page={variant_page}") - # Step 3: Interstitial download page - soup3 = get_soup(variant_page) - if not soup3: + # 3. interstitial + s3 = soup(variant_page) + if not s3: return None - btn = soup3.find("a", href=re.compile(r"download/\?key=")) + btn = s3.find("a", href=re.compile(r"download/\?key=")) if not btn: - print(" [!] 
No download button found") + print(" no download button") return None - interstitial = APKMIRROR_BASE + btn["href"] if btn["href"].startswith("/") else btn["href"] + ih = btn["href"] + interstitial = ("https://www.apkmirror.com" + ih + if ih.startswith("/") else ih) print(f" interstitial={interstitial}") - # Step 4: Final CDN URL - soup4 = get_soup(interstitial) - if not soup4: + # 4. CDN url + s4 = soup(interstitial) + if not s4: return None final = None - for a in soup4.find_all("a", href=True): + for a in s4.find_all("a", href=True): if "cdn.apkmirror.com" in a["href"] or re.search(r"\.apk(\?|$)", a["href"]): final = a["href"] break if not final: - print(" [!] Final URL not found") + print(" no final CDN url") return None - print(f" final_url={final}") + print(f" final={final}") - dest = DOWNLOAD_DIR / f"{package}-{(version or 'latest').replace(' ', '_')}.apk" - DOWNLOAD_DIR.mkdir(exist_ok=True) - return dest if download_file(final, dest) else None + dest = DOWNLOAD_DIR / f"{package}-{(version or 'latest').replace(' ','_')}.apk" + return dest if download(final, dest) else None -# --------------------------------------------------------------------------- -# Source 2: Uptodown -# --------------------------------------------------------------------------- +# ── Source 2: Uptodown ───────────────────────────────────────────────────── -def uptodown_get(app): - uptodown_url = app.get("uptodown_dlurl") +def uptodown(app): + print(" [Uptodown]") + base = app.get("uptodown_dlurl", "").rstrip("/") version = app.get("version") package = app["package"] - - if not uptodown_url: + if not base: + print(" no uptodown_dlurl") return None - print(" [Uptodown] trying...") - base = uptodown_url.rstrip("/") - - # Uptodown version page pattern: {base}/versions + # Uptodown versions page versions_url = f"{base}/versions" - soup = get_soup(versions_url) - if not soup: - # try direct download page - soup = get_soup(base) - if not soup: - return None + s = soup(versions_url) + if not s: 
+ s = soup(base) + if not s: + return None - # Find the version download link - dl_url = None + # Find download page for the specific version + dl_page = None if version: - # Look for link containing exact version text - for a in soup.find_all("a", href=True): - if version in a.get_text() or version in a["href"]: - dl_url = a["href"] + # version rows look like: /android/post-download/XXXXX + for a in s.find_all("a", href=True): + text = a.get_text(strip=True) + if version in text: + dl_page = a["href"] break - if not dl_url: - # Latest: find first .apk or /download/ link - for a in soup.find_all("a", href=re.compile(r"/(download|post-download)/")): - dl_url = a["href"] - break - - if not dl_url: - print(" [!] No download link found on Uptodown") - return None - - if not dl_url.startswith("http"): - from urllib.parse import urljoin - dl_url = urljoin(base, dl_url) - print(f" dl_url={dl_url}") - - # Navigate to download page to get direct link - soup2 = get_soup(dl_url) - final = None - if soup2: - btn = soup2.find("a", id="detail-download-button") or \ - soup2.find("a", href=re.compile(r"\.apk")) - if btn: - final = btn["href"] - if not final.startswith("http"): - from urllib.parse import urljoin - final = urljoin(dl_url, final) - if not final: - final = dl_url # try directly - - print(f" final_url={final}") - dest = DOWNLOAD_DIR / f"{package}-{(version or 'latest').replace(' ', '_')}.apk" - DOWNLOAD_DIR.mkdir(exist_ok=True) - return dest if download_file(final, dest) else None - - -# --------------------------------------------------------------------------- -# Source 3: APKPure -# --------------------------------------------------------------------------- - -def apkpure_get(app): - package = app["package"] - version = app.get("version") - print(" [APKPure] trying...") + if not dl_page: + # try direct download URL pattern + # Uptodown stores version in the download slug + for a in s.find_all("a", href=re.compile(r"post-download|/download")): + parent_text = 
(a.find_parent() or a).get_text(" ", strip=True) + if version in parent_text: + dl_page = a["href"] + break + if not dl_page: + # fallback: grab the first download link (latest) + a = s.find("a", href=re.compile(r"post-download|/download")) + if a: + dl_page = a["href"] - # APKPure search URL - search_url = f"https://apkpure.net/search?q={package}" - soup = get_soup(search_url) - if not soup: + if not dl_page: + print(" no download page link found") return None - # Find app page link - app_link = None - for a in soup.find_all("a", href=re.compile(r"/" + re.escape(package.split(".")[-1].lower()))): - app_link = a["href"] - break - if not app_link: - # Direct URL guess - app_name_slug = package.replace(".", "-").lower() - app_link = f"https://apkpure.net/{app_name_slug}/{package}" - elif not app_link.startswith("http"): - app_link = "https://apkpure.net" + app_link - - print(f" app_page={app_link}") - - # Get download page - dl_page = f"{app_link}/download" - if version: - dl_page = f"{app_link}/{version}/download" + if not dl_page.startswith("http"): + dl_page = urljoin(base, dl_page) + print(f" dl_page={dl_page}") - soup2 = get_soup(dl_page) - if not soup2: - soup2 = get_soup(app_link) - if not soup2: - return None - - # Find APK download link + # Hit the download page to get the real APK link + s2 = soup(dl_page) final = None - for a in soup2.find_all("a", href=True): - href = a["href"] - if ".apk" in href and ("download" in href or "dw.apkpure" in href): - final = href - break - if not final: - btn = soup2.find("a", id="download_link") or soup2.find("a", class_=re.compile(r"download")) + if s2: + # Uptodown puts the APK link in a button or meta refresh + btn = (s2.find("a", id="detail-download-button") or + s2.find("a", attrs={"data-url": True}) or + s2.find("a", href=re.compile(r"\.apk"))) if btn: - final = btn.get("href", "") + final = btn.get("href") or btn.get("data-url", "") + # meta refresh fallback + if not final: + meta = s2.find("meta", 
attrs={"http-equiv": "refresh"}) + if meta: + content = meta.get("content", "") + m = re.search(r"url=(.+)", content, re.I) + if m: + final = m.group(1).strip() if not final: - print(" [!] No download link found on APKPure") - return None + # last resort: try direct download from Uptodown CDN pattern + # https://{app}.en.uptodown.com/android/download/{id} + print(" no final link from download page, trying direct") + final = dl_page if not final.startswith("http"): - final = "https://apkpure.net" + final - print(f" final_url={final}") + final = urljoin(base, final) + print(f" final={final}") - dest = DOWNLOAD_DIR / f"{package}-{(version or 'latest').replace(' ', '_')}.apk" - DOWNLOAD_DIR.mkdir(exist_ok=True) - return dest if download_file(final, dest) else None + dest = DOWNLOAD_DIR / f"{package}-{(version or 'latest').replace(' ','_')}.apk" + return dest if download(final, dest) else None -# --------------------------------------------------------------------------- -# Source 4: APKCombo -# --------------------------------------------------------------------------- +# ── Source 3: APKCombo ───────────────────────────────────────────────────── -def apkcombo_get(app): +def apkcombo(app): + print(" [APKCombo]") package = app["package"] version = app.get("version") - print(" [APKCombo] trying...") + base_url = app.get("apkcombo_url", f"https://apkcombo.com/apk/{package}") - app_url = f"https://apkcombo.com/apk/{package}" + url = base_url if version: - app_url = f"https://apkcombo.com/apk/{package}/{version}" + url = f"{base_url}/{version}" + print(f" url={url}") - soup = get_soup(app_url) - if not soup: + s = soup(url) + if not s: + s = soup(base_url) + if not s: return None - # Find direct APK download link + # APKCombo: look for direct .apk href or a download button final = None - for a in soup.find_all("a", href=True): - href = a["href"] - if ".apk" in href and ("download" in href or "apkcombo" in href or "filedownload" in href): - final = href + for a in 
s.find_all("a", href=True): + h = a["href"] + if re.search(r"\.apk(\?|$)", h): + final = h break if not final: - a = soup.find("a", class_=re.compile(r"download", re.I)) - if a: - final = a.get("href", "") + # APKCombo download button is often in a form or data attr + for tag in s.find_all(attrs={"data-src": re.compile(r"\.apk")}): + final = tag["data-src"] + break + if not final: + btn = s.find("a", class_=re.compile(r"download", re.I)) + if btn: + final = btn.get("href", "") if not final: - print(" [!] No download link found on APKCombo") + print(" no download link found") return None if not final.startswith("http"): final = "https://apkcombo.com" + final - print(f" final_url={final}") + print(f" final={final}") - dest = DOWNLOAD_DIR / f"{package}-{(version or 'latest').replace(' ', '_')}.apk" - DOWNLOAD_DIR.mkdir(exist_ok=True) - return dest if download_file(final, dest) else None + dest = DOWNLOAD_DIR / f"{package}-{(version or 'latest').replace(' ','_')}.apk" + return dest if download(final, dest) else None -# --------------------------------------------------------------------------- -# GitHub Release helpers -# --------------------------------------------------------------------------- +# ── GitHub Release helpers ───────────────────────────────────────────────── -def get_or_create_release(): - gh = { +def gh_headers(): + return { "Authorization": f"Bearer {GITHUB_TOKEN}", "Accept": "application/vnd.github+json", "X-GitHub-Api-Version": "2022-11-28" } + +def get_or_create_release(): base = f"https://api.github.com/repos/{GITHUB_REPO}" - r = requests.get(f"{base}/releases/tags/{RELEASE_TAG}", headers=gh) + r = requests.get(f"{base}/releases/tags/{RELEASE_TAG}", headers=gh_headers()) if r.status_code == 200: d = r.json() return d["id"], d["upload_url"] - r = requests.post(f"{base}/releases", headers=gh, json={ + r = requests.post(f"{base}/releases", headers=gh_headers(), json={ "tag_name": RELEASE_TAG, "name": "APK Mirror", - "body": "Auto-scraped APKs for 
Morphe patching. Do not edit manually.", + "body": "Auto-scraped APKs for Morphe patching.", "prerelease": False }) r.raise_for_status() d = r.json() return d["id"], d["upload_url"] - def list_assets(release_id): - gh = {"Authorization": f"Bearer {GITHUB_TOKEN}", "Accept": "application/vnd.github+json"} r = requests.get( f"https://api.github.com/repos/{GITHUB_REPO}/releases/{release_id}/assets", - headers=gh + headers=gh_headers() ) r.raise_for_status() return {a["name"]: a["id"] for a in r.json()} - def delete_asset(asset_id): - gh = {"Authorization": f"Bearer {GITHUB_TOKEN}", "Accept": "application/vnd.github+json"} requests.delete( f"https://api.github.com/repos/{GITHUB_REPO}/releases/assets/{asset_id}", - headers=gh + headers=gh_headers() ) - -def upload_asset(upload_url, file_path): +def upload_asset(upload_url, path): url = re.sub(r"\{.*?\}", "", upload_url) - gh = { - "Authorization": f"Bearer {GITHUB_TOKEN}", - "Content-Type": "application/vnd.android.package-archive", - "Accept": "application/vnd.github+json" - } - with open(file_path, "rb") as f: - r = requests.post(url, headers=gh, - params={"name": file_path.name}, - data=f, timeout=600) + h = {**gh_headers(), "Content-Type": "application/vnd.android.package-archive"} + with open(path, "rb") as f: + r = requests.post(url, headers=h, + params={"name": path.name}, data=f, timeout=600) if r.status_code in (200, 201): dl = r.json().get("browser_download_url", "") - print(f" [+] Uploaded: {dl}") + print(f" uploaded -> {dl}") return dl - print(f" [!] 
Upload failed ({r.status_code}): {r.text[:300]}") + print(f" upload failed {r.status_code}: {r.text[:200]}") return "" -# --------------------------------------------------------------------------- -# Main -# --------------------------------------------------------------------------- +# ── main ─────────────────────────────────────────────────────────────────── -def try_all_sources(app): - """Try APKMirror -> Uptodown -> APKPure -> APKCombo in order.""" - sources = [ - ("APKMirror", apkmirror_get), - ("Uptodown", uptodown_get), - ("APKPure", apkpure_get), - ("APKCombo", apkcombo_get), - ] - for source_name, fn in sources: +def try_sources(app): + for name, fn in [("APKMirror", apkmirror), + ("Uptodown", uptodown), + ("APKCombo", apkcombo)]: try: result = fn(app) if result and result.exists(): - print(f" [OK] Got APK from {source_name}") - return result, source_name + print(f" -> SUCCESS via {name}") + return result, name except Exception as e: - print(f" [!] {source_name} threw exception: {e}") + print(f" -> {name} exception: {e}") time.sleep(2) return None, None def main(): if not GITHUB_TOKEN: - print("[!] 
GITHUB_TOKEN not set") + print("GITHUB_TOKEN not set") sys.exit(1) apps = load_apps() - print(f"[*] {len(apps)} apps to process") + print(f"[*] {len(apps)} apps") release_id, upload_url = get_or_create_release() - print(f"[*] Release id={release_id}") + print(f"[*] release_id={release_id}") existing = list_assets(release_id) - print(f"[*] Existing assets in release: {len(existing)}") + print(f"[*] existing assets={len(existing)}") results = [] for app in apps: - name = app["name"] + name = app["name"] package = app["package"] version = app.get("version", "latest") - print(f"\n[>>>] {name} | {package} | v{version}") + print(f"\n{'='*60}") + print(f"[APP] {name} | {package} | v{version}") + print(f"{'='*60}") - apk_path, source = try_all_sources(app) + apk, source = try_sources(app) - if not apk_path: - print(f" [FAIL] All sources failed for {name}") + if not apk: + print(f" FAILED all sources") results.append({"name": name, "status": "FAILED", "source": None, "url": ""}) time.sleep(2) continue - # Replace old asset - if apk_path.name in existing: - print(f" [~] Replacing existing asset {apk_path.name}") - delete_asset(existing[apk_path.name]) + if apk.name in existing: + print(f" replacing old asset {apk.name}") + delete_asset(existing[apk.name]) - dl_url = upload_asset(upload_url, apk_path) + dl_url = upload_asset(upload_url, apk) results.append({ - "name": name, - "package": package, - "version": version, + "name": name, "package": package, "version": version, "source": source, "status": "OK" if dl_url else "UPLOAD_FAILED", "url": dl_url }) - apk_path.unlink(missing_ok=True) + apk.unlink(missing_ok=True) time.sleep(3) with open("scrape_results.json", "w") as f: json.dump(results, f, indent=2) - print("\n=== Summary ===") - ok = [r for r in results if r["status"] == "OK"] + ok = [r for r in results if r["status"] == "OK"] failed = [r["name"] for r in results if r["status"] != "OK"] - print(f"OK: {len(ok)}/{len(results)}") + print(f"\n[SUMMARY] 
OK={len(ok)}/{len(results)}") if failed: - print(f"FAILED: {', '.join(failed)}") + print(f"[FAILED] {', '.join(failed)}") sys.exit(1) - print("All done!") + print("[DONE]") if __name__ == "__main__": From 7bd13536ac55fe03a0e68ee81d5fb0a285a45005 Mon Sep 17 00:00:00 2001 From: myst25 <83259411+myst-25@users.noreply.github.com> Date: Wed, 22 Apr 2026 23:40:54 +0530 Subject: [PATCH 07/24] feat: rewrite scraper for local laptop use - downloads to cwd, auto-cleanup after upload --- scraper.py | 384 +++++++++++++++++++++++++++-------------------------- 1 file changed, 197 insertions(+), 187 deletions(-) diff --git a/scraper.py b/scraper.py index b4c1d4a..ec0f84e 100644 --- a/scraper.py +++ b/scraper.py @@ -1,8 +1,13 @@ #!/usr/bin/env python3 """ -Morphe APK Scraper -Sources (in order): APKMirror -> Uptodown -> APKCombo -Uploads APKs to GitHub Releases tag 'apks'. +Morphe APK Scraper - Run locally on your laptop +Usage: + export GITHUB_TOKEN="your_token" + python scraper.py + +Downloads APKs into the current directory, uploads each one to the +GitHub release as soon as it's downloaded, then deletes the local file. +At the end it verifies every app is present in the release. 
""" import json @@ -16,39 +21,47 @@ import requests from bs4 import BeautifulSoup +# ── config ──────────────────────────────────────────────────────────────────── +GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "") +GITHUB_REPO = os.environ.get("GITHUB_REPOSITORY", "myst-25/morphe-apk-scraper") +RELEASE_TAG = "apks" +APPS_FILE = Path(__file__).parent / "apps.json" +DOWNLOAD_DIR = Path(__file__).parent # save right here, no sub-folder + HEADERS = { - "User-Agent": "Mozilla/5.0 (Linux; Android 12; Pixel 6) AppleWebKit/537.36 " - "(KHTML, like Gecko) Chrome/124.0.0.0 Mobile Safari/537.36", + "User-Agent": ( + "Mozilla/5.0 (Linux; Android 13; Pixel 7) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/124.0.6367.82 Mobile Safari/537.36" + ), "Accept-Language": "en-US,en;q=0.9", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "Referer": "https://www.google.com/", } -DOWNLOAD_DIR = Path("apks") -APPS_FILE = Path("apps.json") -GITHUB_REPO = os.environ.get("GITHUB_REPOSITORY", "myst-25/morphe-apk-scraper") -GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "") -RELEASE_TAG = "apks" - SESSION = requests.Session() SESSION.headers.update(HEADERS) +# ── tiny helpers ────────────────────────────────────────────────────────────── -# ── helpers ──────────────────────────────────────────────────────────────── +def banner(text): + print(f"\n{'='*64}") + print(f" {text}") + print(f"{'='*64}") -def load_apps(): - with open(APPS_FILE) as f: - return json.load(f) +def log(msg): print(f" {msg}") +def ok(msg): print(f" \033[92m✔ {msg}\033[0m") +def err(msg): print(f" \033[91m✘ {msg}\033[0m") -def get(url, retries=3, delay=5): +def get_page(url, retries=3): for i in range(retries): try: r = SESSION.get(url, timeout=30, allow_redirects=True) - print(f" GET {url} -> HTTP {r.status_code}") + log(f"GET {url[:90]} → HTTP {r.status_code}") if r.status_code == 429: wait = int(r.headers.get("Retry-After", 30)) - print(f" rate-limited, sleeping {wait}s") + 
log(f"rate-limited, sleeping {wait}s") time.sleep(wait) continue if r.status_code == 404: @@ -56,13 +69,13 @@ def get(url, retries=3, delay=5): r.raise_for_status() return r except Exception as e: - print(f" attempt {i+1} error: {e}") - time.sleep(delay * (i + 1)) + log(f"attempt {i+1} failed: {e}") + time.sleep(5 * (i + 1)) return None def soup(url): - r = get(url) + r = get_page(url) return BeautifulSoup(r.text, "html.parser") if r else None @@ -70,73 +83,72 @@ def ver_slug(v): return re.sub(r"[^a-zA-Z0-9]+", "-", v).strip("-").lower() -def download(url, dest, retries=3): +def download_apk(url, dest, retries=3): + """Stream-download url → dest. Returns True if valid APK saved.""" for i in range(retries): try: with SESSION.get(url, stream=True, timeout=180, allow_redirects=True) as r: - ct = r.headers.get("Content-Type", "") - print(f" DL {url[:80]} ct={ct} status={r.status_code}") + ct = r.headers.get("Content-Type", "") + size = int(r.headers.get("Content-Length", 0)) + log(f"DL status={r.status_code} ct={ct} size={size//1024}KB") if "text/html" in ct or r.status_code >= 400: - print(f" blocked or error, skipping") + log("blocked / not an APK, skipping") return False - dest.parent.mkdir(parents=True, exist_ok=True) with open(dest, "wb") as f: - for chunk in r.iter_content(65536): + for chunk in r.iter_content(131072): f.write(chunk) - size = dest.stat().st_size - if size < 500_000: - print(f" too small ({size} B), not valid APK") + saved = dest.stat().st_size + if saved < 500_000: + log(f"file too small ({saved} B) – not a valid APK") dest.unlink(missing_ok=True) return False - print(f" saved {dest.name} ({size//1024//1024} MB)") + ok(f"saved {dest.name} ({saved // 1024 // 1024} MB)") return True except Exception as e: - print(f" download attempt {i+1} error: {e}") + log(f"download attempt {i+1} error: {e}") + dest.unlink(missing_ok=True) time.sleep(8 * (i + 1)) return False -# ── Source 1: APKMirror ──────────────────────────────────────────────────── +# ── 
Source 1 : APKMirror ────────────────────────────────────────────────────── -def apkmirror(app): - print(" [APKMirror]") +def src_apkmirror(app): + log("[APKMirror]") base = app["apkmirror_url"].rstrip("/") + "/" version = app.get("version") - arch = app.get("arch", "nodpi") package = app["package"] + arch = app.get("arch", "nodpi") + + # ── find release page + s = soup(base) + if not s: + return None - # 1. release page release_page = None if version: - s = soup(base) - if not s: - return None slug = ver_slug(version) for a in s.find_all("a", href=True): h = a["href"] if slug in h and "/apk/" in h and "download" not in h: - release_page = ("https://www.apkmirror.com" + h - if h.startswith("/") else h) + release_page = "https://www.apkmirror.com" + h if h.startswith("/") else h break if not release_page: app_slug = base.rstrip("/").split("/")[-1] release_page = f"{base}{app_slug}-{slug}-release/" else: - s = soup(base) - if not s: - return None a = s.find("a", href=re.compile(r"-release/$")) if a: h = a["href"] - release_page = ("https://www.apkmirror.com" + h - if h.startswith("/") else h) + release_page = "https://www.apkmirror.com" + h if h.startswith("/") else h + if not release_page: - print(" no release page found") + log("no release page found") return None - print(f" release_page={release_page}") + log(f"release_page={release_page}") - # 2. 
variant page + # ── pick best variant s2 = soup(release_page) if not s2: return None @@ -145,11 +157,10 @@ def apkmirror(app): pt = (a.find_parent() or a).get_text(" ", strip=True).upper() if "BUNDLE" in pt or "APKM" in pt: continue - candidates.append((a["href"], pt)) + candidates.append((a["href"], pt.lower())) def score(item): h, t = item - t = t.lower() if arch and arch != "nodpi" and arch.lower() in t: return 0 if "nodpi" in t or "universal" in t: @@ -157,28 +168,26 @@ def score(item): return 2 if not candidates: - print(" no variant candidates") + log("no variant candidates") return None candidates.sort(key=score) vh = candidates[0][0] - variant_page = ("https://www.apkmirror.com" + vh - if vh.startswith("/") else vh) - print(f" variant_page={variant_page}") + variant_page = "https://www.apkmirror.com" + vh if vh.startswith("/") else vh + log(f"variant_page={variant_page}") - # 3. interstitial + # ── interstitial s3 = soup(variant_page) if not s3: return None btn = s3.find("a", href=re.compile(r"download/\?key=")) if not btn: - print(" no download button") + log("no download button on variant page") return None ih = btn["href"] - interstitial = ("https://www.apkmirror.com" + ih - if ih.startswith("/") else ih) - print(f" interstitial={interstitial}") + interstitial = "https://www.apkmirror.com" + ih if ih.startswith("/") else ih + log(f"interstitial={interstitial}") - # 4. 
CDN url + # ── CDN url s4 = soup(interstitial) if not s4: return None @@ -188,125 +197,93 @@ def score(item): final = a["href"] break if not final: - print(" no final CDN url") + log("no CDN url found") return None - print(f" final={final}") + log(f"CDN={final[:80]}") - dest = DOWNLOAD_DIR / f"{package}-{(version or 'latest').replace(' ','_')}.apk" - return dest if download(final, dest) else None + dest = DOWNLOAD_DIR / f"{package}.apk" + return dest if download_apk(final, dest) else None -# ── Source 2: Uptodown ───────────────────────────────────────────────────── +# ── Source 2 : Uptodown ─────────────────────────────────────────────────────── -def uptodown(app): - print(" [Uptodown]") +def src_uptodown(app): + log("[Uptodown]") base = app.get("uptodown_dlurl", "").rstrip("/") version = app.get("version") package = app["package"] if not base: - print(" no uptodown_dlurl") + log("no uptodown_dlurl configured") return None - # Uptodown versions page versions_url = f"{base}/versions" - s = soup(versions_url) - if not s: - s = soup(base) + s = soup(versions_url) or soup(base) if not s: return None - # Find download page for the specific version dl_page = None if version: - # version rows look like: /android/post-download/XXXXX for a in s.find_all("a", href=True): - text = a.get_text(strip=True) - if version in text: + parent_text = (a.find_parent() or a).get_text(" ", strip=True) + if version in parent_text and re.search(r"post-download|/download", a["href"]): dl_page = a["href"] break - if not dl_page: - # try direct download URL pattern - # Uptodown stores version in the download slug - for a in s.find_all("a", href=re.compile(r"post-download|/download")): - parent_text = (a.find_parent() or a).get_text(" ", strip=True) - if version in parent_text: - dl_page = a["href"] - break if not dl_page: - # fallback: grab the first download link (latest) a = s.find("a", href=re.compile(r"post-download|/download")) if a: dl_page = a["href"] - if not dl_page: - print(" no 
download page link found") + log("no download page link found") return None - if not dl_page.startswith("http"): dl_page = urljoin(base, dl_page) - print(f" dl_page={dl_page}") + log(f"dl_page={dl_page}") - # Hit the download page to get the real APK link s2 = soup(dl_page) final = None if s2: - # Uptodown puts the APK link in a button or meta refresh btn = (s2.find("a", id="detail-download-button") or s2.find("a", attrs={"data-url": True}) or s2.find("a", href=re.compile(r"\.apk"))) if btn: final = btn.get("href") or btn.get("data-url", "") - # meta refresh fallback if not final: meta = s2.find("meta", attrs={"http-equiv": "refresh"}) if meta: - content = meta.get("content", "") - m = re.search(r"url=(.+)", content, re.I) + m = re.search(r"url=(.+)", meta.get("content", ""), re.I) if m: final = m.group(1).strip() - if not final: - # last resort: try direct download from Uptodown CDN pattern - # https://{app}.en.uptodown.com/android/download/{id} - print(" no final link from download page, trying direct") + log("falling back to dl_page as direct download") final = dl_page - if not final.startswith("http"): final = urljoin(base, final) - print(f" final={final}") + log(f"final={final[:80]}") - dest = DOWNLOAD_DIR / f"{package}-{(version or 'latest').replace(' ','_')}.apk" - return dest if download(final, dest) else None + dest = DOWNLOAD_DIR / f"{package}.apk" + return dest if download_apk(final, dest) else None -# ── Source 3: APKCombo ───────────────────────────────────────────────────── +# ── Source 3 : APKCombo ─────────────────────────────────────────────────────── -def apkcombo(app): - print(" [APKCombo]") - package = app["package"] - version = app.get("version") +def src_apkcombo(app): + log("[APKCombo]") + package = app["package"] + version = app.get("version") base_url = app.get("apkcombo_url", f"https://apkcombo.com/apk/{package}") - url = base_url - if version: - url = f"{base_url}/{version}" - print(f" url={url}") - - s = soup(url) - if not s: - s = 
soup(base_url) + url = f"{base_url}/{version}" if version else base_url + log(f"url={url}") + s = soup(url) or soup(base_url) if not s: return None - # APKCombo: look for direct .apk href or a download button final = None for a in s.find_all("a", href=True): - h = a["href"] - if re.search(r"\.apk(\?|$)", h): - final = h + if re.search(r"\.apk(\?|$)", a["href"]): + final = a["href"] break if not final: - # APKCombo download button is often in a form or data attr for tag in s.find_all(attrs={"data-src": re.compile(r"\.apk")}): final = tag["data-src"] break @@ -314,143 +291,176 @@ def apkcombo(app): btn = s.find("a", class_=re.compile(r"download", re.I)) if btn: final = btn.get("href", "") - if not final: - print(" no download link found") + log("no download link found") return None - if not final.startswith("http"): final = "https://apkcombo.com" + final - print(f" final={final}") + log(f"final={final[:80]}") - dest = DOWNLOAD_DIR / f"{package}-{(version or 'latest').replace(' ','_')}.apk" - return dest if download(final, dest) else None + dest = DOWNLOAD_DIR / f"{package}.apk" + return dest if download_apk(final, dest) else None -# ── GitHub Release helpers ───────────────────────────────────────────────── +# ── GitHub Release helpers ──────────────────────────────────────────────────── def gh_headers(): return { "Authorization": f"Bearer {GITHUB_TOKEN}", "Accept": "application/vnd.github+json", - "X-GitHub-Api-Version": "2022-11-28" + "X-GitHub-Api-Version": "2022-11-28", } + def get_or_create_release(): base = f"https://api.github.com/repos/{GITHUB_REPO}" r = requests.get(f"{base}/releases/tags/{RELEASE_TAG}", headers=gh_headers()) if r.status_code == 200: d = r.json() return d["id"], d["upload_url"] + log("Release not found, creating...") r = requests.post(f"{base}/releases", headers=gh_headers(), json={ "tag_name": RELEASE_TAG, "name": "APK Mirror", "body": "Auto-scraped APKs for Morphe patching.", - "prerelease": False + "prerelease": False, }) 
r.raise_for_status() d = r.json() return d["id"], d["upload_url"] + def list_assets(release_id): r = requests.get( f"https://api.github.com/repos/{GITHUB_REPO}/releases/{release_id}/assets", - headers=gh_headers() + headers=gh_headers(), ) r.raise_for_status() return {a["name"]: a["id"] for a in r.json()} + def delete_asset(asset_id): requests.delete( f"https://api.github.com/repos/{GITHUB_REPO}/releases/assets/{asset_id}", - headers=gh_headers() + headers=gh_headers(), ) + def upload_asset(upload_url, path): url = re.sub(r"\{.*?\}", "", upload_url) h = {**gh_headers(), "Content-Type": "application/vnd.android.package-archive"} + log(f"uploading {path.name} ({path.stat().st_size // 1024 // 1024} MB) ...") with open(path, "rb") as f: - r = requests.post(url, headers=h, - params={"name": path.name}, data=f, timeout=600) + r = requests.post(url, headers=h, params={"name": path.name}, + data=f, timeout=600) if r.status_code in (200, 201): dl = r.json().get("browser_download_url", "") - print(f" uploaded -> {dl}") + ok(f"uploaded → {dl}") return dl - print(f" upload failed {r.status_code}: {r.text[:200]}") + err(f"upload failed {r.status_code}: {r.text[:200]}") return "" -# ── main ─────────────────────────────────────────────────────────────────── +# ── per-app orchestration ───────────────────────────────────────────────────── -def try_sources(app): - for name, fn in [("APKMirror", apkmirror), - ("Uptodown", uptodown), - ("APKCombo", apkcombo)]: +def process_app(app, release_id, upload_url): + name = app["name"] + package = app["package"] + version = app.get("version") or "latest" + banner(f"{name} | {package} | v{version}") + + # try sources in order + apk_path = None + for label, fn in [("APKMirror", src_apkmirror), + ("Uptodown", src_uptodown), + ("APKCombo", src_apkcombo)]: try: result = fn(app) if result and result.exists(): - print(f" -> SUCCESS via {name}") - return result, name + ok(f"got APK via {label}") + apk_path = result + break except Exception as e: - 
print(f" -> {name} exception: {e}") + err(f"{label} exception: {e}") time.sleep(2) - return None, None + if not apk_path: + err(f"ALL sources failed for {name}") + return False + + # replace old asset in release if present + existing = list_assets(release_id) + if apk_path.name in existing: + log(f"deleting old asset {apk_path.name}") + delete_asset(existing[apk_path.name]) + + dl_url = upload_asset(upload_url, apk_path) + + # delete local file immediately after upload + apk_path.unlink(missing_ok=True) + log(f"deleted local {apk_path.name}") + + return bool(dl_url) + + +# ── final verification ──────────────────────────────────────────────────────── + +def verify_all(apps, release_id): + banner("VERIFICATION") + assets = list_assets(release_id) + missing = [] + present = [] + for app in apps: + fname = f"{app['package']}.apk" + if fname in assets: + ok(fname) + present.append(fname) + else: + err(f"MISSING: {fname}") + missing.append(app["name"]) + print() + print(f" Present : {len(present)}/{len(apps)}") + if missing: + print(f" Missing : {', '.join(missing)}") + return missing + + +# ── main ────────────────────────────────────────────────────────────────────── def main(): if not GITHUB_TOKEN: - print("GITHUB_TOKEN not set") + print("ERROR: GITHUB_TOKEN environment variable not set.") + print("Run: export GITHUB_TOKEN=your_token") sys.exit(1) - apps = load_apps() - print(f"[*] {len(apps)} apps") + with open(APPS_FILE) as f: + apps = json.load(f) + print(f"[*] Loaded {len(apps)} apps from apps.json") release_id, upload_url = get_or_create_release() - print(f"[*] release_id={release_id}") - existing = list_assets(release_id) - print(f"[*] existing assets={len(existing)}") + print(f"[*] Release id={release_id}") - results = [] + failed = [] for app in apps: - name = app["name"] - package = app["package"] - version = app.get("version", "latest") - print(f"\n{'='*60}") - print(f"[APP] {name} | {package} | v{version}") - print(f"{'='*60}") - - apk, source = 
try_sources(app) - - if not apk: - print(f" FAILED all sources") - results.append({"name": name, "status": "FAILED", "source": None, "url": ""}) - time.sleep(2) - continue - - if apk.name in existing: - print(f" replacing old asset {apk.name}") - delete_asset(existing[apk.name]) - - dl_url = upload_asset(upload_url, apk) - results.append({ - "name": name, "package": package, "version": version, - "source": source, - "status": "OK" if dl_url else "UPLOAD_FAILED", - "url": dl_url - }) - apk.unlink(missing_ok=True) + success = process_app(app, release_id, upload_url) + if not success: + failed.append(app["name"]) time.sleep(3) - with open("scrape_results.json", "w") as f: - json.dump(results, f, indent=2) + missing = verify_all(apps, release_id) - ok = [r for r in results if r["status"] == "OK"] - failed = [r["name"] for r in results if r["status"] != "OK"] - print(f"\n[SUMMARY] OK={len(ok)}/{len(results)}") + banner("SUMMARY") + total = len(apps) + ok_cnt = total - len(failed) + print(f" Scraped & uploaded : {ok_cnt}/{total}") if failed: - print(f"[FAILED] {', '.join(failed)}") + print(f" Failed : {', '.join(failed)}") + if missing: + print(f" Missing in release : {', '.join(missing)}") sys.exit(1) - print("[DONE]") + else: + ok(f"All {total} APKs verified in GitHub release!") + print(f" Release: https://github.com/{GITHUB_REPO}/releases/tag/{RELEASE_TAG}") if __name__ == "__main__": From 7215d94b94ec332608eae365eee355f775ad479e Mon Sep 17 00:00:00 2001 From: myst25 <83259411+myst-25@users.noreply.github.com> Date: Fri, 22 May 2026 23:14:07 +0530 Subject: [PATCH 08/24] Delete .github/workflows/scrape.yml --- .github/workflows/scrape.yml | 46 ------------------------------------ 1 file changed, 46 deletions(-) delete mode 100644 .github/workflows/scrape.yml diff --git a/.github/workflows/scrape.yml b/.github/workflows/scrape.yml deleted file mode 100644 index 4a37670..0000000 --- a/.github/workflows/scrape.yml +++ /dev/null @@ -1,46 +0,0 @@ -name: Scrape APKs - -on: - 
schedule: - - cron: '0 2 * * *' # Daily at 2 AM UTC - workflow_dispatch: # Manual trigger - -jobs: - scrape: - runs-on: ubuntu-latest - permissions: write-all - - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - - name: Install dependencies - run: pip install requests beautifulsoup4 - - - name: Run scraper - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - GITHUB_REPOSITORY: ${{ github.repository }} - run: python scraper.py - - - name: Upload scrape results artifact - if: always() - uses: actions/upload-artifact@v4 - with: - name: scrape-results - path: scrape_results.json - retention-days: 7 - - - name: Trigger Morphed-apps build - if: success() - env: - GH_TOKEN: ${{ secrets.MORPHE_PAT }} - run: | - gh workflow run build.yml \ - --repo myst-25/Morphed-apps \ - --ref main From 428de70b5cb356022f9c35e55d79a32ce7431d4a Mon Sep 17 00:00:00 2001 From: myst-25 Date: Wed, 3 Jun 2026 15:44:48 +0530 Subject: [PATCH 09/24] Add automated GitHub Actions workflow and batch download features --- .github/workflows/download_apks.yml | 51 +++++++++++ apkm_converter.py | 55 ++++++++++++ apkmcli | 129 ++++++++++++++++++++++++---- apkmirror.py | 126 ++++++++++++++++----------- apps.json | 5 ++ 5 files changed, 297 insertions(+), 69 deletions(-) create mode 100644 .github/workflows/download_apks.yml create mode 100644 apkm_converter.py create mode 100644 apps.json diff --git a/.github/workflows/download_apks.yml b/.github/workflows/download_apks.yml new file mode 100644 index 0000000..4459b2f --- /dev/null +++ b/.github/workflows/download_apks.yml @@ -0,0 +1,51 @@ +name: APK Downloader + +on: + workflow_dispatch: + inputs: + proxy_url: + description: 'Ngrok Proxy URL (e.g. 
http://0.tcp.ngrok.io:12345)' + required: true + type: string + +permissions: + contents: write + +jobs: + download-and-release: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + + - name: Install dependencies + run: | + pip install beautifulsoup4 cloudscraper requests + + - name: Run APK Scraper + env: + HTTP_PROXY: ${{ inputs.proxy_url }} + HTTPS_PROXY: ${{ inputs.proxy_url }} + run: | + # Create builds directory to ensure it exists + mkdir -p builds + # Pass '5' for Download All and press enter twice to clear any extra prompts + printf "5\n\n\n\n\n" | python apkmcli + + - name: Get Date + id: date + run: echo "date=$(date +'%Y.%m.%d-%H%M')" >> $GITHUB_OUTPUT + + - name: Create Release and Upload APKs + uses: softprops/action-gh-release@v2 + with: + tag_name: release-${{ steps.date.outputs.date }} + name: APK Builds - ${{ steps.date.outputs.date }} + files: builds/*.apk + body: | + Automated APK fetch via GitHub Actions. diff --git a/apkm_converter.py b/apkm_converter.py new file mode 100644 index 0000000..723ca6f --- /dev/null +++ b/apkm_converter.py @@ -0,0 +1,55 @@ +import os +import zipfile +import shutil +import sys + +def extract_base_apk(bundle_path: str, output_dir: str = "."): + """ + Extracts the base.apk from an .apkm or .apks archive. 
+ """ + if not zipfile.is_zipfile(bundle_path): + print(f"Error: {bundle_path} is not a valid zip archive.") + return None + + extracted_path = None + with zipfile.ZipFile(bundle_path, 'r') as zip_ref: + base_apk_names = [name for name in zip_ref.namelist() if "base.apk" in name.lower()] + + target_name = None + if base_apk_names: + target_name = base_apk_names[0] + else: + apks = [name for name in zip_ref.namelist() if name.endswith('.apk')] + if len(apks) == 1: + target_name = apks[0] + elif apks: + non_splits = [n for n in apks if "split" not in n.lower() and "config" not in n.lower()] + if non_splits: + target_name = non_splits[0] + else: + target_name = apks[0] + + if target_name: + print(f"Found base APK candidate: {target_name} inside the bundle.") + zip_ref.extract(target_name, path=output_dir) + extracted_path = os.path.join(output_dir, target_name) + + base_name = os.path.basename(bundle_path) + new_name = os.path.splitext(base_name)[0] + "_base.apk" + new_path = os.path.join(output_dir, new_name) + + if os.path.abspath(extracted_path) != os.path.abspath(new_path): + shutil.move(extracted_path, new_path) + + print(f"Successfully extracted base APK to {new_path}") + return new_path + else: + print("Could not find a base APK inside the bundle.") + return None + +if __name__ == "__main__": + if len(sys.argv) < 2: + print("Usage: python apkm_converter.py ") + sys.exit(1) + + extract_base_apk(sys.argv[1]) diff --git a/apkmcli b/apkmcli index 8da30b2..e325b97 100755 --- a/apkmcli +++ b/apkmcli @@ -1,36 +1,129 @@ #!/usr/bin/env python3 import sys +import os +import re +import glob from apkmirror import APKMirror +import apkm_converter apkm = APKMirror(timeout=3, results=5) -search_query = input("Search:\n -> ") -results = apkm.search(search_query) +def download_app(app_name, version): + search_query = f"{app_name} {version}".strip() + print(f"\n--- Processing {app_name} ---") + results = apkm.search(search_query) -for result in enumerate(results): - 
print(f"[{result[0]}] {result[1]['name']}") + if not results: + print(f"No results found for {search_query}.") + return -download_id = int(input("Enter number to get details, or 99 to exit:\n -> ")) + filtered_results = [] + for r in results: + name = r['name'] + if name.startswith(app_name + " "): + remainder = name[len(app_name):].strip() + if remainder and remainder[0].isdigit(): + filtered_results.append(r) + + if not filtered_results: + filtered_results = results -if download_id == 99: - sys.exit("Exit") + print(f"Found latest release: {filtered_results[0]['name']}") + app_details = apkm.get_app_details(filtered_results[0]["link"]) -app_details = apkm.get_app_details(results[download_id]["link"]) + if not app_details: + print("Could not find suitable variants.") + return -print(f"This app is for \"{app_details['architecture']}\" devices, running {app_details['android_version']} with {app_details['dpi']} DPI") + print(f"Selected best variant: {app_details['architecture']} ({app_details['type']})") -ask_download = input("Do you want to download it? 
(y/n)\n -> ") - -if ask_download.lower() in ("y", ""): app_link = app_details["download_link"] - print(f"Trying to get direct link, if the script cant get it, download by visiting this (not direct url): {app_link}") - direct_link = apkm.get_download_link(app_link) + direct_url = apkm.get_direct_download_link(direct_link) + + print(f"Downloading from {direct_url}...") + + dl_resp = apkm.scraper.get(direct_url, stream=True, headers=apkm.headers) + + builds_dir = "builds" + os.makedirs(builds_dir, exist_ok=True) + + clean_app_name = app_name.replace(' ', '_') + old_files = glob.glob(os.path.join(builds_dir, f"{clean_app_name}_*_myst25.*")) + for old_file in old_files: + try: + os.remove(old_file) + print(f"Deleted old version: {old_file}") + except Exception as e: + print(f"Failed to delete {old_file}: {e}") + + ext = ".apk" + cd = dl_resp.headers.get('content-disposition') + if cd: + fname = re.findall('filename=(.+)', cd) + if len(fname) > 0: + original_filename = fname[0].strip('"') + ext = os.path.splitext(original_filename)[1] + + clean_name = filtered_results[0]['name'].replace(' ', '_') + filename = os.path.join(builds_dir, f"{clean_name}_myst25{ext}") + + print(f"Saving to {filename}...") + with open(filename, 'wb') as f: + for chunk in dl_resp.iter_content(chunk_size=8192): + f.write(chunk) + + print(f"Downloaded {filename}") + + if filename.endswith(".apkm") or filename.endswith(".apks"): + print("Bundle detected. 
Extracting base APK...") + extracted = apkm_converter.extract_base_apk(filename, output_dir=builds_dir) + if extracted: + final_apk_name = os.path.join(builds_dir, f"{clean_name}_myst25.apk") + if os.path.abspath(extracted) != os.path.abspath(final_apk_name): + os.rename(extracted, final_apk_name) + os.remove(filename) + print(f"Ready: {final_apk_name}") + else: + print("Extraction failed.") + else: + print(f"Ready: {filename}") + +def main(): + import json + apps = ["YouTube", "YouTube Music", "X"] + print("Select an app to search:") + for i, app in enumerate(apps): + print(f"[{i+1}] {app}") + print(f"[{len(apps)+1}] Download All (from apps.json)") + print(f"[{len(apps)+2}] Custom Search") + + try: + app_choice = int(input(" -> ")) - 1 + except ValueError: + sys.exit("Invalid choice") - print("Got the link i neded, trying to get a direct link...") - sys.exit(f"Done. Direct url: {apkm.get_direct_download_link(direct_link)}") + if 0 <= app_choice < len(apps): + app_name = apps[app_choice] + version = input(f"Enter version for {app_name} (or leave blank for latest):\n -> ") + download_app(app_name, version) + elif app_choice == len(apps): + try: + with open("apps.json", "r") as f: + apps_config = json.load(f) + except Exception as e: + sys.exit(f"Failed to read apps.json: {e}") + + for app_name, version in apps_config.items(): + download_app(app_name, version) + elif app_choice == len(apps) + 1: + app_name = input("Enter app name to search:\n -> ") + version = input(f"Enter version for {app_name} (or leave blank for latest):\n -> ") + download_app(app_name, version) + else: + sys.exit("Invalid choice") -else: - sys.exit("Exit") +if __name__ == "__main__": + main() diff --git a/apkmirror.py b/apkmirror.py index a75f756..9c1886d 100644 --- a/apkmirror.py +++ b/apkmirror.py @@ -69,57 +69,81 @@ def get_app_details(self, app_link): soup = BeautifulSoup(resp.text, "html.parser") - data = soup.find_all("div", {"class": ["table-row", "headerFont"]})[1] - - architecture = 
data.find_all( - "div", - { - "class": [ - "table-cell", - "rowheight", - "addseparator", - "expand", - "pad", - "dowrap", - ] - }, - )[1].text.strip() - android_version = data.find_all( - "div", - { - "class": [ - "table-cell", - "rowheight", - "addseparator", - "expand", - "pad", - "dowrap", - ] - }, - )[2].text.strip() - dpi = data.find_all( - "div", - { - "class": [ - "table-cell", - "rowheight", - "addseparator", - "expand", - "pad", - "dowrap", - ] - }, - )[3].text.strip() - download_link = ( - self.base_url + data.find_all("a", {"class": "accent_color"})[0]["href"] - ) - - return { - "architecture": architecture, - "android_version": android_version, - "dpi": dpi, - "download_link": download_link, - } + rows = soup.find_all("div", {"class": ["table-row", "headerFont"]}) + + variants = [] + for i, row in enumerate(rows): + if i == 0: + continue # Skip header + + cells = row.find_all( + "div", + { + "class": [ + "table-cell", + "rowheight", + "addseparator", + "expand", + "pad", + "dowrap", + ] + }, + ) + if len(cells) < 4: + continue + + arch = cells[1].text.strip() + android_version = cells[2].text.strip() + dpi = cells[3].text.strip() + + is_bundle = "APK" + badge = cells[0].find("span", {"class": "apkm-badge"}) + if badge: + is_bundle = badge.text.strip() + + link_elem = row.find_all("a", {"class": "accent_color"}) + download_link = self.base_url + link_elem[0]["href"] if link_elem else None + + if download_link: + variants.append({ + "architecture": arch, + "android_version": android_version, + "dpi": dpi, + "download_link": download_link, + "type": is_bundle + }) + + if not variants: + return None + + # Priority ranking: + # 1. APK + universal + # 2. APK + any + # 3. BUNDLE + universal + # 4. 
BUNDLE + any + + best_variant = None + for variant in variants: + if variant["type"] == "APK" and variant["architecture"].lower() == "universal": + best_variant = variant + break + + if not best_variant: + for variant in variants: + if variant["type"] == "APK": + best_variant = variant + break + + if not best_variant: + for variant in variants: + if variant["type"] == "BUNDLE" and variant["architecture"].lower() == "universal": + best_variant = variant + break + + if not best_variant: + best_variant = variants[0] + + return best_variant def get_download_link(self, app_download_link): print("[get_download_link] Sleeping...") diff --git a/apps.json b/apps.json new file mode 100644 index 0000000..67ec230 --- /dev/null +++ b/apps.json @@ -0,0 +1,5 @@ +{ + "YouTube": "", + "YouTube Music": "", + "X": "" +} From d4476e83eac5c4088724b66a2d7fceb2bc807b3d Mon Sep 17 00:00:00 2001 From: myst-25 Date: Wed, 3 Jun 2026 15:47:35 +0530 Subject: [PATCH 10/24] Update apkmcli for automated runs and add cron schedule --- .github/workflows/download_apks.yml | 10 ++++++---- apkmcli | 12 ++++++++++++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/.github/workflows/download_apks.yml b/.github/workflows/download_apks.yml index 4459b2f..916ed2b 100644 --- a/.github/workflows/download_apks.yml +++ b/.github/workflows/download_apks.yml @@ -1,11 +1,13 @@ name: APK Downloader on: + schedule: + - cron: '0 0 * * *' # Runs every day at midnight UTC workflow_dispatch: inputs: proxy_url: - description: 'Ngrok Proxy URL (e.g. 
http://0.tcp.ngrok.io:12345)' - required: true + description: 'Ngrok Proxy URL (Optional - leave blank to use GitHub IP)' + required: false type: string permissions: @@ -34,8 +36,8 @@ jobs: run: | # Create builds directory to ensure it exists mkdir -p builds - # Pass '5' for Download All and press enter twice to clear any extra prompts - printf "5\n\n\n\n\n" | python apkmcli + # Run script in automated mode + python apkmcli --download-all - name: Get Date id: date diff --git a/apkmcli b/apkmcli index e325b97..f245c71 100755 --- a/apkmcli +++ b/apkmcli @@ -93,6 +93,18 @@ def download_app(app_name, version): def main(): import json + + # Check for non-interactive flag + if len(sys.argv) > 1 and sys.argv[1] == "--download-all": + try: + with open("apps.json", "r") as f: + apps_config = json.load(f) + except Exception as e: + sys.exit(f"Failed to read apps.json: {e}") + for app_name, version in apps_config.items(): + download_app(app_name, version) + return + apps = ["YouTube", "YouTube Music", "X"] print("Select an app to search:") for i, app in enumerate(apps): From a68ca6f85fbd22e1f4bdca09044b16113f2b45f9 Mon Sep 17 00:00:00 2001 From: myst25 <83259411+myst-25@users.noreply.github.com> Date: Wed, 3 Jun 2026 15:59:48 +0530 Subject: [PATCH 11/24] Update apps.json --- apps.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/apps.json b/apps.json index 67ec230..b9c8435 100644 --- a/apps.json +++ b/apps.json @@ -1,5 +1,5 @@ { - "YouTube": "", - "YouTube Music": "", - "X": "" + "YouTube": "21.21.80", + "YouTube Music": "9.21.51", + "X": "11.91.0-release-ripped.0" } From 6d9082b92aa0d97a6e65d1c66b792f658f6ae2d1 Mon Sep 17 00:00:00 2001 From: myst-25 Date: Wed, 3 Jun 2026 16:09:19 +0530 Subject: [PATCH 12/24] Migrate to Tailscale VPN for unlimited proxy bandwidth --- .github/workflows/download_apks.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/download_apks.yml 
b/.github/workflows/download_apks.yml index 916ed2b..4080b64 100644 --- a/.github/workflows/download_apks.yml +++ b/.github/workflows/download_apks.yml @@ -4,11 +4,6 @@ on: schedule: - cron: '0 0 * * *' # Runs every day at midnight UTC workflow_dispatch: - inputs: - proxy_url: - description: 'Ngrok Proxy URL (Optional - leave blank to use GitHub IP)' - required: false - type: string permissions: contents: write @@ -20,6 +15,11 @@ jobs: - name: Checkout repository uses: actions/checkout@v4 + - name: Set up Tailscale + uses: tailscale/github-action@v2 + with: + authkey: ${{ secrets.TAILSCALE_AUTHKEY }} + - name: Set up Python uses: actions/setup-python@v5 with: @@ -31,8 +31,8 @@ jobs: - name: Run APK Scraper env: - HTTP_PROXY: ${{ inputs.proxy_url }} - HTTPS_PROXY: ${{ inputs.proxy_url }} + HTTP_PROXY: http://100.90.77.61:8899 + HTTPS_PROXY: http://100.90.77.61:8899 run: | # Create builds directory to ensure it exists mkdir -p builds @@ -50,4 +50,4 @@ jobs: name: APK Builds - ${{ steps.date.outputs.date }} files: builds/*.apk body: | - Automated APK fetch via GitHub Actions. + Automated APK fetch via GitHub Actions through Tailscale Proxy. 
From 2515d4bdd6f749bc9d364f6467b5ffc7b709700c Mon Sep 17 00:00:00 2001 From: AI Bot Date: Thu, 4 Jun 2026 20:55:25 +0530 Subject: [PATCH 13/24] Update APK filenames to match standard format [package.name]-[version]-all.apk --- apkmcli | 40 +++++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/apkmcli b/apkmcli index f245c71..a1cdc56 100755 --- a/apkmcli +++ b/apkmcli @@ -50,8 +50,20 @@ def download_app(app_name, version): builds_dir = "builds" os.makedirs(builds_dir, exist_ok=True) - clean_app_name = app_name.replace(' ', '_') - old_files = glob.glob(os.path.join(builds_dir, f"{clean_app_name}_*_myst25.*")) + PACKAGE_NAMES = { + "YouTube": "com.google.android.youtube", + "YouTube Music": "com.google.android.apps.youtube.music", + "X": "com.twitter.android", + "Instagram": "com.instagram.android" + } + + pkg_name_for_delete = PACKAGE_NAMES.get(app_name) + if pkg_name_for_delete: + old_files = glob.glob(os.path.join(builds_dir, f"{pkg_name_for_delete}-*-all.*")) + else: + clean_app_name = app_name.replace(' ', '_') + old_files = glob.glob(os.path.join(builds_dir, f"{clean_app_name}_*_myst25.*")) + for old_file in old_files: try: os.remove(old_file) @@ -59,6 +71,25 @@ def download_app(app_name, version): except Exception as e: print(f"Failed to delete {old_file}: {e}") + + + # Extract clean version without spaces + clean_version = version.replace(' ', '_') + if not clean_version: + # If version was blank, try to extract it from the scraped name (e.g. 
"YouTube 21.21.80") + name_parts = filtered_results[0]['name'].split() + clean_version = name_parts[-1] if name_parts else "latest" + + pkg_name = PACKAGE_NAMES.get(app_name) + + if pkg_name: + # Use exact requested format: com.google.android.youtube-21.18.168-all.apk + base_filename = f"{pkg_name}-{clean_version}-all" + else: + # Fallback to old format if package name is unknown + clean_name = filtered_results[0]['name'].replace(' ', '_') + base_filename = f"{clean_name}_myst25" + ext = ".apk" cd = dl_resp.headers.get('content-disposition') if cd: @@ -67,8 +98,7 @@ def download_app(app_name, version): original_filename = fname[0].strip('"') ext = os.path.splitext(original_filename)[1] - clean_name = filtered_results[0]['name'].replace(' ', '_') - filename = os.path.join(builds_dir, f"{clean_name}_myst25{ext}") + filename = os.path.join(builds_dir, f"{base_filename}{ext}") print(f"Saving to {filename}...") with open(filename, 'wb') as f: @@ -81,7 +111,7 @@ def download_app(app_name, version): print("Bundle detected. 
Extracting base APK...") extracted = apkm_converter.extract_base_apk(filename, output_dir=builds_dir) if extracted: - final_apk_name = os.path.join(builds_dir, f"{clean_name}_myst25.apk") + final_apk_name = os.path.join(builds_dir, f"{base_filename}.apk") if os.path.abspath(extracted) != os.path.abspath(final_apk_name): os.rename(extracted, final_apk_name) os.remove(filename) From 2fec45c0c958453c638aa4dc62118a073460ac1d Mon Sep 17 00:00:00 2001 From: AI Bot Date: Thu, 4 Jun 2026 20:58:40 +0530 Subject: [PATCH 14/24] Fix X version string in apps.json --- apps.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps.json b/apps.json index b9c8435..02442a2 100644 --- a/apps.json +++ b/apps.json @@ -1,5 +1,5 @@ { "YouTube": "21.21.80", "YouTube Music": "9.21.51", - "X": "11.91.0-release-ripped.0" + "X": "11.91.0-release.0" } From e69a1be9dc9a0fe5c27a341a258f4e89312df87d Mon Sep 17 00:00:00 2001 From: AI Bot Date: Thu, 4 Jun 2026 22:19:12 +0530 Subject: [PATCH 15/24] Prioritize universal architecture and nodpi for maximum device compatibility --- apkmirror.py | 45 +++++++++++++++++++-------------------------- 1 file changed, 19 insertions(+), 26 deletions(-) diff --git a/apkmirror.py b/apkmirror.py index 9c1886d..fd33ba0 100644 --- a/apkmirror.py +++ b/apkmirror.py @@ -116,32 +116,25 @@ def get_app_details(self, app_link): if not variants: return None - # Priority ranking: - # 1. APK + universal - # 2. APK + any - # 3. BUNDLE + universal - # 4. 
BUNDLE + any - - best_variant = None - for variant in variants: - if variant["type"] == "APK" and variant["architecture"].lower() == "universal": - best_variant = variant - break - - if not best_variant: - for variant in variants: - if variant["type"] == "APK": - best_variant = variant - break - - if not best_variant: - for variant in variants: - if variant["type"] == "BUNDLE" and variant["architecture"].lower() == "universal": - best_variant = variant - break - - if not best_variant: - best_variant = variants[0] + def score_variant(v): + score = 0 + # Prioritize APK over BUNDLE + if v["type"] == "APK": score += 1000 + elif v["type"] == "BUNDLE": score += 500 + + # Prioritize universal architecture + arch = v["architecture"].lower() + if arch == "universal": score += 100 + elif "arm64-v8a" in arch: score += 50 + elif "armeabi-v7a" in arch: score += 10 + + # Prioritize nodpi for maximum compatibility + dpi = v["dpi"].lower() + if dpi == "nodpi": score += 20 + + return score + + best_variant = max(variants, key=score_variant) return best_variant From e7f61611e57ed4f1b6a9282cca8203b351a7f03d Mon Sep 17 00:00:00 2001 From: AI Bot Date: Fri, 5 Jun 2026 00:14:21 +0530 Subject: [PATCH 16/24] Add UptoDown source for Instagram + blocked apps, smart dual-source routing --- .github/workflows/download_apks.yml | 2 - apkmcli | 101 ++++++++++++------- apps.json | 3 +- uptodown.py | 147 ++++++++++++++++++++++++++++ 4 files changed, 215 insertions(+), 38 deletions(-) create mode 100644 uptodown.py diff --git a/.github/workflows/download_apks.yml b/.github/workflows/download_apks.yml index 4080b64..e8c0ae1 100644 --- a/.github/workflows/download_apks.yml +++ b/.github/workflows/download_apks.yml @@ -1,8 +1,6 @@ name: APK Downloader on: - schedule: - - cron: '0 0 * * *' # Runs every day at midnight UTC workflow_dispatch: permissions: diff --git a/apkmcli b/apkmcli index a1cdc56..f89f1dc 100755 --- a/apkmcli +++ b/apkmcli @@ -4,20 +4,54 @@ import sys import os import re import glob 
+import json from apkmirror import APKMirror +from uptodown import UptoDown import apkm_converter apkm = APKMirror(timeout=3, results=5) +uptodown = UptoDown(timeout=2) + +PACKAGE_NAMES = { + "YouTube": "com.google.android.youtube", + "YouTube Music": "com.google.android.apps.youtube.music", + "X": "com.twitter.android", + "Instagram": "com.instagram.android", + "TikTok": "com.zhiliaoapp.musically", + "WhatsApp": "com.whatsapp", + "Snapchat": "com.snapchat.android", + "Facebook": "com.facebook.katana", + "Telegram": "org.telegram.messenger", + "Spotify": "com.spotify.music", +} + +# Apps that are blocked on APKMirror by Cloudflare — use UptoDown for these +UPTODOWN_ONLY = {"Instagram", "Facebook", "WhatsApp", "Snapchat", "TikTok"} + + +def download_via_uptodown(app_name): + """Download latest APK via UptoDown (no Cloudflare restrictions).""" + print(f"\n--- Processing {app_name} via UptoDown ---") + pkg_name = PACKAGE_NAMES.get(app_name) + filename = uptodown.download(app_name, output_dir="builds", package_name=pkg_name) + if filename: + print(f"Ready: {filename}") + else: + print(f"Failed to download {app_name} via UptoDown.") + return filename -def download_app(app_name, version): + +def download_via_apkmirror(app_name, version): + """Download specific version APK via APKMirror.""" search_query = f"{app_name} {version}".strip() - print(f"\n--- Processing {app_name} ---") + print(f"\n--- Processing {app_name} via APKMirror ---") results = apkm.search(search_query) if not results: - print(f"No results found for {search_query}.") - return + print(f"No results found for '{search_query}' on APKMirror.") + print(f"Falling back to UptoDown for {app_name}...") + return download_via_uptodown(app_name) filtered_results = [] for r in results: @@ -26,7 +60,7 @@ def download_app(app_name, version): remainder = name[len(app_name):].strip() if remainder and remainder[0].isdigit(): filtered_results.append(r) - + if not filtered_results: filtered_results = results @@ -34,8 +68,8 
@@ def download_app(app_name, version): app_details = apkm.get_app_details(filtered_results[0]["link"]) if not app_details: - print("Could not find suitable variants.") - return + print("Could not find suitable variants on APKMirror. Falling back to UptoDown...") + return download_via_uptodown(app_name) print(f"Selected best variant: {app_details['architecture']} ({app_details['type']})") @@ -49,17 +83,11 @@ def download_app(app_name, version): builds_dir = "builds" os.makedirs(builds_dir, exist_ok=True) - - PACKAGE_NAMES = { - "YouTube": "com.google.android.youtube", - "YouTube Music": "com.google.android.apps.youtube.music", - "X": "com.twitter.android", - "Instagram": "com.instagram.android" - } - - pkg_name_for_delete = PACKAGE_NAMES.get(app_name) - if pkg_name_for_delete: - old_files = glob.glob(os.path.join(builds_dir, f"{pkg_name_for_delete}-*-all.*")) + + pkg_name = PACKAGE_NAMES.get(app_name) + + if pkg_name: + old_files = glob.glob(os.path.join(builds_dir, f"{pkg_name}-*-all.*")) else: clean_app_name = app_name.replace(' ', '_') old_files = glob.glob(os.path.join(builds_dir, f"{clean_app_name}_*_myst25.*")) @@ -71,22 +99,14 @@ def download_app(app_name, version): except Exception as e: print(f"Failed to delete {old_file}: {e}") - - - # Extract clean version without spaces clean_version = version.replace(' ', '_') if not clean_version: - # If version was blank, try to extract it from the scraped name (e.g. 
"YouTube 21.21.80") name_parts = filtered_results[0]['name'].split() clean_version = name_parts[-1] if name_parts else "latest" - - pkg_name = PACKAGE_NAMES.get(app_name) - + if pkg_name: - # Use exact requested format: com.google.android.youtube-21.18.168-all.apk base_filename = f"{pkg_name}-{clean_version}-all" else: - # Fallback to old format if package name is unknown clean_name = filtered_results[0]['name'].replace(' ', '_') base_filename = f"{clean_name}_myst25" @@ -120,10 +140,18 @@ def download_app(app_name, version): print("Extraction failed.") else: print(f"Ready: {filename}") + return filename + + +def download_app(app_name, version=""): + """Smart downloader: uses UptoDown for Cloudflare-blocked apps, APKMirror otherwise.""" + if app_name in UPTODOWN_ONLY: + return download_via_uptodown(app_name) + else: + return download_via_apkmirror(app_name, version) + def main(): - import json - # Check for non-interactive flag if len(sys.argv) > 1 and sys.argv[1] == "--download-all": try: @@ -135,10 +163,11 @@ def main(): download_app(app_name, version) return - apps = ["YouTube", "YouTube Music", "X"] - print("Select an app to search:") + apps = ["YouTube", "YouTube Music", "X", "Instagram"] + print("Select an app to download:") for i, app in enumerate(apps): - print(f"[{i+1}] {app}") + src = "UptoDown" if app in UPTODOWN_ONLY else "APKMirror" + print(f"[{i+1}] {app} (via {src})") print(f"[{len(apps)+1}] Download All (from apps.json)") print(f"[{len(apps)+2}] Custom Search") @@ -149,15 +178,17 @@ def main(): if 0 <= app_choice < len(apps): app_name = apps[app_choice] - version = input(f"Enter version for {app_name} (or leave blank for latest):\n -> ") - download_app(app_name, version) + if app_name in UPTODOWN_ONLY: + download_app(app_name) + else: + version = input(f"Enter version for {app_name} (or leave blank for latest):\n -> ") + download_app(app_name, version) elif app_choice == len(apps): try: with open("apps.json", "r") as f: apps_config = json.load(f) 
except Exception as e: sys.exit(f"Failed to read apps.json: {e}") - for app_name, version in apps_config.items(): download_app(app_name, version) elif app_choice == len(apps) + 1: diff --git a/apps.json b/apps.json index 02442a2..de0316e 100644 --- a/apps.json +++ b/apps.json @@ -1,5 +1,6 @@ { "YouTube": "21.21.80", "YouTube Music": "9.21.51", - "X": "11.91.0-release.0" + "X": "11.91.0-release.0", + "Instagram": "latest" } diff --git a/uptodown.py b/uptodown.py new file mode 100644 index 0000000..4e0869c --- /dev/null +++ b/uptodown.py @@ -0,0 +1,147 @@ +""" +UptoDown APK scraper — works for ALL apps including Instagram +that are blocked on APKMirror by Cloudflare Turnstile. + +URL pattern: https://[app-slug].en.uptodown.com/android/download +Download URL: https://dw.uptodown.com/dwn/[token] +""" + +import time +from urllib.parse import quote_plus +from bs4 import BeautifulSoup +import cloudscraper + + +# Map known app names to their UptoDown slugs +APP_SLUGS = { + "YouTube": "youtube", + "YouTube Music": "youtube-music", + "X": "twitter", + "Instagram": "instagram", + "TikTok": "tik-tok", + "WhatsApp": "whatsapp", + "Snapchat": "snapchat", + "Facebook": "facebook", + "Telegram": "telegram", + "Spotify": "spotify", +} + + +class UptoDown: + def __init__(self, timeout=2): + self.timeout = timeout + self.base_url = "https://en.uptodown.com" + self.dw_base = "https://dw.uptodown.com/dwn" + self.headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36", + "Accept-Language": "en-US,en;q=0.9", + "Referer": "https://en.uptodown.com/", + } + self.scraper = cloudscraper.create_scraper() + + def _get(self, url): + time.sleep(self.timeout) + return self.scraper.get(url, headers=self.headers, timeout=15) + + def get_app_slug(self, app_name): + """Get the UptoDown slug for an app name, searching if not in known list.""" + slug = APP_SLUGS.get(app_name) + if slug: + return slug + # Try converting 
app name to a slug guess + return app_name.lower().replace(" ", "-") + + def get_download_info(self, app_name): + """Get the latest version and direct download URL for an app.""" + slug = self.get_app_slug(app_name) + app_url = f"https://{slug}.en.uptodown.com/android" + download_page_url = f"{app_url}/download" + + print(f"[uptodown] Fetching {download_page_url}") + r = self._get(download_page_url) + + if r.status_code != 200: + print(f"[uptodown] Failed: status {r.status_code}") + return None + + soup = BeautifulSoup(r.text, "html.parser") + + # Get the download token from the button + btn = soup.find("button", {"id": "detail-download-button"}) + if not btn: + print("[uptodown] Could not find download button") + return None + + token = btn.get("data-url") + if not token: + print("[uptodown] No download token found") + return None + + # Get version info + version = None + version_el = soup.find("span", {"itemprop": "version"}) + if not version_el: + version_el = soup.find("div", {"class": "version"}) + if version_el: + version = version_el.text.strip() + + direct_url = f"{self.dw_base}/{token}" + + return { + "app_name": app_name, + "version": version or "unknown", + "download_url": direct_url, + "app_page": app_url, + } + + def download(self, app_name, output_dir="builds", package_name=None): + """Download the latest APK for an app.""" + import os + + info = self.get_download_info(app_name) + if not info: + print(f"[uptodown] Could not get download info for {app_name}") + return None + + version = info["version"] + print(f"[uptodown] Downloading {app_name} {version} from {info['download_url']}") + + # Build filename using package name if provided + os.makedirs(output_dir, exist_ok=True) + if package_name: + filename = os.path.join(output_dir, f"{package_name}-{version}-all.apk") + else: + clean = app_name.replace(" ", "_") + filename = os.path.join(output_dir, f"{clean}-{version}-all.apk") + + # Delete old versions + import glob + if package_name: + old_files = 
glob.glob(os.path.join(output_dir, f"{package_name}-*-all.*")) + else: + old_files = glob.glob(os.path.join(output_dir, f"{app_name.replace(' ', '_')}-*-all.*")) + for f in old_files: + try: + os.remove(f) + print(f"[uptodown] Removed old: {f}") + except Exception: + pass + + r = self.scraper.get(info["download_url"], headers=self.headers, stream=True, timeout=60) + if r.status_code != 200: + print(f"[uptodown] Download failed: status {r.status_code}") + return None + + # Try to get filename from the URL (e.g. instagram-433-0.0.4-68.apk) + final_url = r.url + url_filename = final_url.split("/")[-1] + if url_filename.endswith(".apk") and package_name: + filename = os.path.join(output_dir, f"{package_name}-{version}-all.apk") + + print(f"[uptodown] Saving to {filename}...") + with open(filename, "wb") as f: + for chunk in r.iter_content(chunk_size=8192): + f.write(chunk) + + print(f"[uptodown] Done: {filename}") + return filename From 64a5b9749172e505853f09771154082688a67977 Mon Sep 17 00:00:00 2001 From: AI Bot Date: Fri, 5 Jun 2026 00:21:40 +0530 Subject: [PATCH 17/24] =?UTF-8?q?Auto-fallback:=20APKMirror=20blocked=20?= =?UTF-8?q?=E2=86=92=20switch=20to=20UptoDown=20for=20any=20searched=20app?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- apkmcli | 227 ++++++++++++++++++++++++++------------------------- apkmirror.py | 5 ++ uptodown.py | 41 +++++++++- 3 files changed, 160 insertions(+), 113 deletions(-) diff --git a/apkmcli b/apkmcli index f89f1dc..de4e264 100755 --- a/apkmcli +++ b/apkmcli @@ -13,6 +13,7 @@ import apkm_converter apkm = APKMirror(timeout=3, results=5) uptodown = UptoDown(timeout=2) +# Known package names for standard filename format PACKAGE_NAMES = { "YouTube": "com.google.android.youtube", "YouTube Music": "com.google.android.apps.youtube.music", @@ -26,133 +27,124 @@ PACKAGE_NAMES = { "Spotify": "com.spotify.music", } -# Apps that are blocked on APKMirror by Cloudflare — use UptoDown for these 
-UPTODOWN_ONLY = {"Instagram", "Facebook", "WhatsApp", "Snapchat", "TikTok"} - -def download_via_uptodown(app_name): - """Download latest APK via UptoDown (no Cloudflare restrictions).""" - print(f"\n--- Processing {app_name} via UptoDown ---") - pkg_name = PACKAGE_NAMES.get(app_name) +def download_via_uptodown(app_name, pkg_name=None): + """Download latest APK via UptoDown.""" + print(f"[→ UptoDown] Downloading {app_name}...") filename = uptodown.download(app_name, output_dir="builds", package_name=pkg_name) if filename: - print(f"Ready: {filename}") + print(f"✓ Ready: {filename}") else: - print(f"Failed to download {app_name} via UptoDown.") + print(f"✗ Could not download {app_name} from UptoDown either.") return filename -def download_via_apkmirror(app_name, version): - """Download specific version APK via APKMirror.""" +def download_app(app_name, version=""): + """ + Try APKMirror first. If it blocks (403 / no results), auto-switch to UptoDown. + Works for ANY app the user searches — not just the hardcoded list. + """ + pkg_name = PACKAGE_NAMES.get(app_name) + builds_dir = "builds" + os.makedirs(builds_dir, exist_ok=True) + + print(f"\n{'='*50}") + print(f" Searching: {app_name}{' ' + version if version else ''}") + print(f"{'='*50}") + + # --- Try APKMirror first --- search_query = f"{app_name} {version}".strip() - print(f"\n--- Processing {app_name} via APKMirror ---") results = apkm.search(search_query) - if not results: - print(f"No results found for '{search_query}' on APKMirror.") - print(f"Falling back to UptoDown for {app_name}...") - return download_via_uptodown(app_name) + # None = blocked by Cloudflare → auto-fallback + if results is None: + print(f"⚠ APKMirror blocked '{app_name}'. 
Switching to UptoDown automatically...") + return download_via_uptodown(app_name, pkg_name) - filtered_results = [] - for r in results: - name = r['name'] - if name.startswith(app_name + " "): - remainder = name[len(app_name):].strip() - if remainder and remainder[0].isdigit(): - filtered_results.append(r) + # Empty results = not found on APKMirror → try UptoDown + if not results: + print(f"⚠ No results for '{app_name}' on APKMirror. Trying UptoDown...") + return download_via_uptodown(app_name, pkg_name) - if not filtered_results: - filtered_results = results + # Filter results to best match + filtered = [r for r in results if r['name'].startswith(app_name + " ") and + r['name'][len(app_name):].strip() and r['name'][len(app_name):].strip()[0].isdigit()] + if not filtered: + filtered = results - print(f"Found latest release: {filtered_results[0]['name']}") - app_details = apkm.get_app_details(filtered_results[0]["link"]) + print(f"[APKMirror] Found: {filtered[0]['name']}") + app_details = apkm.get_app_details(filtered[0]["link"]) if not app_details: - print("Could not find suitable variants on APKMirror. Falling back to UptoDown...") - return download_via_uptodown(app_name) - - print(f"Selected best variant: {app_details['architecture']} ({app_details['type']})") + print(f"⚠ Could not get variant details from APKMirror. Switching to UptoDown...") + return download_via_uptodown(app_name, pkg_name) - app_link = app_details["download_link"] - direct_link = apkm.get_download_link(app_link) - direct_url = apkm.get_direct_download_link(direct_link) - - print(f"Downloading from {direct_url}...") + print(f"[APKMirror] Variant: {app_details['architecture']} / {app_details['dpi']} ({app_details['type']})") + # Get download URLs + try: + app_link = app_details["download_link"] + direct_link = apkm.get_download_link(app_link) + direct_url = apkm.get_direct_download_link(direct_link) + except Exception as e: + print(f"⚠ Failed to get download link from APKMirror ({e}). 
Switching to UptoDown...") + return download_via_uptodown(app_name, pkg_name) + + print(f"[APKMirror] Downloading...") dl_resp = apkm.scraper.get(direct_url, stream=True, headers=apkm.headers) - builds_dir = "builds" - os.makedirs(builds_dir, exist_ok=True) - - pkg_name = PACKAGE_NAMES.get(app_name) - + # Delete old versions if pkg_name: old_files = glob.glob(os.path.join(builds_dir, f"{pkg_name}-*-all.*")) else: - clean_app_name = app_name.replace(' ', '_') - old_files = glob.glob(os.path.join(builds_dir, f"{clean_app_name}_*_myst25.*")) - - for old_file in old_files: + old_files = glob.glob(os.path.join(builds_dir, f"{app_name.replace(' ', '_')}-*.*")) + for f in old_files: try: - os.remove(old_file) - print(f"Deleted old version: {old_file}") - except Exception as e: - print(f"Failed to delete {old_file}: {e}") - - clean_version = version.replace(' ', '_') - if not clean_version: - name_parts = filtered_results[0]['name'].split() - clean_version = name_parts[-1] if name_parts else "latest" + os.remove(f) + except Exception: + pass + # Build filename + clean_version = version.strip() or filtered[0]['name'].split()[-1] if pkg_name: base_filename = f"{pkg_name}-{clean_version}-all" else: - clean_name = filtered_results[0]['name'].replace(' ', '_') - base_filename = f"{clean_name}_myst25" + base_filename = f"{filtered[0]['name'].replace(' ', '_')}" ext = ".apk" - cd = dl_resp.headers.get('content-disposition') - if cd: - fname = re.findall('filename=(.+)', cd) - if len(fname) > 0: - original_filename = fname[0].strip('"') - ext = os.path.splitext(original_filename)[1] + cd = dl_resp.headers.get('content-disposition', '') + match = re.findall(r'filename=(.+)', cd) + if match: + ext = os.path.splitext(match[0].strip('"'))[1] or ".apk" filename = os.path.join(builds_dir, f"{base_filename}{ext}") + print(f"[APKMirror] Saving to {filename}...") - print(f"Saving to {filename}...") with open(filename, 'wb') as f: for chunk in dl_resp.iter_content(chunk_size=8192): 
f.write(chunk) - print(f"Downloaded {filename}") - - if filename.endswith(".apkm") or filename.endswith(".apks"): - print("Bundle detected. Extracting base APK...") + # Handle bundles + if filename.endswith((".apkm", ".apks")): + print("[APKMirror] Bundle detected. Extracting base APK...") extracted = apkm_converter.extract_base_apk(filename, output_dir=builds_dir) if extracted: - final_apk_name = os.path.join(builds_dir, f"{base_filename}.apk") - if os.path.abspath(extracted) != os.path.abspath(final_apk_name): - os.rename(extracted, final_apk_name) + final = os.path.join(builds_dir, f"{base_filename}.apk") + if os.path.abspath(extracted) != os.path.abspath(final): + os.rename(extracted, final) os.remove(filename) - print(f"Ready: {final_apk_name}") + print(f"✓ Ready: {final}") + return final else: - print("Extraction failed.") - else: - print(f"Ready: {filename}") - return filename - - -def download_app(app_name, version=""): - """Smart downloader: uses UptoDown for Cloudflare-blocked apps, APKMirror otherwise.""" - if app_name in UPTODOWN_ONLY: - return download_via_uptodown(app_name) + print("✗ Bundle extraction failed.") + return None else: - return download_via_apkmirror(app_name, version) + print(f"✓ Ready: {filename}") + return filename def main(): - # Check for non-interactive flag + # Non-interactive batch mode if len(sys.argv) > 1 and sys.argv[1] == "--download-all": try: with open("apps.json", "r") as f: @@ -163,40 +155,53 @@ def main(): download_app(app_name, version) return - apps = ["YouTube", "YouTube Music", "X", "Instagram"] - print("Select an app to download:") - for i, app in enumerate(apps): - src = "UptoDown" if app in UPTODOWN_ONLY else "APKMirror" - print(f"[{i+1}] {app} (via {src})") - print(f"[{len(apps)+1}] Download All (from apps.json)") - print(f"[{len(apps)+2}] Custom Search") + # --- Interactive menu --- + # Load reference apps from apps.json if available + ref_apps = {} + try: + with open("apps.json", "r") as f: + ref_apps = 
json.load(f) + except Exception: + pass + + print("\n╔══════════════════════════════╗") + print("║ APK Downloader ║") + print("╚══════════════════════════════╝") + + if ref_apps: + print("\nQuick pick (from apps.json):") + ref_list = list(ref_apps.items()) + for i, (name, ver) in enumerate(ref_list): + print(f" [{i+1}] {name} {ver}") + print(f" [{len(ref_list)+1}] Download ALL above") + print(f" [0] Custom search") + else: + print(" [0] Custom search") try: - app_choice = int(input(" -> ")) - 1 + choice = int(input("\n → ")) except ValueError: - sys.exit("Invalid choice") + sys.exit("Invalid choice.") - if 0 <= app_choice < len(apps): - app_name = apps[app_choice] - if app_name in UPTODOWN_ONLY: - download_app(app_name) - else: - version = input(f"Enter version for {app_name} (or leave blank for latest):\n -> ") - download_app(app_name, version) - elif app_choice == len(apps): - try: - with open("apps.json", "r") as f: - apps_config = json.load(f) - except Exception as e: - sys.exit(f"Failed to read apps.json: {e}") - for app_name, version in apps_config.items(): + if ref_apps: + ref_list = list(ref_apps.items()) + if choice == 0: + app_name = input("App name to search:\n → ").strip() + version = input(f"Version (leave blank for latest):\n → ").strip() download_app(app_name, version) - elif app_choice == len(apps) + 1: - app_name = input("Enter app name to search:\n -> ") - version = input(f"Enter version for {app_name} (or leave blank for latest):\n -> ") - download_app(app_name, version) + elif 1 <= choice <= len(ref_list): + name, ver = ref_list[choice - 1] + download_app(name, ver) + elif choice == len(ref_list) + 1: + for name, ver in ref_list: + download_app(name, ver) + else: + sys.exit("Invalid choice.") else: - sys.exit("Invalid choice") + app_name = input("App name to search:\n → ").strip() + version = input(f"Version (leave blank for latest):\n → ").strip() + download_app(app_name, version) + if __name__ == "__main__": main() diff --git a/apkmirror.py 
b/apkmirror.py index fd33ba0..32ddf39 100644 --- a/apkmirror.py +++ b/apkmirror.py @@ -35,6 +35,11 @@ def search(self, query): print(f"[search] Status: {resp.status_code}") + # Cloudflare blocked — signal caller to fall back + if resp.status_code == 403 or "Just a moment" in resp.text: + print("[search] Blocked by Cloudflare.") + return None + soup = BeautifulSoup(resp.text, "html.parser") apps = [] appRow = soup.find_all("div", {"class": "appRow"}) diff --git a/uptodown.py b/uptodown.py index 4e0869c..481f985 100644 --- a/uptodown.py +++ b/uptodown.py @@ -7,6 +7,7 @@ """ import time +import re from urllib.parse import quote_plus from bs4 import BeautifulSoup import cloudscraper @@ -44,13 +45,49 @@ def _get(self, url): return self.scraper.get(url, headers=self.headers, timeout=15) def get_app_slug(self, app_name): - """Get the UptoDown slug for an app name, searching if not in known list.""" + """Get the UptoDown slug: check known list first, then search UptoDown.""" slug = APP_SLUGS.get(app_name) if slug: return slug - # Try converting app name to a slug guess + + # Try searching UptoDown for the app + slug = self._search_slug(app_name) + if slug: + return slug + + # Last resort: guess the slug from the name return app_name.lower().replace(" ", "-") + def _search_slug(self, app_name): + """Search UptoDown to find the correct app slug.""" + try: + query = quote_plus(app_name) + # UptoDown search API + url = f"https://en.uptodown.com/android/search/{query}" + r = self.scraper.get(url, headers=self.headers, timeout=10) + if r.status_code != 200: + # Try alternative search format + url = f"https://en.uptodown.com/android/q/{query}" + r = self.scraper.get(url, headers=self.headers, timeout=10) + if r.status_code == 200: + soup = BeautifulSoup(r.text, "html.parser") + # Find first result app link + result = soup.find("a", {"class": "item"}) + if not result: + result = soup.find("div", {"class": "item"}) + if result: + result = result.find("a") + if result and 
result.get("href"): + href = result["href"] + # Extract slug from URL like https://instagram.en.uptodown.com/android + import re + m = re.search(r"https?://([^.]+)\.en\.uptodown\.com", href) + if m: + return m.group(1) + except Exception as e: + print(f"[uptodown] Search error: {e}") + return None + def get_download_info(self, app_name): """Get the latest version and direct download URL for an app.""" slug = self.get_app_slug(app_name) From 5f449930f276df6de59e4649f47b2b52761966ed Mon Sep 17 00:00:00 2001 From: AI Bot Date: Fri, 5 Jun 2026 00:23:15 +0530 Subject: [PATCH 18/24] Ask before deleting old APK in search mode, auto-delete in batch/actions mode --- apkmcli | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/apkmcli b/apkmcli index de4e264..3eeeaf8 100755 --- a/apkmcli +++ b/apkmcli @@ -39,7 +39,7 @@ def download_via_uptodown(app_name, pkg_name=None): return filename -def download_app(app_name, version=""): +def download_app(app_name, version="", ask_before_delete=False): """ Try APKMirror first. If it blocks (403 / no results), auto-switch to UptoDown. Works for ANY app the user searches — not just the hardcoded list. @@ -98,6 +98,16 @@ def download_app(app_name, version=""): old_files = glob.glob(os.path.join(builds_dir, f"{pkg_name}-*-all.*")) else: old_files = glob.glob(os.path.join(builds_dir, f"{app_name.replace(' ', '_')}-*.*")) + + if old_files and ask_before_delete: + print(f"\nFound existing APK(s):") + for f in old_files: + size = os.path.getsize(f) / (1024*1024) + print(f" {os.path.basename(f)} ({size:.1f} MB)") + confirm = input("Delete old APK(s) before downloading? 
(y/n):\n → ").strip().lower() + if confirm not in ("y", "yes", ""): + old_files = [] # skip deletion + for f in old_files: try: os.remove(f) @@ -188,19 +198,19 @@ def main(): if choice == 0: app_name = input("App name to search:\n → ").strip() version = input(f"Version (leave blank for latest):\n → ").strip() - download_app(app_name, version) + download_app(app_name, version, ask_before_delete=True) elif 1 <= choice <= len(ref_list): name, ver = ref_list[choice - 1] - download_app(name, ver) + download_app(name, ver, ask_before_delete=True) elif choice == len(ref_list) + 1: for name, ver in ref_list: - download_app(name, ver) + download_app(name, ver, ask_before_delete=False) else: sys.exit("Invalid choice.") else: app_name = input("App name to search:\n → ").strip() version = input(f"Version (leave blank for latest):\n → ").strip() - download_app(app_name, version) + download_app(app_name, version, ask_before_delete=True) if __name__ == "__main__": From 6528afecdb49290d3f9a06884a0410eac1eb3734 Mon Sep 17 00:00:00 2001 From: AI Bot Date: Fri, 5 Jun 2026 00:24:38 +0530 Subject: [PATCH 19/24] Fix workflow: NO_PROXY for UptoDown, install from requirements.txt --- .github/workflows/download_apks.yml | 11 +++++++---- requirements.txt | 2 ++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/.github/workflows/download_apks.yml b/.github/workflows/download_apks.yml index e8c0ae1..9376453 100644 --- a/.github/workflows/download_apks.yml +++ b/.github/workflows/download_apks.yml @@ -25,16 +25,17 @@ jobs: - name: Install dependencies run: | - pip install beautifulsoup4 cloudscraper requests + pip install -r requirements.txt - name: Run APK Scraper env: + # Proxy only for APKMirror (routes through your home laptop via Tailscale) HTTP_PROXY: http://100.90.77.61:8899 HTTPS_PROXY: http://100.90.77.61:8899 + # UptoDown downloads directly (no proxy needed — no Cloudflare) + NO_PROXY: uptodown.com,en.uptodown.com,dw.uptodown.com,dw.uptodown.net run: | - # Create builds 
directory to ensure it exists mkdir -p builds - # Run script in automated mode python apkmcli --download-all - name: Get Date @@ -48,4 +49,6 @@ jobs: name: APK Builds - ${{ steps.date.outputs.date }} files: builds/*.apk body: | - Automated APK fetch via GitHub Actions through Tailscale Proxy. + Automated APK builds: + - YouTube, YouTube Music, X → APKMirror via Tailscale proxy + - Instagram → UptoDown (direct, no Cloudflare) diff --git a/requirements.txt b/requirements.txt index 5eed1e1..5993aa7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,4 @@ bs4 cloudscraper +requests +beautifulsoup4 From fdc9fa5a6e5f0b21f1a938be40510dfcc2a41061 Mon Sep 17 00:00:00 2001 From: myst25 <83259411+myst-25@users.noreply.github.com> Date: Fri, 5 Jun 2026 00:25:17 +0530 Subject: [PATCH 20/24] Update Instagram version in apps.json --- apps.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps.json b/apps.json index de0316e..568e74c 100644 --- a/apps.json +++ b/apps.json @@ -2,5 +2,5 @@ "YouTube": "21.21.80", "YouTube Music": "9.21.51", "X": "11.91.0-release.0", - "Instagram": "latest" + "Instagram": "426.0.0.37.68" } From 74cca1366399792cf23beee5e4ba8fc9d6418ba3 Mon Sep 17 00:00:00 2001 From: AI Bot Date: Fri, 5 Jun 2026 00:30:20 +0530 Subject: [PATCH 21/24] Update README.md with detailed documentation --- README.md | 298 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 295 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 330487b..ffdb7dd 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,295 @@ -# ampy - an apkmirror scraper in python -Usage: `pip install -r requirements.txt`, then `python apkmcli` -Usage in other apps/scripts: please read the `apkmirror.py` file, or `apkmcli` to understand how it works. 
Im too lazy to explain, sorry lol +# ampy 📦 + +A fast, smart APK downloader that scrapes the latest versions of your favourite Android apps directly from **APKMirror** and **UptoDown** — with automatic fallback when one source is blocked. + +--- + +## What is this? + +`ampy` is a command-line tool that lets you search for any Android app and download the latest APK automatically. It picks the best available variant (universal architecture, nodpi) so the APK works on every phone. + +It is also fully compatible with **GitHub Actions**, meaning you can set it up once and trigger a download + release of all your apps from the cloud at any time — no manual work required. + +--- + +## How the Scraper Works + +When you run `ampy` and search for an app, it follows this flow: + +``` +User searches "Instagram" + │ + ▼ + Try APKMirror first + │ + ┌───┴───────────────────┐ + │ 200 OK? │ 403 / Blocked? + ▼ ▼ +Download from Auto-switch to +APKMirror ✓ UptoDown ✓ +``` + +### APKMirror +The primary source. It has the most accurate and up-to-date APK releases. The scraper: +1. Searches for the app by name and version +2. Parses the search results page to find matching releases +3. Scores every available variant by architecture and DPI to pick the best one: + - Prefers **universal** architecture (runs on any processor — ARM, ARM64, x86) + - Prefers **nodpi** DPI (graphics scale perfectly on every screen size) + - Prefers bare `.apk` over `.apkm` / `.apks` bundles +4. Navigates through three pages to get the final direct download URL +5. Downloads and saves the file + +### UptoDown (Automatic Fallback) +Some apps — like Instagram, Facebook, and WhatsApp — are aggressively protected by Cloudflare's **Turnstile** bot challenge on APKMirror. No browser automation can solve this challenge without human interaction. + +For these apps, the scraper **automatically falls back** to UptoDown, which serves the same apps without any bot protection. The flow is: +1. 
APKMirror returns `403 Forbidden` +2. Scraper detects the block immediately +3. Switches to UptoDown, looks up the correct app slug +4. Downloads the latest APK directly from UptoDown's CDN + +This all happens invisibly — you just get your APK. + +--- + +## File Naming + +All downloaded APKs follow the standard naming format: + +``` +com.google.android.youtube-21.21.80-all.apk +com.instagram.android-433.0.0.4.68-all.apk +com.twitter.android-11.91.0-release.0-all.apk +``` + +Format: `[package.name]-[version]-all.apk` + +This format is required by automated patching pipelines (like ReVanced) that look for APKs by package name. + +--- + +## Installation + +```bash +git clone https://github.com/myst-25/ampy +cd ampy +python3 -m venv .venv +source .venv/bin/activate +pip install -r requirements.txt +``` + +--- + +## Usage + +### Interactive Mode + +Run the script and you will get a menu: + +```bash +python apkmcli +``` + +``` +╔══════════════════════════════╗ +║ APK Downloader ║ +╚══════════════════════════════╝ + +Quick pick (from apps.json): + [1] YouTube 21.21.80 + [2] YouTube Music 9.21.51 + [3] X 11.91.0-release.0 + [4] Instagram latest + [5] Download ALL above + [0] Custom search + + → +``` + +- Pick a number to download that specific app +- Pick **[5]** to download every app in `apps.json` at once +- Pick **[0]** to search for any app by name + +### Custom Search + +Choose `[0]` and type any app name: + +``` +App name to search: + → Spotify + +Version (leave blank for latest): + → +``` + +The scraper will search APKMirror first. If it gets blocked, it automatically switches to UptoDown. If an old APK for that app already exists in the `builds/` folder, it will ask you before deleting it: + +``` +Found existing APK(s): + com.spotify.music-8.9.94-all.apk (68.2 MB) +Delete old APK(s) before downloading? 
(y/n): + → +``` + +### Batch / Automated Mode + +```bash +python apkmcli --download-all +``` + +Reads `apps.json` and downloads every app silently — no prompts, no questions. Old APKs are automatically replaced. This is the mode used by GitHub Actions. + +--- + +## apps.json + +This file defines the apps downloaded in batch mode. Add or remove apps here: + +```json +{ + "YouTube": "21.21.80", + "YouTube Music": "9.21.51", + "X": "11.91.0-release.0", + "Instagram": "latest" +} +``` + +- The **key** is the app name as it appears on APKMirror +- The **value** is the version string, or `"latest"` for UptoDown-sourced apps + +Downloaded APKs are saved to the `builds/` folder. + +--- + +## GitHub Actions Setup + +The GitHub Actions workflow lets you trigger downloads from anywhere, at any time, without your laptop needing to be manually set up each time — except for one thing: **the proxy**. + +### Why a Proxy? + +APKMirror blocks all requests coming from cloud server IP addresses (AWS, Azure, GitHub Actions, etc.). To bypass this, the GitHub Actions runner tunnels all its traffic through your personal laptop using **Tailscale**, which gives the traffic your home IP address. + +UptoDown does **not** require a proxy — its direct downloads bypass Cloudflare entirely and are always fetched directly from the GitHub runner. + +### Setting Up Tailscale + +Tailscale creates a private, secure VPN between your laptop and the GitHub Actions runner. + +**Step 1 — Install Tailscale on your laptop:** +```bash +curl -fsSL https://tailscale.com/install.sh | sh +sudo tailscale up +``` + +**Step 2 — Get your laptop's Tailscale IP:** +```bash +tailscale ip -4 +# Example output: 100.90.77.61 +``` + +**Step 3 — Generate a reusable Auth Key:** +1. Go to [tailscale.com/admin/settings/keys](https://login.tailscale.com/admin/settings/keys) +2. Click **Generate auth key** +3. Check both **Reusable** and **Ephemeral** +4. 
Copy the key (starts with `tskey-auth-...`) + +**Step 4 — Add the key to GitHub Secrets:** +1. Go to your repo on GitHub → **Settings → Secrets and variables → Actions** +2. Click **New repository secret** +3. Name: `TAILSCALE_AUTHKEY` +4. Value: paste your `tskey-auth-...` key + +### Starting the Local Proxy + +Before triggering the GitHub Action, start the proxy on your laptop: + +```bash +~/.local/bin/proxy --hostname 0.0.0.0 --port 8899 +``` + +> **Important:** Use `--hostname 0.0.0.0` — this tells the proxy to listen on all network interfaces (including the Tailscale one), not just localhost. Without this, the GitHub Actions runner cannot connect. + +The workflow hardcodes your Tailscale IP (`100.90.77.61`) as the proxy address. If your Tailscale IP ever changes, update the `HTTP_PROXY` and `HTTPS_PROXY` lines in `.github/workflows/download_apks.yml`. + +### Triggering the Workflow + +1. Make sure your proxy is running (`~/.local/bin/proxy --hostname 0.0.0.0 --port 8899`) +2. Go to your repo on GitHub → **Actions** → **APK Downloader** +3. Click **Run workflow** → **Run workflow** + +The runner will: +1. Connect to your laptop via Tailscale VPN +2. Download YouTube, YouTube Music, X via APKMirror (through your home IP proxy) +3. Download Instagram via UptoDown (direct — no proxy needed) +4. Create a new GitHub Release with all APKs attached + +### Releases + +Every workflow run creates a new GitHub Release tagged with the current date and time: + +``` +release-2026.06.05-1530 +APK Builds - 2026.06.05-1530 +``` + +All APKs are attached to the release as assets, ready to download.
+ +--- + +## How the Auto-Fallback Works (Technical) + +The `apkmirror.py` scraper's `search()` method returns: +- A **list of results** → app found, proceed with download +- An **empty list** → no results found, try UptoDown +- `None` → blocked by Cloudflare (403), immediately switch to UptoDown + +The `apkmcli` script checks for `None` before anything else and reroutes the download to UptoDown transparently. + +UptoDown's `get_app_slug()` method tries three strategies to find the correct app page: +1. Check the built-in known-apps dictionary (YouTube, Instagram, X, etc.) +2. Search UptoDown's search endpoint to find the slug dynamically +3. Guess the slug by converting the app name to lowercase with hyphens + +--- + +## Supported Apps (Known Package Names) + +| App | Package Name | Source | +|-----|-------------|--------| +| YouTube | `com.google.android.youtube` | APKMirror | +| YouTube Music | `com.google.android.apps.youtube.music` | APKMirror | +| X (Twitter) | `com.twitter.android` | APKMirror | +| Instagram | `com.instagram.android` | UptoDown (auto-fallback) | +| TikTok | `com.zhiliaoapp.musically` | UptoDown (auto-fallback) | +| WhatsApp | `com.whatsapp` | UptoDown (auto-fallback) | +| Snapchat | `com.snapchat.android` | UptoDown (auto-fallback) | +| Facebook | `com.facebook.katana` | UptoDown (auto-fallback) | +| Telegram | `org.telegram.messenger` | APKMirror | +| Spotify | `com.spotify.music` | APKMirror | + +Any app not in this list will still download correctly — it just won't have a known package name, so the filename will use the scraped app name instead. 
+ +--- + +## Project Structure + +``` +ampy/ +├── apkmcli # Main CLI script — entry point +├── apkmirror.py # APKMirror scraper (primary source) +├── uptodown.py # UptoDown scraper (automatic fallback) +├── apkm_converter.py # Extracts base APK from .apkm / .apks bundles +├── apps.json # Apps to download in batch mode +├── requirements.txt # Python dependencies +├── builds/ # Downloaded APKs saved here +└── .github/ + └── workflows/ + └── download_apks.yml # GitHub Actions workflow +``` + +--- + +## License + +MIT From dffdf7dc83d409a80272ed17beb8dba14cb5f56d Mon Sep 17 00:00:00 2001 From: myst25 <83259411+myst-25@users.noreply.github.com> Date: Fri, 5 Jun 2026 00:30:48 +0530 Subject: [PATCH 22/24] Update apps.json --- apps.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps.json b/apps.json index 568e74c..de0316e 100644 --- a/apps.json +++ b/apps.json @@ -2,5 +2,5 @@ "YouTube": "21.21.80", "YouTube Music": "9.21.51", "X": "11.91.0-release.0", - "Instagram": "426.0.0.37.68" + "Instagram": "latest" } From 2bea4fdeef6db759fca17e22b6b95f0c17373579 Mon Sep 17 00:00:00 2001 From: AI Bot Date: Fri, 5 Jun 2026 14:46:13 +0530 Subject: [PATCH 23/24] Support fetching specific versions from UptoDown instead of always downloading latest --- apkmcli | 14 +++++++------- uptodown.py | 31 +++++++++++++++++++++++++++---- 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/apkmcli b/apkmcli index 3eeeaf8..967638a 100755 --- a/apkmcli +++ b/apkmcli @@ -28,10 +28,10 @@ PACKAGE_NAMES = { } -def download_via_uptodown(app_name, pkg_name=None): - """Download latest APK via UptoDown.""" +def download_via_uptodown(app_name, pkg_name=None, version=""): + """Download latest or specific APK via UptoDown.""" print(f"[→ UptoDown] Downloading {app_name}...") - filename = uptodown.download(app_name, output_dir="builds", package_name=pkg_name) + filename = uptodown.download(app_name, version=version, output_dir="builds", package_name=pkg_name) if filename: 
print(f"✓ Ready: {filename}") else: @@ -59,12 +59,12 @@ def download_app(app_name, version="", ask_before_delete=False): # None = blocked by Cloudflare → auto-fallback if results is None: print(f"⚠ APKMirror blocked '{app_name}'. Switching to UptoDown automatically...") - return download_via_uptodown(app_name, pkg_name) + return download_via_uptodown(app_name, pkg_name, version) # Empty results = not found on APKMirror → try UptoDown if not results: print(f"⚠ No results for '{app_name}' on APKMirror. Trying UptoDown...") - return download_via_uptodown(app_name, pkg_name) + return download_via_uptodown(app_name, pkg_name, version) # Filter results to best match filtered = [r for r in results if r['name'].startswith(app_name + " ") and @@ -77,7 +77,7 @@ def download_app(app_name, version="", ask_before_delete=False): if not app_details: print(f"⚠ Could not get variant details from APKMirror. Switching to UptoDown...") - return download_via_uptodown(app_name, pkg_name) + return download_via_uptodown(app_name, pkg_name, version) print(f"[APKMirror] Variant: {app_details['architecture']} / {app_details['dpi']} ({app_details['type']})") @@ -88,7 +88,7 @@ def download_app(app_name, version="", ask_before_delete=False): direct_url = apkm.get_direct_download_link(direct_link) except Exception as e: print(f"⚠ Failed to get download link from APKMirror ({e}). 
Switching to UptoDown...") - return download_via_uptodown(app_name, pkg_name) + return download_via_uptodown(app_name, pkg_name, version) print(f"[APKMirror] Downloading...") dl_resp = apkm.scraper.get(direct_url, stream=True, headers=apkm.headers) diff --git a/uptodown.py b/uptodown.py index 481f985..9266659 100644 --- a/uptodown.py +++ b/uptodown.py @@ -88,12 +88,35 @@ def _search_slug(self, app_name): print(f"[uptodown] Search error: {e}") return None - def get_download_info(self, app_name): - """Get the latest version and direct download URL for an app.""" + def get_download_info(self, app_name, target_version=None): + """Get the specified version (or latest) and direct download URL for an app.""" slug = self.get_app_slug(app_name) app_url = f"https://{slug}.en.uptodown.com/android" + + target_version = target_version.strip() if target_version else None + if target_version and target_version.lower() != "latest": + # Search for specific version + versions_url = f"{app_url}/versions" + print(f"[uptodown] Fetching versions from {versions_url}") + r = self._get(versions_url) + if r.status_code == 200: + soup = BeautifulSoup(r.text, "html.parser") + versions_list = soup.find("div", {"id": "versions-items-list"}) + if versions_list: + for item in versions_list.find_all("div", recursive=False): + v_span = item.find("span", {"class": "version"}) + if v_span and target_version in v_span.text.strip(): + v_id = item.get("data-version-id") + if v_id: + download_page_url = f"{app_url}/download/{v_id}" + return self._parse_download_page(app_name, app_url, download_page_url) + print(f"[uptodown] Target version '{target_version}' not found. 
Falling back to latest.") + + # Default to latest download_page_url = f"{app_url}/download" + return self._parse_download_page(app_name, app_url, download_page_url) + def _parse_download_page(self, app_name, app_url, download_page_url): print(f"[uptodown] Fetching {download_page_url}") r = self._get(download_page_url) @@ -131,11 +154,11 @@ def get_download_info(self, app_name): "app_page": app_url, } - def download(self, app_name, output_dir="builds", package_name=None): + def download(self, app_name, version=None, output_dir="builds", package_name=None): """Download the latest APK for an app.""" import os - info = self.get_download_info(app_name) + info = self.get_download_info(app_name, target_version=version) if not info: print(f"[uptodown] Could not get download info for {app_name}") return None From e66066e7d43da28455d9cd2fb927eb6214cc66b5 Mon Sep 17 00:00:00 2001 From: myst25 <83259411+myst-25@users.noreply.github.com> Date: Fri, 5 Jun 2026 14:54:14 +0530 Subject: [PATCH 24/24] Update apps.json --- apps.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps.json b/apps.json index de0316e..568e74c 100644 --- a/apps.json +++ b/apps.json @@ -2,5 +2,5 @@ "YouTube": "21.21.80", "YouTube Music": "9.21.51", "X": "11.91.0-release.0", - "Instagram": "latest" + "Instagram": "426.0.0.37.68" }