diff --git a/.github/workflows/download_apks.yml b/.github/workflows/download_apks.yml new file mode 100644 index 0000000..9376453 --- /dev/null +++ b/.github/workflows/download_apks.yml @@ -0,0 +1,54 @@ +name: APK Downloader + +on: + workflow_dispatch: + +permissions: + contents: write + +jobs: + download-and-release: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Tailscale + uses: tailscale/github-action@v2 + with: + authkey: ${{ secrets.TAILSCALE_AUTHKEY }} + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + + - name: Install dependencies + run: | + pip install -r requirements.txt + + - name: Run APK Scraper + env: + # Proxy only for APKMirror (routes through your home laptop via Tailscale) + HTTP_PROXY: http://100.90.77.61:8899 + HTTPS_PROXY: http://100.90.77.61:8899 + # UptoDown downloads directly (no proxy needed — no Cloudflare) + NO_PROXY: uptodown.com,en.uptodown.com,dw.uptodown.com,dw.uptodown.net + run: | + mkdir -p builds + python apkmcli --download-all + + - name: Get Date + id: date + run: echo "date=$(date +'%Y.%m.%d-%H%M')" >> $GITHUB_OUTPUT + + - name: Create Release and Upload APKs + uses: softprops/action-gh-release@v2 + with: + tag_name: release-${{ steps.date.outputs.date }} + name: APK Builds - ${{ steps.date.outputs.date }} + files: builds/*.apk + body: | + Automated APK builds: + - YouTube, YouTube Music, X → APKMirror via Tailscale proxy + - Instagram → UptoDown (direct, no Cloudflare) diff --git a/README.md b/README.md index 330487b..ffdb7dd 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,295 @@ -# ampy - an apkmirror scraper in python -Usage: `pip install -r requirements.txt`, then `python apkmcli` -Usage in other apps/scripts: please read the `apkmirror.py` file, or `apkmcli` to understand how it works. 
Im too lazy to explain, sorry lol +# ampy 📦 + +A fast, smart APK downloader that scrapes the latest versions of your favourite Android apps directly from **APKMirror** and **UptoDown** — with automatic fallback when one source is blocked. + +--- + +## What is this? + +`ampy` is a command-line tool that lets you search for any Android app and download the latest APK automatically. It picks the best available variant (universal architecture, nodpi) so the APK works on every phone. + +It is also fully compatible with **GitHub Actions**, meaning you can set it up once and trigger a download + release of all your apps from the cloud at any time — no manual work required. + +--- + +## How the Scraper Works + +When you run `ampy` and search for an app, it follows this flow: + +``` +User searches "Instagram" + │ + ▼ + Try APKMirror first + │ + ┌───┴───────────────────┐ + │ 200 OK? │ 403 / Blocked? + ▼ ▼ +Download from Auto-switch to +APKMirror ✓ UptoDown ✓ +``` + +### APKMirror +The primary source. It has the most accurate and up-to-date APK releases. The scraper: +1. Searches for the app by name and version +2. Parses the search results page to find matching releases +3. Scores every available variant by architecture and DPI to pick the best one: + - Prefers **universal** architecture (runs on any processor — ARM, ARM64, x86) + - Prefers **nodpi** DPI (graphics scale perfectly on every screen size) + - Prefers bare `.apk` over `.apkm` / `.apks` bundles +4. Navigates through three pages to get the final direct download URL +5. Downloads and saves the file + +### UptoDown (Automatic Fallback) +Some apps — like Instagram, Facebook, and WhatsApp — are aggressively protected by Cloudflare's **Turnstile** bot challenge on APKMirror. No browser automation can solve this challenge without human interaction. + +For these apps, the scraper **automatically falls back** to UptoDown, which serves the same apps without any bot protection. The flow is: +1. 
APKMirror returns `403 Forbidden` +2. Scraper detects the block immediately +3. Switches to UptoDown, looks up the correct app slug +4. Downloads the latest APK directly from UptoDown's CDN + +This all happens invisibly — you just get your APK. + +--- + +## File Naming + +All downloaded APKs follow the standard naming format: + +``` +com.google.android.youtube-21.21.80-all.apk +com.instagram.android-433.0.0.4.68-all.apk +com.twitter.android-11.91.0-release.0-all.apk +``` + +Format: `[package.name]-[version]-all.apk` + +This format is required by automated patching pipelines (like ReVanced) that look for APKs by package name. + +--- + +## Installation + +```bash +git clone https://github.com/myst-25/ampy +cd ampy +python3 -m venv .venv +source .venv/bin/activate +pip install -r requirements.txt +``` + +--- + +## Usage + +### Interactive Mode + +Run the script and you will get a menu: + +```bash +python apkmcli +``` + +``` +╔══════════════════════════════╗ +║ APK Downloader ║ +╚══════════════════════════════╝ + +Quick pick (from apps.json): + [1] YouTube 21.21.80 + [2] YouTube Music 9.21.51 + [3] X 11.91.0-release.0 + [4] Instagram latest + [5] Download ALL above + [0] Custom search + + → +``` + +- Pick a number to download that specific app +- Pick **[5]** to download every app in `apps.json` at once +- Pick **[0]** to search for any app by name + +### Custom Search + +Choose `[0]` and type any app name: + +``` +App name to search: + → Spotify + +Version (leave blank for latest): + → +``` + +The scraper will search APKMirror first. If it gets blocked, it automatically switches to UptoDown. If an old APK for that app already exists in the `builds/` folder, it will ask you before deleting it: + +``` +Found existing APK(s): + com.spotify.music-8.9.94-all.apk (68.2 MB) +Delete old APK(s) before downloading? 
(y/n): + → +``` + +### Batch / Automated Mode + +```bash +python apkmcli --download-all +``` + +Reads `apps.json` and downloads every app silently — no prompts, no questions. Old APKs are automatically replaced. This is the mode used by GitHub Actions. + +--- + +## apps.json + +This file defines the apps downloaded in batch mode. Add or remove apps here: + +```json +{ + "YouTube": "21.21.80", + "YouTube Music": "9.21.51", + "X": "11.91.0-release.0", + "Instagram": "latest" +} +``` + +- The **key** is the app name as it appears on APKMirror +- The **value** is the version string, or `"latest"` for UptoDown-sourced apps + +Downloaded APKs are saved to the `builds/` folder. + +--- + +## GitHub Actions Setup + +The GitHub Actions workflow lets you trigger downloads from anywhere, at any time, without your laptop needing to be manually set up each time — except for one thing: **the proxy**. + +### Why a Proxy? + +APKMirror blocks all requests coming from cloud server IP addresses (AWS, Azure, GitHub Actions, etc.). To bypass this, the GitHub Actions runner tunnels all its traffic through your personal laptop using **Tailscale**, which gives the traffic your home IP address. + +UptoDown does **not** require a proxy — its direct downloads bypass Cloudflare entirely and are always fetched directly from the GitHub runner. + +### Setting Up Tailscale + +Tailscale creates a private, secure VPN between your laptop and the GitHub Actions runner. + +**Step 1 — Install Tailscale on your laptop:** +```bash +curl -fsSL https://tailscale.com/install.sh | sh +sudo tailscale up +``` + +**Step 2 — Get your laptop's Tailscale IP:** +```bash +tailscale ip -4 +# Example output: 100.90.77.61 +``` + +**Step 3 — Generate a reusable Auth Key:** +1. Go to [tailscale.com/admin/settings/keys](https://login.tailscale.com/admin/settings/keys) +2. Click **Generate auth key** +3. Check both **Reusable** and **Ephemeral** +4. 
Copy the key (starts with `tskey-auth-...`) + +**Step 4 — Add the key to GitHub Secrets:** +1. Go to your repo on GitHub → **Settings → Secrets and variables → Actions** +2. Click **New repository secret** +3. Name: `TAILSCALE_AUTHKEY` +4. Value: paste your `tskey-auth-...` key + +### Starting the Local Proxy + +Before triggering the GitHub Action, start the proxy on your laptop: + +```bash +~/.local/bin/proxy --hostname 0.0.0.0 --port 8899 +``` + +> **Important:** Use `--hostname 0.0.0.0` — this tells the proxy to listen on all network interfaces, including the Tailscale one, not just localhost. Without this, the GitHub Actions runner cannot connect. + +The workflow hardcodes your Tailscale IP (`100.90.77.61`) as the proxy address. If your Tailscale IP ever changes, update the `HTTP_PROXY` and `HTTPS_PROXY` lines in `.github/workflows/download_apks.yml`. + +### Triggering the Workflow + +1. Make sure your proxy is running (`~/.local/bin/proxy --hostname 0.0.0.0 --port 8899`) +2. Go to your repo on GitHub → **Actions** → **APK Downloader** +3. Click **Run workflow** → **Run workflow** + +The runner will: +1. Connect to your laptop via Tailscale VPN +2. Download YouTube, YouTube Music, X via APKMirror (through your home IP proxy) +3. Download Instagram via UptoDown (direct — no proxy needed) +4. Create a new GitHub Release with all APKs attached + +### Releases + +Every workflow run creates a new GitHub Release tagged with the current date and time: + +``` +release-2026.06.05-1530 +APK Builds - 2026.06.05-1530 +``` + +All APKs are attached to the release as assets, ready to download. 
+ +--- + +## How the Auto-Fallback Works (Technical) + +The `apkmirror.py` scraper's `search()` method returns: +- A **list of results** → app found, proceed with download +- An **empty list** → no results found, try UptoDown +- `None` → blocked by Cloudflare (403), immediately switch to UptoDown + +The `apkmcli` script checks for `None` before anything else and reroutes the download to UptoDown transparently. + +UptoDown's `get_app_slug()` method tries three strategies to find the correct app page: +1. Check the built-in known-apps dictionary (YouTube, Instagram, X, etc.) +2. Search UptoDown's search endpoint to find the slug dynamically +3. Guess the slug by converting the app name to lowercase with hyphens + +--- + +## Supported Apps (Known Package Names) + +| App | Package Name | Source | +|-----|-------------|--------| +| YouTube | `com.google.android.youtube` | APKMirror | +| YouTube Music | `com.google.android.apps.youtube.music` | APKMirror | +| X (Twitter) | `com.twitter.android` | APKMirror | +| Instagram | `com.instagram.android` | UptoDown (auto-fallback) | +| TikTok | `com.zhiliaoapp.musically` | UptoDown (auto-fallback) | +| WhatsApp | `com.whatsapp` | UptoDown (auto-fallback) | +| Snapchat | `com.snapchat.android` | UptoDown (auto-fallback) | +| Facebook | `com.facebook.katana` | UptoDown (auto-fallback) | +| Telegram | `org.telegram.messenger` | APKMirror | +| Spotify | `com.spotify.music` | APKMirror | + +Any app not in this list will still download correctly — it just won't have a known package name, so the filename will use the scraped app name instead. 
def extract_base_apk(bundle_path: str, output_dir: str = "."):
    """Extract the base APK from an ``.apkm`` / ``.apks`` bundle.

    The bundle is a ZIP archive. The member to extract is chosen in this
    order of preference:

    1. an entry whose file name is exactly ``base.apk`` (any directory);
    2. an ``.apk`` entry whose path contains ``base.apk``;
    3. an ``.apk`` entry whose path mentions neither ``split`` nor ``config``;
    4. the first ``.apk`` entry.

    Args:
        bundle_path: Path to the bundle archive.
        output_dir: Directory that receives the extracted APK. It is created
            if it does not exist.

    Returns:
        The path ``<output_dir>/<bundle stem>_base.apk`` on success, or
        ``None`` if the bundle is not a ZIP archive or holds no APK.
    """
    if not zipfile.is_zipfile(bundle_path):
        print(f"Error: {bundle_path} is not a valid zip archive.")
        return None

    with zipfile.ZipFile(bundle_path, "r") as zip_ref:
        # Directory entries end with "/" and can never be the APK itself.
        members = [name for name in zip_ref.namelist() if not name.endswith("/")]
        # Extension check is case-insensitive so "Main.APK" is recognised.
        apks = [name for name in members if name.lower().endswith(".apk")]

        exact = [n for n in apks if n.lower().rsplit("/", 1)[-1] == "base.apk"]
        loose = [n for n in apks if "base.apk" in n.lower()]
        non_splits = [
            n for n in apks
            if "split" not in n.lower() and "config" not in n.lower()
        ]
        target_name = next(iter(exact or loose or non_splits or apks), None)

        if target_name is None:
            print("Could not find a base APK inside the bundle.")
            return None

        print(f"Found base APK candidate: {target_name} inside the bundle.")

        bundle_stem = os.path.splitext(os.path.basename(bundle_path))[0]
        new_path = os.path.join(output_dir, bundle_stem + "_base.apk")
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # Stream the member straight to its final name. ZipFile.extract()
        # would recreate the member's archive directories under output_dir
        # and leave them behind after the file is moved.
        with zip_ref.open(target_name) as member, open(new_path, "wb") as out_file:
            shutil.copyfileobj(member, out_file)

    print(f"Successfully extracted base APK to {new_path}")
    return new_path
If it blocks (403 / no results), auto-switch to UptoDown. + Works for ANY app the user searches — not just the hardcoded list. + """ + pkg_name = PACKAGE_NAMES.get(app_name) + builds_dir = "builds" + os.makedirs(builds_dir, exist_ok=True) -print(f"This app is for \"{app_details['architecture']}\" devices, running {app_details['android_version']} with {app_details['dpi']} DPI") + print(f"\n{'='*50}") + print(f" Searching: {app_name}{' ' + version if version else ''}") + print(f"{'='*50}") -ask_download = input("Do you want to download it? (y/n)\n -> ") + # --- Try APKMirror first --- + search_query = f"{app_name} {version}".strip() + results = apkm.search(search_query) -if ask_download.lower() in ("y", ""): - app_link = app_details["download_link"] - print(f"Trying to get direct link, if the script cant get it, download by visiting this (not direct url): {app_link}") + # None = blocked by Cloudflare → auto-fallback + if results is None: + print(f"⚠ APKMirror blocked '{app_name}'. Switching to UptoDown automatically...") + return download_via_uptodown(app_name, pkg_name, version) - direct_link = apkm.get_download_link(app_link) + # Empty results = not found on APKMirror → try UptoDown + if not results: + print(f"⚠ No results for '{app_name}' on APKMirror. Trying UptoDown...") + return download_via_uptodown(app_name, pkg_name, version) - print("Got the link i neded, trying to get a direct link...") - sys.exit(f"Done. Direct url: {apkm.get_direct_download_link(direct_link)}") + # Filter results to best match + filtered = [r for r in results if r['name'].startswith(app_name + " ") and + r['name'][len(app_name):].strip() and r['name'][len(app_name):].strip()[0].isdigit()] + if not filtered: + filtered = results -else: - sys.exit("Exit") + print(f"[APKMirror] Found: {filtered[0]['name']}") + app_details = apkm.get_app_details(filtered[0]["link"]) + + if not app_details: + print(f"⚠ Could not get variant details from APKMirror. 
Switching to UptoDown...") + return download_via_uptodown(app_name, pkg_name, version) + + print(f"[APKMirror] Variant: {app_details['architecture']} / {app_details['dpi']} ({app_details['type']})") + + # Get download URLs + try: + app_link = app_details["download_link"] + direct_link = apkm.get_download_link(app_link) + direct_url = apkm.get_direct_download_link(direct_link) + except Exception as e: + print(f"⚠ Failed to get download link from APKMirror ({e}). Switching to UptoDown...") + return download_via_uptodown(app_name, pkg_name, version) + + print(f"[APKMirror] Downloading...") + dl_resp = apkm.scraper.get(direct_url, stream=True, headers=apkm.headers) + + # Delete old versions + if pkg_name: + old_files = glob.glob(os.path.join(builds_dir, f"{pkg_name}-*-all.*")) + else: + old_files = glob.glob(os.path.join(builds_dir, f"{app_name.replace(' ', '_')}-*.*")) + + if old_files and ask_before_delete: + print(f"\nFound existing APK(s):") + for f in old_files: + size = os.path.getsize(f) / (1024*1024) + print(f" {os.path.basename(f)} ({size:.1f} MB)") + confirm = input("Delete old APK(s) before downloading? 
def _load_apps_config(path="apps.json"):
    """Read the app-name -> version mapping from *path* (must be a JSON object)."""
    with open(path, "r") as f:
        config = json.load(f)
    if not isinstance(config, dict):
        raise ValueError(f"{path} must contain a JSON object mapping app names to versions")
    return config


def _download_many(apps, ask_before_delete=False):
    """Download every (name, version) pair; return the names whose download raised."""
    crashed = []
    for name, ver in apps:
        try:
            download_app(name, ver, ask_before_delete=ask_before_delete)
        except Exception as e:
            # Keep going: one broken app must not cancel the remaining ones.
            print(f"✗ {name}: download aborted ({e})")
            crashed.append(name)
    return crashed


def _prompt_custom_search():
    """Ask for an app name and optional version, then download it interactively."""
    app_name = input("App name to search:\n → ").strip()
    if not app_name:
        sys.exit("No app name entered.")
    version = input("Version (leave blank for latest):\n → ").strip()
    download_app(app_name, version, ask_before_delete=True)


def main():
    """Entry point: batch mode with ``--download-all``, otherwise an interactive menu.

    Batch mode reads ``apps.json`` and downloads every entry without prompting.
    Every app is attempted even if an earlier one raises; the process then
    exits with an error message naming the apps that raised.

    Interactive mode lists the entries of ``apps.json`` (if it can be read)
    as quick picks, plus "download all" and a custom search.
    """
    # Non-interactive batch mode
    if len(sys.argv) > 1 and sys.argv[1] == "--download-all":
        try:
            apps_config = _load_apps_config()
        except Exception as e:
            sys.exit(f"Failed to read apps.json: {e}")
        crashed = _download_many(apps_config.items())
        if crashed:
            sys.exit(f"Download aborted for: {', '.join(crashed)}")
        return

    # --- Interactive menu ---
    # apps.json is optional here; a missing file just means no quick picks.
    ref_apps = {}
    try:
        ref_apps = _load_apps_config()
    except FileNotFoundError:
        pass
    except Exception as e:
        # A present-but-broken file is reported instead of silently ignored.
        print(f"⚠ Ignoring apps.json: {e}")

    print("\n╔══════════════════════════════╗")
    print("║ APK Downloader ║")
    print("╚══════════════════════════════╝")

    ref_list = list(ref_apps.items())
    if ref_list:
        print("\nQuick pick (from apps.json):")
        for i, (name, ver) in enumerate(ref_list, start=1):
            print(f" [{i}] {name} {ver}")
        print(f" [{len(ref_list)+1}] Download ALL above")
    print(" [0] Custom search")

    try:
        choice = int(input("\n → "))
    except ValueError:
        sys.exit("Invalid choice.")

    if not ref_list or choice == 0:
        # With no quick picks, custom search is the only action on offer.
        _prompt_custom_search()
    elif 1 <= choice <= len(ref_list):
        name, ver = ref_list[choice - 1]
        download_app(name, ver, ask_before_delete=True)
    elif choice == len(ref_list) + 1:
        crashed = _download_many(ref_list, ask_before_delete=False)
        if crashed:
            sys.exit(f"Download aborted for: {', '.join(crashed)}")
    else:
        sys.exit("Invalid choice.")
def score_variant(v):
    """Rank a variant dict; a higher total means a more desirable download.

    Package type dominates (plain APK over BUNDLE), then architecture
    (universal, then arm64-v8a, then armeabi-v7a), then a small bonus for
    the ``nodpi`` density.
    """
    # Package type: exact, case-sensitive match on the badge text.
    package_points = {"APK": 1000, "BUNDLE": 500}.get(v["type"], 0)

    # Architecture: "universal" must match exactly; ABI names match as substrings.
    abi = v["architecture"].lower()
    if abi == "universal":
        abi_points = 100
    else:
        abi_points = next(
            (
                points
                for marker, points in (("arm64-v8a", 50), ("armeabi-v7a", 10))
                if marker in abi
            ),
            0,
        )

    # Density: nodpi builds get a small compatibility bonus.
    density_points = 20 if v["dpi"].lower() == "nodpi" else 0

    return package_points + abi_points + density_points
max(variants, key=score_variant) + + return best_variant def get_download_link(self, app_download_link): print("[get_download_link] Sleeping...") diff --git a/apps.json b/apps.json new file mode 100644 index 0000000..568e74c --- /dev/null +++ b/apps.json @@ -0,0 +1,6 @@ +{ + "YouTube": "21.21.80", + "YouTube Music": "9.21.51", + "X": "11.91.0-release.0", + "Instagram": "426.0.0.37.68" +} diff --git a/requirements.txt b/requirements.txt index 5eed1e1..5993aa7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,4 @@ bs4 cloudscraper +requests +beautifulsoup4 diff --git a/scraper.py b/scraper.py new file mode 100644 index 0000000..ec0f84e --- /dev/null +++ b/scraper.py @@ -0,0 +1,467 @@ +#!/usr/bin/env python3 +""" +Morphe APK Scraper - Run locally on your laptop +Usage: + export GITHUB_TOKEN="your_token" + python scraper.py + +Downloads APKs into the current directory, uploads each one to the +GitHub release as soon as it's downloaded, then deletes the local file. +At the end it verifies every app is present in the release. 
def download_apk(url, dest, retries=3):
    """Stream-download *url* to *dest* and report whether a usable APK was saved.

    Args:
        url: Direct download URL.
        dest: ``pathlib.Path`` to write to.
        retries: Maximum number of attempts when the transfer itself fails.

    Returns:
        True if a file of at least 500 000 bytes that is a ZIP archive was
        written to *dest*. False if the server answered with HTML or an
        HTTP error status, the file was too small, or all attempts failed.
        On failure *dest* is removed.
    """
    import zipfile  # local import: only this validation needs it

    for attempt in range(1, retries + 1):
        try:
            with SESSION.get(url, stream=True, timeout=180,
                             allow_redirects=True) as r:
                ct = r.headers.get("Content-Type", "")
                # The header is only used for logging; a malformed value
                # must not cost a retry.
                try:
                    size = int(r.headers.get("Content-Length", 0))
                except ValueError:
                    size = 0
                log(f"DL status={r.status_code} ct={ct} size={size//1024}KB")
                if "text/html" in ct or r.status_code >= 400:
                    log("blocked / not an APK, skipping")
                    return False
                with open(dest, "wb") as f:
                    for chunk in r.iter_content(131072):
                        f.write(chunk)
            saved = dest.stat().st_size
            if saved < 500_000:
                log(f"file too small ({saved} B) – not a valid APK")
                dest.unlink(missing_ok=True)
                return False
            # An APK is a ZIP archive; a truncated or substituted payload
            # fails this check and is retried like any other transfer error.
            if not zipfile.is_zipfile(dest):
                raise zipfile.BadZipFile("downloaded file is not a ZIP archive")
            ok(f"saved {dest.name} ({saved // 1024 // 1024} MB)")
            return True
        except Exception as e:
            log(f"download attempt {attempt} error: {e}")
            dest.unlink(missing_ok=True)
            # Back off before the next attempt only; no point sleeping after the last.
            if attempt < retries:
                time.sleep(8 * attempt)
    return False
def _mentions_version(text, version):
    """Return True if *text* contains *version* as a whole version number."""
    # Reject matches that are only part of a longer dotted number,
    # e.g. "1.2" inside "21.2.0" or "1.2.3".
    pattern = rf"(?<!\d)(?<!\d\.){re.escape(version)}(?!\.?\d)"
    return re.search(pattern, text) is not None


def src_uptodown(app):
    """Download the app's APK from Uptodown.

    Reads ``uptodown_dlurl``, ``version`` (optional) and ``package`` from
    *app*. Looks for a download link on the ``/versions`` page (falling back
    to the app page), preferring a link whose surrounding text mentions the
    requested version and otherwise taking the first download link. The
    final URL is taken from the download page's button or meta refresh; if
    the download page cannot be fetched or yields nothing, the download-page
    URL itself is used.

    Returns:
        The destination ``Path`` on success, otherwise ``None``.
    """
    log("[Uptodown]")
    base = app.get("uptodown_dlurl", "").rstrip("/")
    version = app.get("version")
    package = app["package"]
    if not base:
        log("no uptodown_dlurl configured")
        return None

    listing = soup(f"{base}/versions") or soup(base)
    if not listing:
        return None

    dl_link = re.compile(r"post-download|/download")
    dl_page = None
    if version:
        for a in listing.find_all("a", href=True):
            parent_text = (a.find_parent() or a).get_text(" ", strip=True)
            if _mentions_version(parent_text, version) and dl_link.search(a["href"]):
                dl_page = a["href"]
                break
    if not dl_page:
        # No version requested, or no link for it: take the first download link.
        a = listing.find("a", href=dl_link)
        if a:
            dl_page = a["href"]
    if not dl_page:
        log("no download page link found")
        return None
    if not dl_page.startswith("http"):
        dl_page = urljoin(base, dl_page)
    log(f"dl_page={dl_page}")

    page = soup(dl_page)
    final = None
    if page:
        btn = (page.find("a", id="detail-download-button") or
               page.find("a", attrs={"data-url": True}) or
               page.find("a", href=re.compile(r"\.apk")))
        if btn:
            final = btn.get("href") or btn.get("data-url", "")
        if not final:
            meta = page.find("meta", attrs={"http-equiv": "refresh"})
            if meta:
                m = re.search(r"url=(.+)", meta.get("content", ""), re.I)
                if m:
                    final = m.group(1).strip()
    if not final:
        log("falling back to dl_page as direct download")
        final = dl_page
    if not final.startswith("http"):
        final = urljoin(base, final)
    log(f"final={final[:80]}")

    dest = DOWNLOAD_DIR / f"{package}.apk"
    return dest if download_apk(final, dest) else None
def gh_headers():
    """Return the HTTP headers used for authenticated GitHub REST API calls."""
    return {
        "Authorization": f"Bearer {GITHUB_TOKEN}",
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
    }


def get_or_create_release():
    """Fetch the release tagged ``RELEASE_TAG``, creating it when absent.

    Returns:
        ``(release_id, upload_url)`` taken from the GitHub API response.

    Raises:
        requests.HTTPError: if the lookup fails for a reason other than
            "not found", or if creating the release fails.
    """
    base = f"https://api.github.com/repos/{GITHUB_REPO}"
    r = requests.get(f"{base}/releases/tags/{RELEASE_TAG}",
                     headers=gh_headers(), timeout=30)
    if r.status_code == 200:
        d = r.json()
        return d["id"], d["upload_url"]
    if r.status_code != 404:
        # Auth, rate-limit and server errors do not mean the release is
        # missing; surface them instead of attempting a doomed create.
        r.raise_for_status()
    log("Release not found, creating...")
    r = requests.post(f"{base}/releases", headers=gh_headers(), json={
        "tag_name": RELEASE_TAG,
        "name": "APK Mirror",
        "body": "Auto-scraped APKs for Morphe patching.",
        "prerelease": False,
    }, timeout=30)
    r.raise_for_status()
    d = r.json()
    return d["id"], d["upload_url"]


def list_assets(release_id):
    """Return ``{asset_name: asset_id}`` for every asset of a release.

    The endpoint is paginated, so all pages are walked; reading only the
    first page would hide assets beyond it from callers.

    Raises:
        requests.HTTPError: if any page request fails.
    """
    url = f"https://api.github.com/repos/{GITHUB_REPO}/releases/{release_id}/assets"
    per_page = 100
    assets = {}
    page = 1
    while True:
        r = requests.get(url, headers=gh_headers(),
                         params={"per_page": per_page, "page": page},
                         timeout=30)
        r.raise_for_status()
        batch = r.json()
        assets.update((a["name"], a["id"]) for a in batch)
        if len(batch) < per_page:  # short page => last page
            return assets
        page += 1


def delete_asset(asset_id):
    """Delete one release asset; return ``True`` on success.

    A failed delete is logged instead of being ignored, because the
    following upload of a same-named asset would then be rejected.
    """
    r = requests.delete(
        f"https://api.github.com/repos/{GITHUB_REPO}/releases/assets/{asset_id}",
        headers=gh_headers(),
        timeout=30,
    )
    if r.status_code != 204:
        err(f"delete asset {asset_id} failed {r.status_code}: {r.text[:200]}")
        return False
    return True


def upload_asset(upload_url, path):
    """Upload the file at *path* as a release asset.

    Args:
        upload_url: The release's ``upload_url`` URI template; the
            ``{...}`` placeholder part is stripped before use.
        path: ``pathlib.Path`` of the APK to upload.

    Returns:
        The asset's ``browser_download_url``, or ``""`` when the upload is
        rejected.
    """
    url = re.sub(r"\{.*?\}", "", upload_url)
    h = {**gh_headers(), "Content-Type": "application/vnd.android.package-archive"}
    log(f"uploading {path.name} ({path.stat().st_size // 1024 // 1024} MB) ...")
    with open(path, "rb") as f:
        # Passing the file object streams the body instead of loading it.
        r = requests.post(url, headers=h, params={"name": path.name},
                          data=f, timeout=600)
    if r.status_code in (200, 201):
        dl = r.json().get("browser_download_url", "")
        ok(f"uploaded → {dl}")
        return dl
    err(f"upload failed {r.status_code}: {r.text[:200]}")
    return ""


# ── per-app orchestration ─────────────────────────────────────────────────────

def process_app(app, release_id, upload_url):
    """Scrape one app's APK and publish it to the GitHub release.

    Sources are tried in order (APKMirror, Uptodown, APKCombo) until one
    yields an existing file. The file then replaces any same-named asset in
    the release and is removed locally afterwards.

    Returns:
        ``True`` when the upload produced a download URL, else ``False``.
    """
    name = app["name"]
    package = app["package"]
    version = app.get("version") or "latest"
    banner(f"{name} | {package} | v{version}")

    # try sources in order
    apk_path = None
    for label, fn in [("APKMirror", src_apkmirror),
                      ("Uptodown", src_uptodown),
                      ("APKCombo", src_apkcombo)]:
        try:
            result = fn(app)
            if result and result.exists():
                ok(f"got APK via {label}")
                apk_path = result
                break
        except Exception as e:
            # A scraper crash must not stop the remaining sources.
            err(f"{label} exception: {e}")
        time.sleep(2)

    if not apk_path:
        err(f"ALL sources failed for {name}")
        return False

    dl_url = ""
    try:
        # replace old asset in release if present
        existing = list_assets(release_id)
        if apk_path.name in existing:
            log(f"deleting old asset {apk_path.name}")
            delete_asset(existing[apk_path.name])
        dl_url = upload_asset(upload_url, apk_path)
    except (requests.RequestException, OSError) as e:
        # Report this app as failed rather than aborting the whole run.
        err(f"release update failed for {name}: {e}")
    finally:
        # delete local file whether or not the upload worked
        apk_path.unlink(missing_ok=True)
        log(f"deleted local {apk_path.name}")

    return bool(dl_url)


# ── final verification ────────────────────────────────────────────────────────

def verify_all(apps, release_id):
    """Check that every app has a ``<package>.apk`` asset in the release.

    Prints a per-file status and a summary.

    Returns:
        List of app names whose asset is missing (empty when all present).
    """
    banner("VERIFICATION")
    assets = list_assets(release_id)
    missing = []
    present = []
    for app in apps:
        fname = f"{app['package']}.apk"
        if fname in assets:
            ok(fname)
            present.append(fname)
        else:
            err(f"MISSING: {fname}")
            missing.append(app["name"])
    print()
    print(f"  Present : {len(present)}/{len(apps)}")
    if missing:
        print(f"  Missing : {', '.join(missing)}")
    return missing
class UptoDown:
    """Scraper that resolves and downloads APKs from UptoDown.

    Instance attributes set by ``__init__``:
        timeout: Seconds slept before each page request made via ``_get``
            (a pause between requests, not a socket timeout).
        base_url: Root of the English UptoDown site, used for searches.
        dw_base: URL prefix a download token is appended to.
        headers: HTTP headers sent with every request.
        scraper: The ``cloudscraper`` session used for all requests.
    """

    def __init__(self, timeout=2):
        self.timeout = timeout
        self.base_url = "https://en.uptodown.com"
        self.dw_base = "https://dw.uptodown.com/dwn"
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
            "Accept-Language": "en-US,en;q=0.9",
            "Referer": "https://en.uptodown.com/",
        }
        self.scraper = cloudscraper.create_scraper()

    def _get(self, url):
        """GET *url* after sleeping ``self.timeout`` seconds."""
        time.sleep(self.timeout)
        return self.scraper.get(url, headers=self.headers, timeout=15)

    def get_app_slug(self, app_name):
        """Get the UptoDown slug: check known list first, then search UptoDown."""
        slug = APP_SLUGS.get(app_name)
        if slug:
            return slug

        # Try searching UptoDown for the app
        slug = self._search_slug(app_name)
        if slug:
            return slug

        # Last resort: guess the slug from the name
        return app_name.lower().replace(" ", "-")

    def _search_slug(self, app_name):
        """Search UptoDown for *app_name*; return its slug or ``None``.

        Best-effort: any error is printed and reported as "not found" so
        the caller can fall back to guessing the slug.
        """
        try:
            query = quote_plus(app_name)
            url = f"{self.base_url}/android/search/{query}"
            r = self.scraper.get(url, headers=self.headers, timeout=10)
            if r.status_code != 200:
                # Try alternative search format
                url = f"{self.base_url}/android/q/{query}"
                r = self.scraper.get(url, headers=self.headers, timeout=10)
            if r.status_code != 200:
                return None

            soup = BeautifulSoup(r.text, "html.parser")
            # First result: either an <a class="item"> or an <a> nested in
            # a <div class="item">.
            result = soup.find("a", {"class": "item"})
            if not result:
                container = soup.find("div", {"class": "item"})
                result = container.find("a") if container else None
            if result and result.get("href"):
                # Slug is the subdomain, e.g. https://instagram.en.uptodown.com/android
                m = re.search(r"https?://([^.]+)\.en\.uptodown\.com", result["href"])
                if m:
                    return m.group(1)
        except Exception as e:
            print(f"[uptodown] Search error: {e}")
        return None

    @staticmethod
    def _select_version_id(candidates, target):
        """Pick the version id matching *target* from ``(text, id)`` pairs.

        An exact text match wins; otherwise the first entry whose text
        contains *target* is used, so "1.2" no longer selects "11.2.3" when
        a real "1.2" is listed. Returns ``None`` when nothing matches.
        """
        partial = None
        for text, v_id in candidates:
            if text == target:
                return v_id
            if partial is None and target in text:
                partial = v_id
        return partial

    def get_download_info(self, app_name, target_version=None):
        """Get the specified version (or latest) and direct download URL for an app.

        Returns:
            The dict built by ``_parse_download_page`` or ``None`` on
            failure. When *target_version* is not listed, the latest
            version is used instead.
        """
        slug = self.get_app_slug(app_name)
        app_url = f"https://{slug}.en.uptodown.com/android"

        target_version = target_version.strip() if target_version else None
        if target_version and target_version.lower() != "latest":
            versions_url = f"{app_url}/versions"
            print(f"[uptodown] Fetching versions from {versions_url}")
            r = self._get(versions_url)
            if r.status_code == 200:
                soup = BeautifulSoup(r.text, "html.parser")
                versions_list = soup.find("div", {"id": "versions-items-list"})
                if versions_list:
                    candidates = []
                    for item in versions_list.find_all("div", recursive=False):
                        v_span = item.find("span", {"class": "version"})
                        v_id = item.get("data-version-id")
                        if v_span and v_id:
                            candidates.append((v_span.text.strip(), v_id))
                    v_id = self._select_version_id(candidates, target_version)
                    if v_id:
                        download_page_url = f"{app_url}/download/{v_id}"
                        return self._parse_download_page(app_name, app_url, download_page_url)
            print(f"[uptodown] Target version '{target_version}' not found. Falling back to latest.")

        # Default to latest
        download_page_url = f"{app_url}/download"
        return self._parse_download_page(app_name, app_url, download_page_url)

    def _parse_download_page(self, app_name, app_url, download_page_url):
        """Extract the download token and version from a download page.

        Returns:
            Dict with ``app_name``, ``version`` (``"unknown"`` when not
            found), ``download_url`` and ``app_page``; ``None`` when the
            page cannot be fetched or has no download token.
        """
        print(f"[uptodown] Fetching {download_page_url}")
        r = self._get(download_page_url)

        if r.status_code != 200:
            print(f"[uptodown] Failed: status {r.status_code}")
            return None

        soup = BeautifulSoup(r.text, "html.parser")

        # Get the download token from the button
        btn = soup.find("button", {"id": "detail-download-button"})
        if not btn:
            print("[uptodown] Could not find download button")
            return None

        token = btn.get("data-url")
        if not token:
            print("[uptodown] No download token found")
            return None

        # Get version info
        version = None
        version_el = soup.find("span", {"itemprop": "version"})
        if not version_el:
            version_el = soup.find("div", {"class": "version"})
        if version_el:
            version = version_el.text.strip()

        direct_url = f"{self.dw_base}/{token}"

        return {
            "app_name": app_name,
            "version": version or "unknown",
            "download_url": direct_url,
            "app_page": app_url,
        }

    def download(self, app_name, version=None, output_dir="builds", package_name=None):
        """Download the requested version (or the latest) of an app's APK.

        The file is streamed to a ``.part`` file first. Older
        ``<name>-*-all.*`` files are removed only after the new download
        has completed, so a failed download keeps the previous APK.

        Returns:
            Path of the saved APK as a string, or ``None`` on failure.
        """
        import glob
        import os

        info = self.get_download_info(app_name, target_version=version)
        if not info:
            print(f"[uptodown] Could not get download info for {app_name}")
            return None

        version = info["version"]
        print(f"[uptodown] Downloading {app_name} {version} from {info['download_url']}")

        # Build filename using package name if provided
        os.makedirs(output_dir, exist_ok=True)
        stem = package_name if package_name else app_name.replace(" ", "_")
        filename = os.path.join(output_dir, f"{stem}-{version}-all.apk")
        part_path = filename + ".part"

        r = self.scraper.get(info["download_url"], headers=self.headers, stream=True, timeout=60)
        try:
            if r.status_code != 200:
                print(f"[uptodown] Download failed: status {r.status_code}")
                return None

            print(f"[uptodown] Saving to {filename}...")
            try:
                with open(part_path, "wb") as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)
            except Exception:
                # Never leave a truncated file behind.
                if os.path.exists(part_path):
                    os.remove(part_path)
                raise
        finally:
            r.close()

        # Delete old versions, now that the new file is complete.
        pattern = os.path.join(glob.escape(output_dir), f"{glob.escape(stem)}-*-all.*")
        for old in glob.glob(pattern):
            if os.path.abspath(old) == os.path.abspath(part_path):
                continue
            try:
                os.remove(old)
                print(f"[uptodown] Removed old: {old}")
            except OSError as e:
                print(f"[uptodown] Could not remove {old}: {e}")

        os.replace(part_path, filename)
        print(f"[uptodown] Done: {filename}")
        return filename