diff --git a/.github/workflows/pep8-enforcement.yaml b/.github/workflows/pep8-enforcement.yaml new file mode 100755 index 0000000..7f146e1 --- /dev/null +++ b/.github/workflows/pep8-enforcement.yaml @@ -0,0 +1,69 @@ +name: PEP 8 Compliance Check + +on: + workflow_dispatch: + push: + branches: + - '**' # Run on pushes to any branch + +jobs: + pep8_check: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 # IMPORTANT: Fetch all history for git diff to work correctly + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' # Specify your Python version (e.g., '3.9', '3.10', '3.11') + + - name: Install flake8 + run: pip install flake8 + + - name: Get changed Python files + id: changed-files + run: | + # For push events, compare with the previous commit + # Handle cases where commits might not exist or be accessible + if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then + # Manual trigger - check all Python files + CHANGED_PYTHON_FILES=$(find . -name "*.py" -type f | tr '\n' ' ') + elif [ "${{ github.event.before }}" = "0000000000000000000000000000000000000000" ]; then + # First commit - check all Python files + CHANGED_PYTHON_FILES=$(find . -name "*.py" -type f | tr '\n' ' ') + else + # Try git diff, fallback to all files if it fails + CHANGED_PYTHON_FILES=$(git diff --name-only --diff-filter=ACM ${{ github.event.before }} ${{ github.sha }} 2>/dev/null | grep '\.py$' | tr '\n' ' ' || find . -name "*.py" -type f | tr '\n' ' ') + fi + + # Ensure we have a value even if empty + if [ -z "$CHANGED_PYTHON_FILES" ]; then + CHANGED_PYTHON_FILES="" + fi + + echo "changed_python_files=${CHANGED_PYTHON_FILES}" >> $GITHUB_OUTPUT + echo "Event type: ${{ github.event_name }}" + echo "Before commit: ${{ github.event.before }}" + echo "Current commit: ${{ github.sha }}" + echo "Found Python files: '$CHANGED_PYTHON_FILES'" + shell: bash + + - name: Run flake8 on changed Python files + if: success() && steps.changed-files.outputs.changed_python_files + run: | + echo "Checking the following files for PEP 8 compliance:" + echo "${{ steps.changed-files.outputs.changed_python_files }}" + echo "" + # Use xargs to properly handle multiple files + echo "${{ steps.changed-files.outputs.changed_python_files }}" | xargs flake8 + shell: bash + + - name: Report success if no Python files were changed (Optional, for clarity) + # This step only runs if the 'Run flake8' step was skipped due to no Python files being found. + # It provides a clear message in the GitHub Actions UI. + if: success() && !steps.changed-files.outputs.changed_python_files + run: echo "No .py files changed in this push. PEP 8 compliance check passed (skipped)." \ No newline at end of file diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 index 65f3ed0..dd3ea2d --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,226 @@ -.idea -ENV +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Abstra +# Abstra is an AI-powered process automation framework. +# Ignore directories containing user credentials, local state, and settings. +# Learn more at https://abstra.io/docs +.abstra/ + +# Visual Studio Code +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore +# and can be added to the global gitignore or merged into this file. However, if you prefer, +# you could uncomment the following to ignore the enitre vscode folder +# .vscode/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + +# Cursor +# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to +# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data +# refer to https://docs.cursor.com/context/ignore-files +.cursorignore +.cursorindexingignore + +# avoid mac stuff +.DS_Store + +# Ignore all image files *.jpg *.jpeg +*.png + +# Terraform Stuff to Ignore +# Local .terraform directories +**/.terraform/* +**/.venv/ + +# .tfstate files +*.tfstate +*.tfstate.* + +# Crash log files +crash.log +crash.*.log +# other stuff +.vscode +.venv +__pycache__ +secrets.txt +test-media/ +test-results/ +logs/ + +# fiddle and experimentation files +fiddle/ \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100755 index 0000000..10ae814 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,5 @@ +repos: +- repo: https://github.com/PyCQA/flake8 + rev: 7.3.0 + hooks: + - id: flake8 \ No newline at end of file diff --git a/LICENSE b/LICENSE old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new mode 100755 index fc4aec8..a25eebc --- a/README.md +++ b/README.md @@ -1,32 +1,112 @@ -# mosaic +# Mosaic JTG Fork -This utility can be used to generate [photo-mosaic](http://en.wikipedia.org/wiki/Photographic_mosaic) images, to use it you must have Python installed, along with the [Pillow](http://pillow.readthedocs.org/en/latest/) imaging library. +This is a fork of the [Codebox/mosaic](https://github.com/codebox/mosaic) project. It introduces the following changes and refactors: -As well as an image to use for the photo-mosaic ([most common image formats are supported](http://pillow.readthedocs.org/en/latest/handbook/image-file-formats.html)), you will need a large collection of different images to be used as tiles. The tile images can be any shape or size (the utility will automatically crop and resize them) but for good results you will need a lot of them - a few hundred at least. One convenient way of generating large numbers of tile images is to [extract screenshots from video files](https://trac.ffmpeg.org/wiki/Create%20a%20thumbnail%20image%20every%20X%20seconds%20of%20the%20video) using [ffmpeg](https://www.ffmpeg.org/). +* **File Restructuring:** Refactored the classes of the original project from a single file into multiple, modular files. +* **Media Tools:** Provided a `tools` folder for quick pre-processing of images and slicing videos into image frames. +* **Cloud Data Sync:** Added tools for transferring photos from an Amazon S3 bucket to an EC2 instance and vice versa. +* **OpenCV Implementation:** Introduced a second version of the main application (`mosaic-v2.py`) that leverages a CV2 model rather than raw pixel processing loops. +* **KD-Tree Optimization:** Uses a KD-Tree to optimize tile matching speed, balancing computation time against target quality. -Run the utility from the command line, as follows: +This tool generates [photo-mosaic](http://en.wikipedia.org/wiki/Photographic_mosaic) images. To use it, you must have Python installed along with the [Pillow](http://pillow.readthedocs.org/en/latest/) imaging library. -
python mosaic.py <image> <tiles directory>
-
+While this version is designed assuming the code runs on an AWS EC2 instance with multiple processors, it can still be executed locally. See this blog post for [additional notes on the architecture](https://www.joelgonzaga.com/2026/05/18/photo-mosaics-with-scikit-aws-and-kdtree/). -* The `image` argument should contain the path to the image for which you want to build the mosaic -* The `tiles directory` argument should contain the path to the directory containing the tile images (the directory will be searched recursively, so it doesn't matter if some of the images are contained in sub-directories) +In either case, you will need a source image for the mosaic ([most common image formats are supported](http://pillow.readthedocs.org/en/latest/handbook/image-file-formats.html)). Additionally, you will need a large collection of separate images to be used as tiles. The tile images can be any shape or size (the utility will automatically crop and resize them), but for good results, you will need at least a few hundred. One convenient way to generate a massive tile library is to [extract screenshots from video files](https://trac.ffmpeg.org/wiki/Create%20a%20thumbnail%20image%20every%20X%20seconds%20of%20the%20video) using [FFmpeg](https://www.ffmpeg.org/), which is often the quickest method for local testing. -For example: +--- -
python mosaic.py game_of_thrones_poster.jpg /home/admin/images/screenshots
-
+## Architecture Setup (EC2, S3, and EBS) -The images below show an example of how the mosaic tiles are matched to the details of the original image: +> **Note:** This project does not provision infrastructure automatically, nor is it an exhaustive guide to AWS. Users are assumed to have a working knowledge of cloud resources or the willingness to reference official documentation. Mind your resource usage to manage costs. -![Mosaic Image](https://codebox.net/assets/images/mosaic/mosaic_small.jpg) -Original +Set up the following environment in AWS: +* An **S3 Bucket** +* An **EC2 Instance** +* An **EBS Volume** of a reasonable size relative to your asset library +* An **IAM Role/Instance Profile** allowing the EC2 instance to read/write to the S3 bucket -[![Mosaic Image Detail](https://codebox.net/assets/images/mosaic/mosaic_detail.jpg)](https://codebox.net/assets/images/mosaic/mosaic_large.jpg) -Mosaic Detail (click through for [full mosaic](https://codebox.net/assets/images/mosaic/mosaic_large.jpg) ~15MB) +### S3 Bucket Configuration +The S3 bucket serves as your centralized storage, allowing you to easily stage raw inputs and retrieve completed mosaics. Create the following four folders: +* `tile-videos/` – Source videos to be chopped into mosaic tiles. +* `tile-images/` – Base images to be processed into mosaic tiles. +* `source-images/` – Target images you want transformed into mosaics. +* `result-mosaics/` – The destination folder where the EC2 instance drops completed outputs. -Producing large, highly detailed mosaics can take some time - you should experiment with the various [configuration parameters](https://github.com/codebox/mosaic/blob/master/mosaic.py#L6) explained in the source code to find the right balance between image quality and render time. +### EC2 and EBS Configuration +Using an EC2 instance optimized for compute and memory delivers the best performance, depending on your tile dataset size. +* **4 vCPUs / 32 GB RAM** safely handles up to ~100k tiles without bottlenecking memory. +* **8 vCPUs / 64 GB RAM** is recommended for datasets scaling past 200k tiles. -In particular the [TILE_MATCH_RES](https://github.com/codebox/mosaic/blob/master/mosaic.py#L8) parameter can have a big impact on both these factors - its value determines how closely the program examines each tile when trying to find the best fit for a particular segment of the image. Setting TILE_MATCH_RES to '1' simply finds the average colour of each tile, and picks the one that most closely matches the average colour of the image segment. As the value is increased, the tile is examined in more detail. Setting TILE_MATCH_RES to equal TILE_SIZE will cause the utility to examine each pixel in the tile individually, producing the best possible match (during my testing I didn't find a very noticeable improvement beyond a value of 5, but YMMV). +The EC2 instance needs Python 3, `pip`, and public internet access to clone the project. Your attached EBS volume requires sufficient I/O performance depending on project scope. -By default the utility will configure itself to use all available CPUs/CPU-cores on the host system, if you want to leave some processing power spare for other tasks then adjust the [WORKER_COUNT](https://github.com/codebox/mosaic/blob/master/mosaic.py#L12) parameter accordingly. +#### Environment Setup +Clone this repository, ensure your storage volume is mounted, and install dependencies: + +```bash +pip3 install -r requirements.txt +``` + +*(Note: Some OpenCV dependencies may take a moment to compile and install.* + +The automation scripts assume an active directory path exists at /mnt/ebs/. Ensure this directory is created on your mounted volume and has read/write permissions enabled. + +## Using The Tools +The tools directory contains several utility scripts. Remote into your EC2 instance via SSH and use the --help flag to inspect argument configurations: +```bash +python3 tools/splicer --help +``` + +## Tools Summary +- **advanced_parse.py:** An alternate method for turning videos into tile components. It captures several cropped square screenshots per frame. Use with caution due to the high volume of files generated. + +- **s3_access.py:** A support file containing classes and helper methods to authenticate and connect to Amazon S3. + +- **load_data_from_s3.py:** Pulls processing assets down from your S3 bucket to the local EBS drive and pushes completed renders back to S3. + +- **splicer.py:** Slices raw video files into individual frames, then processes rectangular image frames into cropped square tiles. If your inputs are raw videos, you will need to execute this script for both steps. + +## Making Mosaics +To use the refactored implementation of the original pixel-matching Codebox codebase, run: +```bash +python mosaic.py -file /path/to/source.jpeg -tiles /path/to/tiles_folder +``` +To run the OpenCV implementation with KD-Tree optimizations, execute: +```bash +python mosaic-v2.py -file /path/to/source.jpeg -tiles /path/to/tiles_folder +``` + +A third alternative, `mosaic-nokd.py`, is available for comparative testing but features the longest execution time. + +# Version Differences +`mosaic.py` + +Functionally identical to the original Codebox engine. The CLI interface has been modified slightly for consistency, but it matches tiles to the target source image using the exact same absolute pixel-difference calculation loops. + +- *Matching Behavior:* To find the optimal match for any given block, it scans every single tile in the target directory. + +`mosaic-v2.py` + +Matches tiles to the source image using an OpenCV computer vision approach alongside a KD-Tree to index and cluster your tile assets by their average color. + +- *Matching Behavior:* When analyzing a block, it queries the KD-Tree to scan only a localized subset of optimal candidate tiles instead of the whole directory, drastically increasing rendering speeds. + +- *Tile Cool-down/Randomization:* Includes a reuse penalty feature to prevent identical tiles from stacking right next to each other. The -penalty option defaults to 0.2. Higher values discourage immediate tile reuse; setting -penalty 0.0 disables randomization completely. + +- *Naming Options:* Features customizable output naming flags. Run with --help to view suffix and export formatting options. + +`mosaic-nokd.py` + +Maintains the OpenCV processing logic found in v2, but strips away the KD-Tree color-grouping index. + +- *Matching Behavior:* Because it skips the KD-Tree index and uses OpenCV, every single tile is scanned sequentially for every block. This is universally the slowest processing option of the three. + +# Final Thoughts + + - Running this workflow in the cloud is highly recommended due to high CPU/RAM utilization, though high-spec local workstations work perfectly as well. + + - Experiment with the -penalty float parameters and alternate script versions to find the structural and visual balance you prefer. + + - When using splicer.py configured to sample one frame per second, an average feature-length film yields roughly 26k–32k tiles. + + - Scale Warning: The larger your asset library grows, the more memory the program requires to map out the lookup array or KD-Tree structures. \ No newline at end of file diff --git a/downloads.py b/downloads.py new file mode 100755 index 0000000..2646051 --- /dev/null +++ b/downloads.py @@ -0,0 +1,56 @@ +################################################## +# This script is intend to be run locally to get +# resulting images from s3. +################################################### + +import boto3 +import os +import argparse + + +def download_s3_folder(bucket_name, prefix, local_dir, region): + s3 = boto3.client('s3', region_name=region) + + paginator = s3.get_paginator('list_objects_v2') + params = {'Bucket': bucket_name, 'Prefix': prefix} + + os.makedirs(local_dir, exist_ok=True) + + for page in paginator.paginate(**params): + if 'Contents' not in page: + print("No files found with the given prefix.") + return + for obj in page['Contents']: + key = obj['Key'] + if key.endswith('/'): + # Skip folders + continue + + # Create the full local path + relative_path = os.path.relpath(key, prefix) + local_file_path = os.path.join(local_dir, relative_path) + local_file_dir = os.path.dirname(local_file_path) + os.makedirs(local_file_dir, exist_ok=True) + + print(f"Downloading {key} to {local_file_path}") + s3.download_file(bucket_name, key, local_file_path) + + +def main(): + parser = argparse.ArgumentParser(description='Download S3 \ + files to local directory.') + parser.add_argument('--bucket', + required=True, help='Name of the S3 bucket.') + parser.add_argument('--region', + default='us-west-2', + help='AWS region of the S3 bucket. Default: us-west-2') + parser.add_argument('--prefix', required=True, + help='Prefix (folder) in S3 to download.') + + args = parser.parse_args() + + download_s3_folder(args.bucket, args.prefix, 'test-results', args.region) + + +if __name__ == "__main__": + main() diff --git a/load-and-run.sh b/load-and-run.sh new file mode 100644 index 0000000..a0af5b1 --- /dev/null +++ b/load-and-run.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +export S3_STORAGE="replace-with-bucket" + +# Explicitly use the python binary inside your virtual environment to ensure stability +VENV_PYTHON=".venv/bin/python3" + +echo "=== Starting Pipeline at $(date) ===" > pipeline_status.log + +# 1. Make the directory that everything saves to and reads from. +#echo "Creating Source directory." >> pipeline_status.log +#mkdir -p /mnt/ebs || { echo "ERROR: Failed to create directory /mnt/ebs" >> pipeline_status.log; exit 1; } +#chmod 777 /mnt/ebs || { echo "ERROR: Failed to 777 /mnt/ebs" >> pipeline_status.log; exit 1; } + +# 2. Download Step (Changed second command to >> to append) +echo "Downloading from videos s3..." >> pipeline_status.log +$VENV_PYTHON -u tools/load_data_from_s3.py --videos > log_0_download.log 2>&1 || { echo "ERROR: could not download videos" >> log_0_download.log; exit 1; } + +echo "Downloading from samples s3..." >> pipeline_status.log +$VENV_PYTHON -u tools/load_data_from_s3.py --samples >> log_0_download.log 2>&1 || { echo "ERROR: could not download source photos" >> log_0_download.log; exit 1; } + +# 3. Preprocessing Steps (Sequential - Changed second to >> and fixed error strings) +echo "Running preprocess step 1 (splicer vids)..." >> pipeline_status.log +$VENV_PYTHON -u tools/splicer.py -video-folder /mnt/ebs/raw_vids -density 1 -out-dir /mnt/ebs/raw_photos > log_1_preprocess.log 2>&1 || { echo "ERROR: failed to splice videos" >> log_1_preprocess.log; exit 1; } + +echo "Running preprocess step 2 (splicer photos)..." >> pipeline_status.log +$VENV_PYTHON -u tools/splicer.py -image-folder /mnt/ebs/raw_photos >> log_1_preprocess.log 2>&1 || { echo "ERROR: failed to process photos" >> log_1_preprocess.log; exit 1; } + +# 4. Main Processing Iterations (Sequential - Using unique log files per variation for easier debugging) +echo "Running process with legacy method..." >> pipeline_status.log +$VENV_PYTHON -u mosaicizers/mosaic.py -folder /mnt/ebs/samples > log_2_legacy.log 2>&1 || { echo "ERROR: could not run legacy mosaics.py" >> log_2_legacy.log; exit 1; } + +echo "Running main script v2 with no penalty..." >> pipeline_status.log +$VENV_PYTHON -u mosaicizers/mosaic-v2.py -folder /mnt/ebs/samples -penalty 0.0 -suffix _p00_v2 > log_3_v2_p00.log 2>&1 || { echo "ERROR: could not run mosaics-v2.py p00" >> log_3_v2_p00.log; exit 1; } + +echo "Running main script v2 with low penalty..." >> pipeline_status.log +$VENV_PYTHON -u mosaicizers/mosaic-v2.py -folder /mnt/ebs/samples -penalty 0.02 -suffix _p02_v2 > log_3_v2_p02.log 2>&1 || { echo "ERROR: could not run mosaics-v2.py p02" >> log_3_v2_p05.log; exit 1; } + +echo "Running main script v2 with high penalty..." >> pipeline_status.log +$VENV_PYTHON -u mosaicizers/mosaic-v2.py -folder /mnt/ebs/samples -penalty 0.08 -suffix _p08_v2 > log_3_v2_p08.log 2>&1 || { echo "ERROR: could not run mosaics-v2.py p08" >> log_3_v2_p15.log; exit 1; } + +echo "Running No KD version of Sci kit with 00 penalty." >> pipeline_status.log +$VENV_PYTHON -u mosaicizers/mosaic-nokd.py -folder /mnt/ebs/samples -penalty 0.0 -suffix _p00_v2 > log_3_v2_p08.log 2>&1 || { echo "ERROR: could not run mosaics-v2.py p08" >> log_3_v2_p15.log;} + +echo "Uploading results!" >> pipeline_status.log +$VENV_PYTHON - u tools/load_data_from_s3 --upload-reulst > log_4_upload.log 2>&1 || { echo "ERROR: could not upload results " >> log_3_v2_p15.log; exit 1; } +echo "=== Pipeline Finished Successfully at $(date) ===" >> pipeline_status.log \ No newline at end of file diff --git a/mosaic.py b/mosaic.py deleted file mode 100644 index 6c5bda2..0000000 --- a/mosaic.py +++ /dev/null @@ -1,238 +0,0 @@ -import sys -import os, os.path -from PIL import Image, ImageOps -from multiprocessing import Process, Queue, cpu_count - -# Change these 3 config parameters to suit your needs... -TILE_SIZE = 50 # height/width of mosaic tiles in pixels -TILE_MATCH_RES = 5 # tile matching resolution (higher values give better fit but require more processing) -ENLARGEMENT = 8 # the mosaic image will be this many times wider and taller than the original - -TILE_BLOCK_SIZE = TILE_SIZE / max(min(TILE_MATCH_RES, TILE_SIZE), 1) -WORKER_COUNT = max(cpu_count() - 1, 1) -OUT_FILE = 'mosaic.jpeg' -EOQ_VALUE = None - -class TileProcessor: - def __init__(self, tiles_directory): - self.tiles_directory = tiles_directory - - def __process_tile(self, tile_path): - try: - img = Image.open(tile_path) - img = ImageOps.exif_transpose(img) - - # tiles must be square, so get the largest square that fits inside the image - w = img.size[0] - h = img.size[1] - min_dimension = min(w, h) - w_crop = (w - min_dimension) / 2 - h_crop = (h - min_dimension) / 2 - img = img.crop((w_crop, h_crop, w - w_crop, h - h_crop)) - - large_tile_img = img.resize((TILE_SIZE, TILE_SIZE), Image.LANCZOS) - small_tile_img = img.resize((int(TILE_SIZE/TILE_BLOCK_SIZE), int(TILE_SIZE/TILE_BLOCK_SIZE)), Image.LANCZOS) - - return (large_tile_img.convert('RGB'), small_tile_img.convert('RGB')) - except: - return (None, None) - - def get_tiles(self): - large_tiles = [] - small_tiles = [] - - print('Reading tiles from {}...'.format(self.tiles_directory)) - - # search the tiles directory recursively - for root, subFolders, files in os.walk(self.tiles_directory): - for tile_name in files: - print('Reading {:40.40}'.format(tile_name), flush=True, end='\r') - tile_path = os.path.join(root, tile_name) - large_tile, small_tile = self.__process_tile(tile_path) - if large_tile: - large_tiles.append(large_tile) - small_tiles.append(small_tile) - - - print('Processed {} tiles.'.format(len(large_tiles))) - - return (large_tiles, small_tiles) - -class TargetImage: - def __init__(self, image_path): - self.image_path = image_path - - def get_data(self): - print('Processing main image...') - img = Image.open(self.image_path) - w = img.size[0] * ENLARGEMENT - h = img.size[1] * ENLARGEMENT - large_img = img.resize((w, h), Image.LANCZOS) - w_diff = (w % TILE_SIZE)/2 - h_diff = (h % TILE_SIZE)/2 - - # if necessary, crop the image slightly so we use a whole number of tiles horizontally and vertically - if w_diff or h_diff: - large_img = large_img.crop((w_diff, h_diff, w - w_diff, h - h_diff)) - - small_img = large_img.resize((int(w/TILE_BLOCK_SIZE), int(h/TILE_BLOCK_SIZE)), Image.LANCZOS) - - image_data = (large_img.convert('RGB'), small_img.convert('RGB')) - - print('Main image processed.') - - return image_data - -class TileFitter: - def __init__(self, tiles_data): - self.tiles_data = tiles_data - - def __get_tile_diff(self, t1, t2, bail_out_value): - diff = 0 - for i in range(len(t1)): - #diff += (abs(t1[i][0] - t2[i][0]) + abs(t1[i][1] - t2[i][1]) + abs(t1[i][2] - t2[i][2])) - diff += ((t1[i][0] - t2[i][0])**2 + (t1[i][1] - t2[i][1])**2 + (t1[i][2] - t2[i][2])**2) - if diff > bail_out_value: - # we know already that this isn't going to be the best fit, so no point continuing with this tile - return diff - return diff - - def get_best_fit_tile(self, img_data): - best_fit_tile_index = None - min_diff = sys.maxsize - tile_index = 0 - - # go through each tile in turn looking for the best match for the part of the image represented by 'img_data' - for tile_data in self.tiles_data: - diff = self.__get_tile_diff(img_data, tile_data, min_diff) - if diff < min_diff: - min_diff = diff - best_fit_tile_index = tile_index - tile_index += 1 - - return best_fit_tile_index - -def fit_tiles(work_queue, result_queue, tiles_data): - # this function gets run by the worker processes, one on each CPU core - tile_fitter = TileFitter(tiles_data) - - while True: - try: - img_data, img_coords = work_queue.get(True) - if img_data == EOQ_VALUE: - break - tile_index = tile_fitter.get_best_fit_tile(img_data) - result_queue.put((img_coords, tile_index)) - except KeyboardInterrupt: - pass - - # let the result handler know that this worker has finished everything - result_queue.put((EOQ_VALUE, EOQ_VALUE)) - -class ProgressCounter: - def __init__(self, total): - self.total = total - self.counter = 0 - - def update(self): - self.counter += 1 - print("Progress: {:04.1f}%".format(100 * self.counter / self.total), flush=True, end='\r') - -class MosaicImage: - def __init__(self, original_img): - self.image = Image.new(original_img.mode, original_img.size) - self.x_tile_count = int(original_img.size[0] / TILE_SIZE) - self.y_tile_count = int(original_img.size[1] / TILE_SIZE) - self.total_tiles = self.x_tile_count * self.y_tile_count - - def add_tile(self, tile_data, coords): - img = Image.new('RGB', (TILE_SIZE, TILE_SIZE)) - img.putdata(tile_data) - self.image.paste(img, coords) - - def save(self, path): - self.image.save(path) - -def build_mosaic(result_queue, all_tile_data_large, original_img_large): - mosaic = MosaicImage(original_img_large) - - active_workers = WORKER_COUNT - while True: - try: - img_coords, best_fit_tile_index = result_queue.get() - - if img_coords == EOQ_VALUE: - active_workers -= 1 - if not active_workers: - break - else: - tile_data = all_tile_data_large[best_fit_tile_index] - mosaic.add_tile(tile_data, img_coords) - - except KeyboardInterrupt: - pass - - mosaic.save(OUT_FILE) - print('\nFinished, output is in', OUT_FILE) - -def compose(original_img, tiles): - print('Building mosaic, press Ctrl-C to abort...') - original_img_large, original_img_small = original_img - tiles_large, tiles_small = tiles - - mosaic = MosaicImage(original_img_large) - - all_tile_data_large = [list(tile.getdata()) for tile in tiles_large] - all_tile_data_small = [list(tile.getdata()) for tile in tiles_small] - - work_queue = Queue(WORKER_COUNT) - result_queue = Queue() - - try: - # start the worker processes that will build the mosaic image - Process(target=build_mosaic, args=(result_queue, all_tile_data_large, original_img_large)).start() - - # start the worker processes that will perform the tile fitting - for n in range(WORKER_COUNT): - Process(target=fit_tiles, args=(work_queue, result_queue, all_tile_data_small)).start() - - progress = ProgressCounter(mosaic.x_tile_count * mosaic.y_tile_count) - for x in range(mosaic.x_tile_count): - for y in range(mosaic.y_tile_count): - large_box = (x * TILE_SIZE, y * TILE_SIZE, (x + 1) * TILE_SIZE, (y + 1) * TILE_SIZE) - small_box = (x * TILE_SIZE/TILE_BLOCK_SIZE, y * TILE_SIZE/TILE_BLOCK_SIZE, (x + 1) * TILE_SIZE/TILE_BLOCK_SIZE, (y + 1) * TILE_SIZE/TILE_BLOCK_SIZE) - work_queue.put((list(original_img_small.crop(small_box).getdata()), large_box)) - progress.update() - - except KeyboardInterrupt: - print('\nHalting, saving partial image please wait...') - - finally: - # put these special values onto the queue to let the workers know they can terminate - for n in range(WORKER_COUNT): - work_queue.put((EOQ_VALUE, EOQ_VALUE)) - -def show_error(msg): - print('ERROR: {}'.format(msg)) - -def mosaic(img_path, tiles_path): - image_data = TargetImage(img_path).get_data() - tiles_data = TileProcessor(tiles_path).get_tiles() - if tiles_data[0]: - compose(image_data, tiles_data) - else: - show_error("No images found in tiles directory '{}'".format(tiles_path)) - -if __name__ == '__main__': - if len(sys.argv) < 3: - show_error('Usage: {} \r'.format(sys.argv[0])) - else: - source_image = sys.argv[1] - tile_dir = sys.argv[2] - if not os.path.isfile(source_image): - show_error("Unable to find image file '{}'".format(source_image)) - elif not os.path.isdir(tile_dir): - show_error("Unable to find tile directory '{}'".format(tile_dir)) - else: - mosaic(source_image, tile_dir) - diff --git a/mosaicizers/MosaicImage.py b/mosaicizers/MosaicImage.py new file mode 100755 index 0000000..0d98630 --- /dev/null +++ b/mosaicizers/MosaicImage.py @@ -0,0 +1,70 @@ +import io +import os +import hashlib +from PIL import Image +from ProgressCounter import ProgressCounter + + +class MosaicImage: + def __init__(self, original_img, tile_size=50): + self.image = Image.new(original_img.mode, original_img.size) + self.x_tile_count = int(original_img.size[0] / tile_size) + self.y_tile_count = int(original_img.size[1] / tile_size) + self.total_tiles = self.x_tile_count * self.y_tile_count + self.tile_size = tile_size + + def add_tile(self, tile_data, coords): + img = Image.new('RGB', (self.tile_size, self.tile_size)) + img.putdata(tile_data) + self.image.paste(img, coords) + + def save(self, output_dir='/mnt/ebs/mosaics', suffix=''): + """ + Saves the image_obj as a .jpeg to /mnt/ebs/mosaics + using its MD5 hash as the filename. + """ + + # Ensure the output directory exists + os.makedirs(output_dir, exist_ok=True) + + # 1. Convert image to bytes to calculate hash + # We save to a temporary buffer or use the raw data + img_byte_arr = io.BytesIO() + self.image.save(img_byte_arr, format='JPEG') + img_bytes = img_byte_arr.getvalue() + + md5_hash = hashlib.md5(img_bytes).hexdigest() + + filename = f"{md5_hash}{suffix}.jpeg" + final_path = os.path.join(output_dir, filename) + + with open(final_path, "wb") as f: + f.write(img_bytes) + + print(f"Mosaic saved to: {final_path}") + return final_path + + def assemble(self, result_queue, all_tile_data_large, + worker_count): + """ + Monitors the result_queue and assembles the image in real-time. + Replaces the standalone build_mosaic function. + """ + print('\nAssembling mosaic blocks...') + progress = ProgressCounter(self.total_tiles) + active_workers = worker_count + EOQ_VALUE = None # Sentinel value to indicate end of queue + + while active_workers > 0: + try: + img_coords, best_fit_tile_index = result_queue.get() + + if img_coords == EOQ_VALUE: + active_workers -= 1 + else: + tile_data = all_tile_data_large[best_fit_tile_index] + self.add_tile(tile_data, img_coords) + progress.update() + except KeyboardInterrupt: + print('\nInterrupt detected, saving progress...') + break diff --git a/mosaicizers/ProcessTimer.py b/mosaicizers/ProcessTimer.py new file mode 100755 index 0000000..071aca1 --- /dev/null +++ b/mosaicizers/ProcessTimer.py @@ -0,0 +1,30 @@ +from datetime import datetime + + +class ProcessTimer: + def __init__(self, process_name, start_time=None): + self.process_name = process_name + # slight non-standard to make sure "now" is when + # init is invoked. + self.start_time = start_time if start_time else datetime.now() + + print(f"[{self.process_name}] Process started at: \ + {self.start_time.strftime('%Y %b %d %H:%M:%S')}") + + def finish(self): + end_time = datetime.now() + print(f"[{self.process_name}] Process ended at: \ + {end_time.strftime('%Y %b %d %H:%M:%S')}") + + # Calculate duration + duration = end_time - self.start_time + total_seconds = int(duration.total_seconds()) + + hours = total_seconds // 3600 + minutes = (total_seconds % 3600) // 60 + seconds = total_seconds % 60 + total_minutes = total_seconds // 60 + + print(f"Total time was: {hours} hours {minutes} \ + minutes {seconds} seconds \ + ({total_minutes} total minutes)") diff --git a/mosaicizers/ProgressCounter.py b/mosaicizers/ProgressCounter.py new file mode 100755 index 0000000..7c980a6 --- /dev/null +++ b/mosaicizers/ProgressCounter.py @@ -0,0 +1,9 @@ +class ProgressCounter: + def __init__(self, total): + self.total = total + self.counter = 0 + + def update(self): + self.counter += 1 + print("Progress: {:04.1f}%".format(100 * self.counter / self.total), + flush=True, end='\r') diff --git a/mosaicizers/TargetImage.py b/mosaicizers/TargetImage.py new file mode 100755 index 0000000..3b732c1 --- /dev/null +++ b/mosaicizers/TargetImage.py @@ -0,0 +1,37 @@ +from PIL import Image + + +class TargetImage: + def __init__(self, image_path, + tile_size=50, tile_res=5, + enlargement_factor=8): + self.image_path = image_path + self.tile_size = tile_size + self.enlargement_factor = enlargement_factor + self.tile_block_size = tile_size / max(min(tile_res, tile_size), 1) + + def get_data(self): + print('Processing main image...') + img = Image.open(self.image_path) + w = img.size[0] * self.enlargement_factor + h = img.size[1] * self.enlargement_factor + large_img = img.resize((w, h), Image.LANCZOS) + w_diff = (w % self.tile_size) / 2 + h_diff = (h % self.tile_size) / 2 + + # crop the image slightly so we use a whole number of tiles + if w_diff or h_diff: + large_img = large_img.crop( + (w_diff, h_diff, w - w_diff, h - h_diff) + ) + + small_img = large_img.resize( + (int(w / self.tile_block_size), int(h / self.tile_block_size)), + Image.LANCZOS + ) + + image_data = (large_img.convert('RGB'), small_img.convert('RGB')) + + print('Main image processed.') + + return image_data diff --git a/mosaicizers/TileFitterSciKit.py b/mosaicizers/TileFitterSciKit.py new file mode 100755 index 0000000..d8f75bd --- /dev/null +++ b/mosaicizers/TileFitterSciKit.py @@ -0,0 +1,108 @@ +import numpy as np +from skimage.metrics import structural_similarity as ssim +from scipy.spatial import KDTree + + +class TileFitterSciKit: + def __init__(self, tiles_data, match_res=5, penalty=0.02): + # tiles_data here is the 'small_tiles' list from TileProcessor + self.penalty = penalty + self.usages = [0.0 for x in range(len(tiles_data))] + self.tiles_data = tiles_data + self.match_res = match_res + self.winners = np.array([], dtype=float) + + print("Initializing KDTree for hybrid search...") + # 1. Convert tiles to NumPy arrays once + # We reshape them from flat lists back into (5x5x3) blocks for SSIM + self.tiles_np = [ + np.array(t).reshape((self.match_res, self.match_res, 3)) + for t in tiles_data + ] + + # 2. Pre-calculate average colors for the Tree + avg_colors = [t.mean(axis=(0, 1)) for t in self.tiles_np] + self.tree = KDTree(np.array(avg_colors)) + print("KDTree + SSIM Hybrid Fitter Ready.") + + def fit_tiles(self, work_queue, result_queue): + EOQ_VALUE = None # Sentinel + while True: + try: + img_data, img_coords = work_queue.get(True) + if img_data == EOQ_VALUE: + break + tile_index = self.get_best_fit_tile(img_data) + result_queue.put((img_coords, tile_index)) + except KeyboardInterrupt: + pass + + self.get_winning_stats() # print stats + # Signal that this worker is done + result_queue.put((EOQ_VALUE, EOQ_VALUE)) + + def get_best_fit_tile(self, img_data): + """ + img_data: A flat list of pixels (from original code's getdata()) + We convert it to NumPy to use the Tree and SSIM. + """ + # Convert the incoming list to a 5x5x3 array + target_np = np.array(img_data).reshape( + (self.match_res, self.match_res, 3)) + + # Step 1: KDTree Pruning (The "Bucket" step) + target_avg = target_np.mean(axis=(0, 1)) + _, indices = self.tree.query(target_avg, k=100) + + best_raw_score = float('-inf') + best_score = -1 + best_fit_tile_index = indices[0] + + # Step 2: SSIM Refinement + for idx in indices: + candidate_np = self.tiles_np[idx] + + # SSIM needs to know the range of pixel values (0-255) + try: + score = ssim(target_np, + candidate_np, + channel_axis=2, + data_range=255, + win_size=self.match_res-2) + score = score - self.usages[idx] + + except ValueError as e: + # codes sometimes breaks and hangs. + print(f'Got exception {e} \ + skipping index{idx} \ + data was {type(candidate_np)}') + continue + + if score > best_score: + best_score = score + best_fit_tile_index = idx + best_raw_score = score + + # Early exit if we find an amazing match + if score > 0.98: + best_raw_score = score + break + + self.winners = np.append(self.winners, best_raw_score) + self.usages[best_fit_tile_index] = \ + self.usages[best_fit_tile_index] + self.penalty + return best_fit_tile_index + + def get_winning_stats(self): + """Prints statistical analysis of the tile match quality.""" + if self.winners.size == 0: + print("No winners recorded yet.") + return + + print("\n--- Tile Match Quality Stats ---") + print(f"Total Blocks Matched: {len(self.winners)}") + print(f"Mean SSIM: {np.mean(self.winners):.4f}") + print(f"Median SSIM: {np.median(self.winners):.4f}") + print(f"Highest SSIM: {np.max(self.winners):.4f} (Best Match)") + print(f"Lowest SSIM: {np.min(self.winners):.4f} (Worst Match)") + print("--------------------------------\n") diff --git a/mosaicizers/TileFitterSciKitNoKD.py b/mosaicizers/TileFitterSciKitNoKD.py new file mode 100755 index 0000000..369eaad --- /dev/null +++ b/mosaicizers/TileFitterSciKitNoKD.py @@ -0,0 +1,108 @@ +import numpy as np +from skimage.metrics import structural_similarity as ssim +from scipy.spatial import KDTree + + +class TileFitterSciKit: + def __init__(self, tiles_data, match_res=5, penalty=0.00): + # tiles_data here is the 'small_tiles' list from TileProcessor + self.penalty = penalty + self.usages = [0.0 for x in range(len(tiles_data))] + self.tiles_data = tiles_data + self.match_res = match_res + self.winners = np.array([], dtype=float) + + print("Initializing KDTree for hybrid search...") + # 1. Convert tiles to NumPy arrays once + # We reshape them from flat lists back into (5x5x3) blocks for SSIM + self.tiles_np = [ + np.array(t).reshape((self.match_res, self.match_res, 3)) + for t in tiles_data + ] + + # 2. Pre-calculate average colors for the Tree + avg_colors = [t.mean(axis=(0, 1)) for t in self.tiles_np] + self.tree = KDTree(np.array(avg_colors)) + print("KDTree + SSIM Hybrid Fitter Ready.") + + def fit_tiles(self, work_queue, result_queue): + EOQ_VALUE = None # Sentinel + while True: + try: + img_data, img_coords = work_queue.get(True) + if img_data == EOQ_VALUE: + break + tile_index = self.get_best_fit_tile(img_data) + result_queue.put((img_coords, tile_index)) + except KeyboardInterrupt: + pass + + self.get_winning_stats() # print stats + # Signal that this worker is done + result_queue.put((EOQ_VALUE, EOQ_VALUE)) + + def get_best_fit_tile(self, img_data): + """ + img_data: A flat list of pixels (from original code's getdata()) + We convert it to NumPy to use the Tree and SSIM. + """ + # Convert the incoming list to a 5x5x3 array + target_np = np.array(img_data).reshape( + (self.match_res, self.match_res, 3)) + + # Step 1: KDTree Pruning (The "Bucket" step) + # target_avg = target_np.mean(axis=(0, 1)) + # _, indices = self.tree.query(target_avg, k=100) + + best_raw_score = float('-inf') + best_score = -1 + best_fit_tile_index = 0 + + # Step 2: SSIM Refinement + for idx in range(0, len(self.tiles_np)): + candidate_np = self.tiles_np[idx] + + # SSIM needs to know the range of pixel values (0-255) + try: + score = ssim(target_np, + candidate_np, + channel_axis=2, + data_range=255, + win_size=self.match_res-2) + score = score - self.usages[idx] + + except ValueError as e: + # codes sometimes breaks and hangs. + print(f'Got exception {e} \ + skipping index{idx} \ + data was {type(candidate_np)}') + continue + + if score > best_score: + best_score = score + best_fit_tile_index = idx + best_raw_score = score + + # Early exit if we find an amazing match + if score > 0.98: + best_raw_score = score + break + + self.winners = np.append(self.winners, best_raw_score) + self.usages[best_fit_tile_index] = \ + self.usages[best_fit_tile_index] + self.penalty + return best_fit_tile_index + + def get_winning_stats(self): + """Prints statistical analysis of the tile match quality.""" + if self.winners.size == 0: + print("No winners recorded yet.") + return + + print("\n--- Tile Match Quality Stats ---") + print(f"Total Blocks Matched: {len(self.winners)}") + print(f"Mean SSIM: {np.mean(self.winners):.4f}") + print(f"Median SSIM: {np.median(self.winners):.4f}") + print(f"Highest SSIM: {np.max(self.winners):.4f} (Best Match)") + print(f"Lowest SSIM: {np.min(self.winners):.4f} (Worst Match)") + print("--------------------------------\n") diff --git a/mosaicizers/TileProcessor.py b/mosaicizers/TileProcessor.py new file mode 100755 index 0000000..11a6aa4 --- /dev/null +++ b/mosaicizers/TileProcessor.py @@ -0,0 +1,125 @@ +import os +import pickle +import gzip +from PIL import Image, ImageOps + +from ProcessTimer import ProcessTimer + + +class TileProcessor: + def __init__(self, tiles_directory, + tile_size=50, tile_res=5, + cache_file='/mnt/ebs/TILES_DATA.pkl.gz'): + self.tiles_directory = tiles_directory + self.tile_size = tile_size + self.tile_block_size = tile_size / max(min(tile_res, tile_size), 1) + self.tile_res = tile_res + self.cache_file = cache_file + + def tiles_save(self, data): + """Saves processed tile data to a compressed pickle file.""" + print(f"Caching processed tiles to {self.cache_file}...") + try: + with gzip.open(self.cache_file, 'wb') as f: + pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL) + print("Cache saved successfully.") + except Exception as e: + print(f"Failed to save cache: {e}") + + def tiles_load(self): + """Loads processed tile data from the compressed pickle file.""" + print(f"Loading cached tiles from {self.cache_file}...") + try: + with gzip.open(self.cache_file, 'rb') as f: + return pickle.load(f) + except Exception as e: + print(f"Failed to load cache: {e}") + return None + + def get_average_color(self, img_path_or_obj): + """ + The 'Resize Trick': Shrinks an image to 1x1 to find the mean RGB. + Accepts either a file path or an existing PIL Image object. + """ + try: + # If it's a path, open it; otherwise assume it's an Image object + if isinstance(img_path_or_obj, str): + img = Image.open(img_path_or_obj) + else: + img = img_path_or_obj + + img = img.convert('RGB') + # BOX resampling is fast and mathematically accurate for averaging + img_tiny = img.resize((1, 1), resample=Image.Resampling.BOX) + return img_tiny.getpixel((0, 0)) + except Exception as e: + print(f"Error processing image: {e}") + return None + + def __process_tile(self, tile_path): + try: + img = Image.open(tile_path) + img = ImageOps.exif_transpose(img) + + # tiles must be square, so get the largest square that fits inside + w = img.size[0] + h = img.size[1] + min_dimension = min(w, h) + w_crop = (w - min_dimension) / 2 + h_crop = (h - min_dimension) / 2 + img = img.crop((w_crop, h_crop, w - w_crop, h - h_crop)) + + large_tile_img = img.resize((self.tile_size, self.tile_size), + Image.LANCZOS) + small_tile_img = img.resize( + (int(self.tile_size / self.tile_block_size), + int(self.tile_size / self.tile_block_size)), + Image.LANCZOS + ) + + return (large_tile_img.convert('RGB'), + small_tile_img.convert('RGB')) + except Exception as e: + print(f"Error processing tile '{tile_path}': {e}") + exit(1) + return (None, None) + + def get_tiles(self): + large_tiles = [] + small_tiles = [] + count = 0 + exp_threshold = 1 # for logging + print('Reading tiles from {}...'.format(self.tiles_directory)) + + if os.path.exists(self.cache_file): + data = self.tiles_load() + if data: + print(f"Loaded {len(data[0])} tiles from cache.") + return data + + # search the tiles directory recursively + tileTimer = ProcessTimer("Tile Making") + for root, subFolders, files in os.walk(self.tiles_directory): + for tile_name in files: + tile_path = os.path.join(root, tile_name) + large_tile, small_tile = self.__process_tile(tile_path) + if large_tile: + large_tiles.append(large_tile) + small_tiles.append(small_tile) + count += 1 + if count == exp_threshold: + print(f'Processed {count} file(s) so far...') + exp_threshold = exp_threshold * 2 + + print('Processed {} tiles.'.format(len(large_tiles))) + tileTimer.finish() + data = (large_tiles, small_tiles) + self.tiles_save(data) + return data + + +if __name__ == '__main__': + # If run, makes cache if none exist + tp = TileProcessor('/mnt/ebs/frames') + data = tp.get_tiles() + print("Tile KDTree is processed and saved as pickle file") diff --git a/mosaicizers/mosaic-nokd.py b/mosaicizers/mosaic-nokd.py new file mode 100755 index 0000000..469c036 --- /dev/null +++ b/mosaicizers/mosaic-nokd.py @@ -0,0 +1,223 @@ +############################################### +# This file will create a mosaic from CLI. +# This version use scikit-learn's KDTree for +# tile matching and multiprocessing for speed. +# Also optimized memory usage by sharing tile data +# across workers. +############################################### + +import os +import os.path +import argparse +from multiprocessing import Queue, cpu_count, get_context + +# These are the custom imports +from MosaicImage import MosaicImage +from TargetImage import TargetImage +from TileProcessor import TileProcessor +from TileFitterSciKitNoKD import TileFitterSciKit +from ProgressCounter import ProgressCounter +from ProcessTimer import ProcessTimer + +# These are now configed by CLI or class defaults +TILE_SIZE = 50 # height/width of mosaic tiles in pixels +TILE_MATCH_RES = 5 # tile matching resolution +# ENLARGEMENT = 8 # mosaic image will be this many times larger +# Image.MAX_IMAGE_PIXELS = None # Dangerous, but allow it for now + +TILE_BLOCK_SIZE = TILE_SIZE / max(min(TILE_MATCH_RES, TILE_SIZE), 1) +WORKER_COUNT = max(cpu_count() - 1, 1) +# OUT_FILE = 'mosaic.jpeg' +EOQ_VALUE = None + + +_global_fitter = None # Shared across all workers + + +def worker_init(tile_data, penalty): + """This runs ONCE when each worker process starts.""" + global _global_fitter + _global_fitter = TileFitterSciKit(tile_data, penalty=penalty) + + +def worker_task(work_queue, result_queue): + """The actual loop the worker runs.""" + # Use the global fitter already sitting in this process's memory + _global_fitter.fit_tiles(work_queue, result_queue) + + +def compose(original_img, tiles, penalty=0.2, suffix=''): + print('Building mosaic, press Ctrl-C to abort...') + original_img_large, original_img_small = original_img + tiles_large, tiles_small = tiles + + # 1. Initialize our mosaic object + mosaic = MosaicImage(original_img_large) + + all_tile_data_large = [list(tile.getdata()) for tile in tiles_large] + all_tile_data_small = [list(tile.getdata()) for tile in tiles_small] + + work_queue = Queue() + result_queue = Queue() + + # 2a Init the Global Fitter. + worker_init(all_tile_data_small, penalty) + ctx = get_context('fork') # from an import at the top + worker_pool = [] + # 2b Rally workers. + for n in range(WORKER_COUNT): + p = ctx.Process( + target=worker_task, + args=(work_queue, result_queue) + ) + p.start() + worker_pool.append(p) + + try: + # 3. Phase 1: Dispatch work (The Producer) + progress = ProgressCounter(mosaic.x_tile_count * mosaic.y_tile_count) + for x in range(mosaic.x_tile_count): + for y in range(mosaic.y_tile_count): + # ... [Your existing cropping logic here] ... + large_box = (x * TILE_SIZE, y * TILE_SIZE, + (x + 1) * TILE_SIZE, (y + 1) * TILE_SIZE) + small_box = (x * TILE_SIZE / TILE_BLOCK_SIZE, + y * TILE_SIZE / TILE_BLOCK_SIZE, + (x + 1) * TILE_SIZE / TILE_BLOCK_SIZE, + (y + 1) * TILE_SIZE / TILE_BLOCK_SIZE) + + work_queue.put( + (list(original_img_small.crop(small_box).getdata()), + large_box)) + progress.update() + + # 4. Phase 2: Collect and Paste (The Consumer) + # We call this in the MAIN process. It will block here until + # the workers finish sending results through the result_queue. + + except KeyboardInterrupt: + print('\nHalting, saving partial image please wait...') + # We tell the workers to stop + for n in range(WORKER_COUNT): + work_queue.put((EOQ_VALUE, EOQ_VALUE)) + + finally: + # Ensure workers are cleaned up + for p in worker_pool: + if p.is_alive(): + work_queue.put((EOQ_VALUE, EOQ_VALUE)) + + mosaic.assemble(result_queue, all_tile_data_large, + WORKER_COUNT) + mosaic.save(suffix=suffix) + + +def show_error(msg): + print('ERROR: {}'.format(msg)) + + +def mosaic(img_path, tiles_data, penalty=0.2, suffix=''): + """ Takes in Tiles Data as an Agrument now """ + image_data = TargetImage(img_path).get_data() + if tiles_data[0]: + imgTimer = ProcessTimer('Single Image ') + compose(image_data, tiles_data, penalty=penalty, suffix=suffix) + imgTimer.finish() + else: + show_error("Tiles Data not propery formatted!") + + +if __name__ == '__main__': + def restricted_float(x): + try: + x = float(x) + except ValueError: + raise argparse.ArgumentTypeError(f"{x} is not a \ + floating-point number") + + if x < 0.0 or x > 0.5: + raise argparse.ArgumentTypeError(f"{x} is not in range \ + [0.01, 0.5]") + return x + + parser = argparse.ArgumentParser( + description="Generate a high-quality mosaic.") + + # Create the mutually exclusive group for input + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument("-file", "-f", help="Path to the source image file.") + group.add_argument("-folder", help="Path to a \ + folder of images (not yet implemented).") + + # The tiles directory with a default value + parser.add_argument("-tiles", "-t", + default="/mnt/ebs/frames", + help="Path to the directory \ + containing tiles (default: /mnt/ebs/frames)") + + parser.add_argument("-out_dir", "-o", + default="/mnt/ebs/mosaics", + help="This is the directory the \ + Mosaics will be save to.") + + parser.add_argument('-suffix', '-s', + help="Type something here if \ + if you want it appended \ + to the file name.") + + parser.add_argument('-penalty', + type=restricted_float, + default=0.2, + help="Set the penalty (range: 0.0 to 0.5, \ + default: 0.2) \ + High Penalty means less repetition of tiles") + args = parser.parse_args() + + # Current logic: Only handle the single file mode + if args.file: + source_image = os.path.abspath(args.file) + tile_dir = os.path.abspath(args.tiles) + + if not os.path.isfile(source_image): + show_error(f"Unable to find image file '{source_image}'") + elif not os.path.isdir(tile_dir): + show_error(f"Unable to find tile directory '{tile_dir}'") + else: + # Trigger the mosaic process + tiles_data = TileProcessor(tile_dir).get_tiles() + mosaic(source_image, tiles_data, + penalty=args.penalty, suffix=args.suffix) + + elif args.folder: + abs_folder = os.path.abspath(args.folder) + tile_dir = os.path.abspath(args.tiles) + try: + samples = [e.path for e in os.scandir(abs_folder) + if e.is_file()] + except FileNotFoundError: + print(f"Error: Folder '{abs_folder}' not found.") + exit(1) + try: + tp = TileProcessor(tile_dir) + tiles_data = tp.get_tiles() + except FileNotFoundError: + print(f"Error: Tile directory '{tile_dir}' not found.") + exit(1) + except Exception as e: + print(f"Error running TileProcessor class '{tile_dir}': {e}") + exit(1) + folderTimer = ProcessTimer('imgs by in folder') + for file_path in samples: + if not os.path.isfile(file_path): + show_error(f"Unable to find image file \ + '{file_path}'") + continue + elif not os.path.isdir(tile_dir): + show_error(f"Unable to find tile directory \ + '{tile_dir}'") + continue + else: + # Trigger the mosaic process + mosaic(file_path, tiles_data, + penalty=args.penalty, suffix=args.suffix) + folderTimer.finish() diff --git a/mosaicizers/mosaic-v2.py b/mosaicizers/mosaic-v2.py new file mode 100755 index 0000000..888ee9c --- /dev/null +++ b/mosaicizers/mosaic-v2.py @@ -0,0 +1,223 @@ +############################################### +# This file will create a mosaic from CLI. +# This version use scikit-learn's KDTree for +# tile matching and multiprocessing for speed. +# Also optimized memory usage by sharing tile data +# across workers. +############################################### + +import os +import os.path +import argparse +from multiprocessing import Queue, cpu_count, get_context + +# These are the custom imports +from MosaicImage import MosaicImage +from TargetImage import TargetImage +from TileProcessor import TileProcessor +from TileFitterSciKit import TileFitterSciKit +from ProgressCounter import ProgressCounter +from ProcessTimer import ProcessTimer + +# These are now configed by CLI or class defaults +TILE_SIZE = 50 # height/width of mosaic tiles in pixels +TILE_MATCH_RES = 5 # tile matching resolution +# ENLARGEMENT = 8 # mosaic image will be this many times larger +# Image.MAX_IMAGE_PIXELS = None # Dangerous, but allow it for now + +TILE_BLOCK_SIZE = TILE_SIZE / max(min(TILE_MATCH_RES, TILE_SIZE), 1) +WORKER_COUNT = max(cpu_count() - 1, 1) +# OUT_FILE = 'mosaic.jpeg' +EOQ_VALUE = None + + +_global_fitter = None # Shared across all workers + + +def worker_init(tile_data, penalty): + """This runs ONCE when each worker process starts.""" + global _global_fitter + _global_fitter = TileFitterSciKit(tile_data, penalty=penalty) + + +def worker_task(work_queue, result_queue): + """The actual loop the worker runs.""" + # Use the global fitter already sitting in this process's memory + _global_fitter.fit_tiles(work_queue, result_queue) + + +def compose(original_img, tiles, penalty=0.2, suffix=''): + print('Building mosaic, press Ctrl-C to abort...') + original_img_large, original_img_small = original_img + tiles_large, tiles_small = tiles + + # 1. Initialize our mosaic object + mosaic = MosaicImage(original_img_large) + + all_tile_data_large = [list(tile.getdata()) for tile in tiles_large] + all_tile_data_small = [list(tile.getdata()) for tile in tiles_small] + + work_queue = Queue() + result_queue = Queue() + + # 2a Init the Global Fitter. + worker_init(all_tile_data_small, penalty) + ctx = get_context('fork') # from an import at the top + worker_pool = [] + # 2b Rally workers. + for n in range(WORKER_COUNT): + p = ctx.Process( + target=worker_task, + args=(work_queue, result_queue) + ) + p.start() + worker_pool.append(p) + + try: + # 3. Phase 1: Dispatch work (The Producer) + progress = ProgressCounter(mosaic.x_tile_count * mosaic.y_tile_count) + for x in range(mosaic.x_tile_count): + for y in range(mosaic.y_tile_count): + # ... [Your existing cropping logic here] ... + large_box = (x * TILE_SIZE, y * TILE_SIZE, + (x + 1) * TILE_SIZE, (y + 1) * TILE_SIZE) + small_box = (x * TILE_SIZE / TILE_BLOCK_SIZE, + y * TILE_SIZE / TILE_BLOCK_SIZE, + (x + 1) * TILE_SIZE / TILE_BLOCK_SIZE, + (y + 1) * TILE_SIZE / TILE_BLOCK_SIZE) + + work_queue.put( + (list(original_img_small.crop(small_box).getdata()), + large_box)) + progress.update() + + # 4. Phase 2: Collect and Paste (The Consumer) + # We call this in the MAIN process. It will block here until + # the workers finish sending results through the result_queue. + + except KeyboardInterrupt: + print('\nHalting, saving partial image please wait...') + # We tell the workers to stop + for n in range(WORKER_COUNT): + work_queue.put((EOQ_VALUE, EOQ_VALUE)) + + finally: + # Ensure workers are cleaned up + for p in worker_pool: + if p.is_alive(): + work_queue.put((EOQ_VALUE, EOQ_VALUE)) + + mosaic.assemble(result_queue, all_tile_data_large, + WORKER_COUNT) + mosaic.save(suffix=suffix) + + +def show_error(msg): + print('ERROR: {}'.format(msg)) + + +def mosaic(img_path, tiles_data, penalty=0.2, suffix=''): + """ Takes in Tiles Data as an Agrument now """ + image_data = TargetImage(img_path).get_data() + if tiles_data[0]: + imgTimer = ProcessTimer('Single Image ') + compose(image_data, tiles_data, penalty=penalty, suffix=suffix) + imgTimer.finish() + else: + show_error("Tiles Data not propery formatted!") + + +if __name__ == '__main__': + def restricted_float(x): + try: + x = float(x) + except ValueError: + raise argparse.ArgumentTypeError(f"{x} is not a \ + floating-point number") + + if x < 0.0 or x > 0.5: + raise argparse.ArgumentTypeError(f"{x} is not in range \ + [0.01, 0.5]") + return x + + parser = argparse.ArgumentParser( + description="Generate a high-quality mosaic.") + + # Create the mutually exclusive group for input + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument("-file", "-f", help="Path to the source image file.") + group.add_argument("-folder", help="Path to a \ + folder of images (not yet implemented).") + + # The tiles directory with a default value + parser.add_argument("-tiles", "-t", + default="/mnt/ebs/frames", + help="Path to the directory \ + containing tiles (default: /mnt/ebs/frames)") + + parser.add_argument("-out_dir", "-o", + default="/mnt/ebs/mosaics", + help="This is the directory the \ + Mosaics will be save to.") + + parser.add_argument('-suffix', '-s', + help="Type something here if \ + if you want it appended \ + to the file name.") + + parser.add_argument('-penalty', + type=restricted_float, + default=0.2, + help="Set the penalty (range: 0.0 to 0.5, \ + default: 0.2) \ + High Penalty means less repetition of tiles") + args = parser.parse_args() + + # Current logic: Only handle the single file mode + if args.file: + source_image = os.path.abspath(args.file) + tile_dir = os.path.abspath(args.tiles) + + if not os.path.isfile(source_image): + show_error(f"Unable to find image file '{source_image}'") + elif not os.path.isdir(tile_dir): + show_error(f"Unable to find tile directory '{tile_dir}'") + else: + # Trigger the mosaic process + tiles_data = TileProcessor(tile_dir).get_tiles() + mosaic(source_image, tiles_data, + penalty=args.penalty, suffix=args.suffix) + + elif args.folder: + abs_folder = os.path.abspath(args.folder) + tile_dir = os.path.abspath(args.tiles) + try: + samples = [e.path for e in os.scandir(abs_folder) + if e.is_file()] + except FileNotFoundError: + print(f"Error: Folder '{abs_folder}' not found.") + exit(1) + try: + tp = TileProcessor(tile_dir) + tiles_data = tp.get_tiles() + except FileNotFoundError: + print(f"Error: Tile directory '{tile_dir}' not found.") + exit(1) + except Exception as e: + print(f"Error running TileProcessor class '{tile_dir}': {e}") + exit(1) + folderTimer = ProcessTimer('imgs by in folder') + for file_path in samples: + if not os.path.isfile(file_path): + show_error(f"Unable to find image file \ + '{file_path}'") + continue + elif not os.path.isdir(tile_dir): + show_error(f"Unable to find tile directory \ + '{tile_dir}'") + continue + else: + # Trigger the mosaic process + mosaic(file_path, tiles_data, + penalty=args.penalty, suffix=args.suffix) + folderTimer.finish() diff --git a/mosaicizers/mosaic.py b/mosaicizers/mosaic.py new file mode 100755 index 0000000..baa32ba --- /dev/null +++ b/mosaicizers/mosaic.py @@ -0,0 +1,395 @@ +import sys +import os +import io +import hashlib +import os.path +import argparse +from PIL import Image, ImageOps +from multiprocessing import Process, Queue, cpu_count +from ProcessTimer import ProcessTimer + +# Change these 3 config parameters to suit your needs... +TILE_SIZE = 50 # height/width of mosaic tiles in pixels +TILE_MATCH_RES = 5 # tile matching resolution +ENLARGEMENT = 8 # mosaic image will be this many times larger +Image.MAX_IMAGE_PIXELS = None # Dangerous, but allow it for now + +TILE_BLOCK_SIZE = TILE_SIZE / max(min(TILE_MATCH_RES, TILE_SIZE), 1) +WORKER_COUNT = max(cpu_count() - 1, 1) +OUT_FILE = 'mosaic.jpeg' +EOQ_VALUE = None + + +class TileProcessor: + def __init__(self, tiles_directory): + self.tiles_directory = tiles_directory + + def __process_tile(self, tile_path): + try: + img = Image.open(tile_path) + img = ImageOps.exif_transpose(img) + + # tiles must be square, so get the largest square that fits inside + w = img.size[0] + h = img.size[1] + min_dimension = min(w, h) + w_crop = (w - min_dimension) / 2 + h_crop = (h - min_dimension) / 2 + img = img.crop((w_crop, h_crop, w - w_crop, h - h_crop)) + + large_tile_img = img.resize((TILE_SIZE, TILE_SIZE), Image.LANCZOS) + small_tile_img = img.resize( + (int(TILE_SIZE / TILE_BLOCK_SIZE), + int(TILE_SIZE / TILE_BLOCK_SIZE)), + Image.LANCZOS + ) + + return (large_tile_img.convert('RGB'), + small_tile_img.convert('RGB')) + except Exception: + return (None, None) + + def get_tiles(self): + large_tiles = [] + small_tiles = [] + count = 0 + exp_threshold = 1 # for logging + print('Reading tiles from {}...'.format(self.tiles_directory)) + + # search the tiles directory recursively + for root, subFolders, files in os.walk(self.tiles_directory): + for tile_name in files: + tile_path = os.path.join(root, tile_name) + large_tile, small_tile = self.__process_tile(tile_path) + if large_tile: + large_tiles.append(large_tile) + small_tiles.append(small_tile) + count += 1 + if count == exp_threshold: + print(f'Processed {count} file(s) so far...') + exp_threshold = exp_threshold * 2 + + print('Processed {} tiles.'.format(len(large_tiles))) + + return (large_tiles, small_tiles) + + +class TargetImage: + def __init__(self, image_path): + self.image_path = image_path + + def get_data(self): + print('Processing main image...') + img = Image.open(self.image_path) + w = img.size[0] * ENLARGEMENT + h = img.size[1] * ENLARGEMENT + large_img = img.resize((w, h), Image.LANCZOS) + w_diff = (w % TILE_SIZE) / 2 + h_diff = (h % TILE_SIZE) / 2 + + # crop the image slightly so we use a whole number of tiles + if w_diff or h_diff: + large_img = large_img.crop( + (w_diff, h_diff, w - w_diff, h - h_diff) + ) + + small_img = large_img.resize( + (int(w / TILE_BLOCK_SIZE), int(h / TILE_BLOCK_SIZE)), + Image.LANCZOS + ) + + image_data = (large_img.convert('RGB'), small_img.convert('RGB')) + + print('Main image processed.') + + return image_data + + +class TileFitter: + def __init__(self, tiles_data): + self.tiles_data = tiles_data + + def __get_tile_diff(self, t1, t2, bail_out_value): + diff = 0 + for i in range(len(t1)): + diff += ((t1[i][0] - t2[i][0])**2 + + (t1[i][1] - t2[i][1])**2 + + (t1[i][2] - t2[i][2])**2) + if diff > bail_out_value: + return diff + return diff + + def get_best_fit_tile(self, img_data): + best_fit_tile_index = None + min_diff = sys.maxsize + tile_index = 0 + + for tile_data in self.tiles_data: + diff = self.__get_tile_diff(img_data, tile_data, min_diff) + if diff < min_diff: + min_diff = diff + best_fit_tile_index = tile_index + tile_index += 1 + + return best_fit_tile_index + + +def fit_tiles(work_queue, result_queue, tiles_data): + tile_fitter = TileFitter(tiles_data) + + while True: + try: + img_data, img_coords = work_queue.get(True) + if img_data == EOQ_VALUE: + break + tile_index = tile_fitter.get_best_fit_tile(img_data) + result_queue.put((img_coords, tile_index)) + except KeyboardInterrupt: + pass + + result_queue.put((EOQ_VALUE, EOQ_VALUE)) + + +class ProgressCounter: + def __init__(self, total): + self.total = total + self.counter = 0 + + def update(self): + self.counter += 1 + print("Progress: {:04.1f}%".format(100 * self.counter / self.total), + flush=True, end='\r') + + +class MosaicImage: + def __init__(self, original_img): + self.image = Image.new(original_img.mode, original_img.size) + self.x_tile_count = int(original_img.size[0] / TILE_SIZE) + self.y_tile_count = int(original_img.size[1] / TILE_SIZE) + self.total_tiles = self.x_tile_count * self.y_tile_count + + def add_tile(self, tile_data, coords): + img = Image.new('RGB', (TILE_SIZE, TILE_SIZE)) + img.putdata(tile_data) + self.image.paste(img, coords) + + def save(self): + """ + Saves the image_obj as a .jpeg to /mnt/ebs/legacy-mosaics + using its MD5 hash as the filename. + """ + output_dir = "/mnt/ebs/mosaics" + + # Ensure the output directory exists + os.makedirs(output_dir, exist_ok=True) + + # 1. Convert image to bytes to calculate hash + # We save to a temporary buffer or use the raw data + img_byte_arr = io.BytesIO() + self.image.save(img_byte_arr, format='JPEG') + img_bytes = img_byte_arr.getvalue() + + md5_hash = hashlib.md5(img_bytes).hexdigest() + + filename = f"{md5_hash}-legacy.jpeg" + final_path = os.path.join(output_dir, filename) + + with open(final_path, "wb") as f: + f.write(img_bytes) + + print(f"Mosaic saved to: {final_path}") + return final_path + + +def build_mosaic(result_queue, all_tile_data_large, original_img_large): + mosaic = MosaicImage(original_img_large) + + active_workers = WORKER_COUNT + while True: + try: + img_coords, best_fit_tile_index = result_queue.get() + + if img_coords == EOQ_VALUE: + active_workers -= 1 + if not active_workers: + break + else: + tile_data = all_tile_data_large[best_fit_tile_index] + mosaic.add_tile(tile_data, img_coords) + + except KeyboardInterrupt: + pass + + OUT_FILEPATH = mosaic.save() + print('\nFinished, output is in', OUT_FILEPATH) + + +def compose(original_img, tiles): + print('Building mosaic, press Ctrl-C to abort...') + original_img_large, original_img_small = original_img + tiles_large, tiles_small = tiles + + mosaic = MosaicImage(original_img_large) + + all_tile_data_large = [list(tile.getdata()) for tile in tiles_large] + all_tile_data_small = [list(tile.getdata()) for tile in tiles_small] + + work_queue = Queue(WORKER_COUNT) + result_queue = Queue() + + try: + Process(target=build_mosaic, args=( + result_queue, all_tile_data_large, original_img_large)).start() + + for n in range(WORKER_COUNT): + Process(target=fit_tiles, args=( + work_queue, result_queue, all_tile_data_small)).start() + + progress = ProgressCounter(mosaic.x_tile_count * mosaic.y_tile_count) + for x in range(mosaic.x_tile_count): + for y in range(mosaic.y_tile_count): + large_box = ( + x * TILE_SIZE, + y * TILE_SIZE, + (x + 1) * TILE_SIZE, + (y + 1) * TILE_SIZE + ) + small_box = ( + x * TILE_SIZE / TILE_BLOCK_SIZE, + y * TILE_SIZE / TILE_BLOCK_SIZE, + (x + 1) * TILE_SIZE / TILE_BLOCK_SIZE, + (y + 1) * TILE_SIZE / TILE_BLOCK_SIZE + ) + work_queue.put( + (list(original_img_small.crop(small_box).getdata()), + large_box) + ) + progress.update() # process updates on every x completion. + + except KeyboardInterrupt: + print('\nHalting, saving partial image please wait...') + + finally: + for n in range(WORKER_COUNT): + work_queue.put((EOQ_VALUE, EOQ_VALUE)) + + +def show_error(msg): + print('ERROR: {}'.format(msg)) + + +def mosaic(img_path, tiles_path): + mosaicTimer = ProcessTimer(f'Running for: {img_path}') + image_data = TargetImage(img_path).get_data() + tiles_data = TileProcessor(tiles_path).get_tiles() + if tiles_data[0]: + compose(image_data, tiles_data) + else: + show_error( + "No images found in tiles directory '{}'".format(tiles_path) + ) + mosaicTimer.finish() + + +""" +if __name__ == '__main__': + if len(sys.argv) < 3: + show_error('Usage: {} \r'.format(sys.argv[0])) + else: + source_image = sys.argv[1] + tile_dir = sys.argv[2] + if not os.path.isfile(source_image): + show_error("Unable to find image file '{}'".format(source_image)) + elif not os.path.isdir(tile_dir): + show_error("Unable to find tile directory '{}'".format(tile_dir)) + else: + mosaic(source_image, tile_dir) +""" + +if __name__ == '__main__': + def restricted_float(x): + try: + x = float(x) + except ValueError: + raise argparse.ArgumentTypeError(f"{x} is not a \ + floating-point number") + + if x < 0.0 or x > 0.5: + raise argparse.ArgumentTypeError(f"{x} is not in range \ + [0.01, 0.5]") + return x + + parser = argparse.ArgumentParser( + description="Generate a high-quality mosaic.") + + # Create the mutually exclusive group for input + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument("-file", "-f", help="Path to the source image file.") + group.add_argument("-folder", help="Path to a \ + folder of images.") + + # The tiles directory with a default value + parser.add_argument("-tiles", "-t", + default="/mnt/ebs/frames", + help="Path to the directory \ + containing tiles (default: /mnt/ebs/frames)") + + parser.add_argument("-out_dir", "-o", + default="/mnt/ebs/mosaics", + help="This is the directory the \ + Mosaics will be save to.") + + parser.add_argument('-suffix', '-s', + help="Type something here if \ + if you want it appended \ + to the file name.") + + parser.add_argument('-penalty', + type=restricted_float, + default=0.2, + help="Set the penalty (range: 0.0 to 0.5, \ + default: 0.2) \ + High Penalty means less repetition of tiles") + args = parser.parse_args() + + # Current logic: Only handle the single file mode + if args.file: + source_image = os.path.abspath(args.file) + tile_dir = os.path.abspath(args.tiles) + + if not os.path.isfile(source_image): + show_error(f"Unable to find image file '{source_image}'") + elif not os.path.isdir(tile_dir): + show_error(f"Unable to find tile directory '{tile_dir}'") + else: + # Trigger the mosaic process + mosaic(source_image, tile_dir) + + elif args.folder: + abs_folder = os.path.abspath(args.folder) + tile_dir = os.path.abspath(args.tiles) + try: + samples = [e.path for e in os.scandir(abs_folder) + if e.is_file()] + except FileNotFoundError: + print(f"Error: Folder '{abs_folder}' not found.") + exit(1) + try: + os.scandir(tile_dir) + except Exception: + print(f"Error: Tile directory '{tile_dir}' not found.") + exit(1) + folderTimer = ProcessTimer('imgs by in folder') + for file_path in samples: + if not os.path.isfile(file_path): + show_error(f"Unable to find image file \ + '{file_path}'") + continue + elif not os.path.isdir(tile_dir): + show_error(f"Unable to find tile directory \ + '{tile_dir}'") + continue + else: + # Trigger the mosaic process + mosaic(file_path, tile_dir) + folderTimer.finish() diff --git a/pep8-enforcement.yaml b/pep8-enforcement.yaml new file mode 100755 index 0000000..7f146e1 --- /dev/null +++ b/pep8-enforcement.yaml @@ -0,0 +1,69 @@ +name: PEP 8 Compliance Check + +on: + workflow_dispatch: + push: + branches: + - '**' # Run on pushes to any branch + +jobs: + pep8_check: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 # IMPORTANT: Fetch all history for git diff to work correctly + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' # Specify your Python version (e.g., '3.9', '3.10', '3.11') + + - name: Install flake8 + run: pip install flake8 + + - name: Get changed Python files + id: changed-files + run: | + # For push events, compare with the previous commit + # Handle cases where commits might not exist or be accessible + if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then + # Manual trigger - check all Python files + CHANGED_PYTHON_FILES=$(find . -name "*.py" -type f | tr '\n' ' ') + elif [ "${{ github.event.before }}" = "0000000000000000000000000000000000000000" ]; then + # First commit - check all Python files + CHANGED_PYTHON_FILES=$(find . -name "*.py" -type f | tr '\n' ' ') + else + # Try git diff, fallback to all files if it fails + CHANGED_PYTHON_FILES=$(git diff --name-only --diff-filter=ACM ${{ github.event.before }} ${{ github.sha }} 2>/dev/null | grep '\.py$' | tr '\n' ' ' || find . -name "*.py" -type f | tr '\n' ' ') + fi + + # Ensure we have a value even if empty + if [ -z "$CHANGED_PYTHON_FILES" ]; then + CHANGED_PYTHON_FILES="" + fi + + echo "changed_python_files=${CHANGED_PYTHON_FILES}" >> $GITHUB_OUTPUT + echo "Event type: ${{ github.event_name }}" + echo "Before commit: ${{ github.event.before }}" + echo "Current commit: ${{ github.sha }}" + echo "Found Python files: '$CHANGED_PYTHON_FILES'" + shell: bash + + - name: Run flake8 on changed Python files + if: success() && steps.changed-files.outputs.changed_python_files + run: | + echo "Checking the following files for PEP 8 compliance:" + echo "${{ steps.changed-files.outputs.changed_python_files }}" + echo "" + # Use xargs to properly handle multiple files + echo "${{ steps.changed-files.outputs.changed_python_files }}" | xargs flake8 + shell: bash + + - name: Report success if no Python files were changed (Optional, for clarity) + # This step only runs if the 'Run flake8' step was skipped due to no Python files being found. + # It provides a clear message in the GitHub Actions UI. + if: success() && !steps.changed-files.outputs.changed_python_files + run: echo "No .py files changed in this push. PEP 8 compliance check passed (skipped)." \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100755 index 0000000..1bc02ab --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +boto3>=1.42.59 +Pillow>=11.3.0 +opencv-python>=4.13.0.92 +numpy>=2.0.2 +scipy>=1.13.1 +scikit-image>=0.24.0 \ No newline at end of file diff --git a/test-image.py b/test-image.py new file mode 100755 index 0000000..c48559c --- /dev/null +++ b/test-image.py @@ -0,0 +1,16 @@ +from PIL import Image +import numpy as np +from skimage.metrics import structural_similarity as ssim + +# 1. Create a fake 100x100 image +data = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8) +img = Image.fromarray(data) +img.save("test.jpg") + +# 2. Try to reload and SSIM it +reloaded = np.array(Image.open("test.jpg")) +try: + score = ssim(reloaded, reloaded, channel_axis=-1) + print(f"SSIM Success! Score: {score}") +except Exception as e: + print(f"SSIM Failed on this hardware: {e}") diff --git a/tools/advanced_parse.py b/tools/advanced_parse.py new file mode 100755 index 0000000..a0d97c2 --- /dev/null +++ b/tools/advanced_parse.py @@ -0,0 +1,141 @@ +######################################################################## +# The image parse makes crops based on random areas of an image +# It takes 40% area crops, and then finds the most complex +# and the most smooth. Using SSIM to identify structural complexity. +# The output is two 50x50 tiles per image, one complex and one smooth. +######################################################################### + +import cv2 +import numpy as np +import argparse +import os +import hashlib +import random +from skimage.metrics import structural_similarity as ssim +from concurrent.futures import ProcessPoolExecutor, as_completed +import multiprocessing + + +def get_image_md5(img): + """Generates an MD5 hash of the image pixel data.""" + return hashlib.md5(img.tobytes()).hexdigest() + + +def get_random_crops(img, num_crops=10): + """Extracts N square crops of a 40% shorter dimension.""" + h, w = img.shape[:2] + crop_size = int(min(h, w) * 0.4) + + crops = [] + for _ in range(num_crops): + y = np.random.randint(0, h - crop_size + 1) + x = np.random.randint(0, w - crop_size + 1) + crops.append(img[y:y+crop_size, x:x+crop_size]) + return crops + + +def batch_process(source_folder, out_dir, limit=None): + valid_exts = ('.jpg', '.jpeg', '.png') + files = [ + os.path.join(source_folder, f) for f in os.listdir(source_folder) + if f.lower().endswith(valid_exts) + ] + + random.shuffle(files) + if limit: + files = files[:limit] + + num_cpus = multiprocessing.cpu_count() + print(f"Launching parallel processing on {num_cpus} CPUs...") + + with ProcessPoolExecutor(max_workers=num_cpus) as executor: + futures = {executor.submit(process_image, f, out_dir): f for f in files} # noqa: E501 + + for future in as_completed(futures): + result = future.result() + print(result) + + +def analyze_structural_complexity(crop): + """ + Returns an SSIM score comparing the crop to a blurred version of itself. + High Score (~1.0): Smooth / Low Detail + Low Score (<0.5): Complex / High Detail + """ + gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY) + # 25x25 kernel provides a significant enough blur to measure structure loss + blurred = cv2.bilateralFilter(gray, 9, 75, 75) + return ssim(gray, blurred) + + +def save_tile(crop, output_dir, suffix): + """Resizes to 50x50, hashes, and saves to disk.""" + tile_50x50 = cv2.resize(crop, (50, 50), interpolation=cv2.INTER_AREA) + file_hash = get_image_md5(tile_50x50) + + filename = f"{file_hash}_{suffix}.png" + save_path = os.path.join(output_dir, filename) + + cv2.imwrite(save_path, tile_50x50) + return save_path + + +def process_image(image_path, out_dir): + img = cv2.imread(image_path) + if img is None: + print(f"Error: Could not load {image_path}") + return + + # 1. Generate candidate crops + candidates = get_random_crops(img, num_crops=15) + + # 2. Score candidates + # Store as list of (score, crop) tuples + scored_crops = [(analyze_structural_complexity(c), c) for c in candidates] + + # 3. Identify best matches + scored_crops.sort(key=lambda x: x[0]) + + most_complex_crop = scored_crops[0][1] + most_smooth_crop = scored_crops[-1][1] + + # 4. Save results + output_dir = out_dir + os.makedirs(output_dir, exist_ok=True) + + path_c = save_tile(most_complex_crop, output_dir, "complex") + path_s = save_tile(most_smooth_crop, output_dir, "smooth") + + print(f"Success! \nComplex: {path_c} \nSmooth: {path_s}") + + +def main(): + parser = argparse.ArgumentParser(description="Tile extractor \ + with batch support.") + + parser.add_argument("--input", type=str, + help="Path to a single input image") + parser.add_argument("--source-folder", type=str, + help="Folder containing images to process") + parser.add_argument("--out-dir", type=str, + default="test_tiles", help="Where to save tiles") + parser.add_argument("--limit", type=int, + help="Max number of images to process from folder") + + args = parser.parse_args() + + if args.source_folder: + if not os.path.isdir(args.source_folder): + print(f"Error: {args.source_folder} is not a directory.") + return + batch_process(args.source_folder, args.out_dir, args.limit) + + elif args.input: + process_image(args.input, args.out_dir) + + else: + parser.print_help() + + +if __name__ == "__main__": + main() diff --git a/tools/load_data_from_s3.py b/tools/load_data_from_s3.py new file mode 100755 index 0000000..ff9c663 --- /dev/null +++ b/tools/load_data_from_s3.py @@ -0,0 +1,150 @@ +import os +import argparse +import sys +from PIL import Image +from s3_access import S3Access +from random import shuffle + + +def resize_in_place(file_path, max_dimension=600): + """ + Opens an image, resizes it to fit within max_dimension + (maintaining aspect ratio), and overwrites the original file. + """ + try: + with Image.open(file_path) as img: + if img.mode != 'RGB': + # Enforces RGB for uniformity + img = img.convert('RGB') + # .thumbnail handles aspect ratio automatically + # It only shrinks if the image is larger than 600px + img.thumbnail((max_dimension, max_dimension), + Image.Resampling.LANCZOS) + img.save(file_path) + return True + except Exception as e: + print(f"Error resizing {file_path}: {e}") + return False + + +def handle_upload(s3, local_dir, bucket_name): + if not os.path.exists(local_dir): + print(f"Skipping upload: {local_dir} does not exist.") + return + + valid_images = ('.png', '.jpg', '.jpeg', '.tiff') + print(f"Uploading results from {local_dir} to S3 \ + {bucket_name}/mosaics'...") + + for filename in os.listdir(local_dir): + if filename.lower().endswith(valid_images): + s3.upload_from_disk(os.path.join(local_dir, filename), + f"mosaics/{filename}") + + +def main(): + parser = argparse.ArgumentParser( + description="Transfer assets between S3 and EBS storage." + ) + + # Create a mutually exclusive group so only one task happens at a time + task_group = parser.add_mutually_exclusive_group(required=True) + + task_group.add_argument( + "--videos", action="store_true", + help="Download video assets" + ) + task_group.add_argument( + "--samples", action="store_true", + help="Download sample images (to be turned into mosaics)" + ) + task_group.add_argument( + "--photos", action="store_true", + help="Download tile photos (sources for mosaic tiles)" + ) + task_group.add_argument( + "--upload-results", action="store_true", + help="Upload local mosaics to S3" + ) + + # Shared modifiers + parser.add_argument( + "--limit", type=int, default=None, + help="Limit the number of files downloaded \ + (e.g., 15 for videos, 3 for samples, 100 for photos)" + ) + parser.add_argument( + "--out-dir", type=str, default=None, + help="Override the default local directory" + ) + parser.add_argument( + "--prefix", type=str, default=None, + help="Override the default S3 prefix (folder) to pull from" + ) + + args = parser.parse_args() + + # Configuration + bucket_name = os.environ.get("S3_STORAGE") + mount_point = "/mnt/ebs" + + if not bucket_name: + print("ERROR: Environment variable 'S3_STORAGE' is not set.") + sys.exit(1) + + s3 = S3Access(bucket_name) + valid_extensions = ('.png', '.jpeg', '.jpg', 'webp') + + # --- Logic Based Assignment for Defaults --- + if args.videos: + local_dir = args.out_dir or os.path.join(mount_point, "raw_vids") + prefix = args.prefix or "lowresvideo" + limit = args.limit or (15 if "limit" in sys.argv else None) + is_image = False + + elif args.samples: + local_dir = args.out_dir or os.path.join(mount_point, "samples") + prefix = args.prefix or "mosaic-art-photos" + limit = args.limit or (3 if "limit" in sys.argv else None) + is_image = True + + elif args.photos: + local_dir = args.out_dir or os.path.join(mount_point, "raw_photos") + prefix = args.prefix or "picsources" + limit = args.limit or (100 if "limit" in sys.argv else None) + is_image = True + + elif args.upload_results: + # Upload logic is slightly different + local_dir = args.out_dir or os.path.join(mount_point, "mosaics") + handle_upload(s3, local_dir, bucket_name) + return + + # --- Unified Download Execution --- + os.makedirs(local_dir, exist_ok=True) + print(f"Syncing S3 '{prefix}' to {local_dir}...") + + keys = s3.list_sources(prefix) + + if limit: + shuffle(keys) + keys = keys[:limit] + + for key in keys: + filename = os.path.basename(key) + if not filename: + continue + + # Filter for images if applicable + if is_image and not key.lower().endswith(valid_extensions): + continue + + dest_path = os.path.join(local_dir, filename) + s3.download_to_disk(key, dest_path) + + if is_image: + resize_in_place(dest_path) + + +if __name__ == "__main__": + main() diff --git a/tools/s3_access.py b/tools/s3_access.py new file mode 100755 index 0000000..5c91c5a --- /dev/null +++ b/tools/s3_access.py @@ -0,0 +1,216 @@ +import boto3 +import os +from botocore.exceptions import ClientError + + +class S3Access: + """S3 access class for managing S3 bucket operations.""" + + def __init__(self, bucket_name: str): + """ + Initialize S3Access with a bucket name. + + @Args: + bucket_name (str): Name of the S3 bucket to connect to + """ + self.bucket_name = bucket_name + self.s3_client = boto3.client('s3') + + def list_sources(self, s3folder: str): + """ + List all objects in the sources folder of the S3 bucket. + + Returns: + list: List of object keys in the sources folder + """ + try: + # List objects with prefix 'sources/' + response = self.s3_client.list_objects_v2( + Bucket=self.bucket_name, + Prefix=s3folder + ) + + # Extract object keys from the response + if 'Contents' in response: + object_keys = [obj['Key'] for obj in response['Contents']] + return object_keys + else: + return [] + + except ClientError as e: + print(f"Error listing sources: {e}") + return [] + + def rename_key(self, current_key, new_key): + """ + Rename an S3 object by copying it to a new key and deleting the old. + + Args: + current_key (str): Current key name of the S3 object + new_key (str): New key name for the S3 object + + Returns: + bool: True if successful, False otherwise + """ + try: + # Copy the object to the new key + copy_source = { + 'Bucket': self.bucket_name, + 'Key': current_key + } + + self.s3_client.copy_object( + Bucket=self.bucket_name, + CopySource=copy_source, + Key=new_key + ) + + # Delete the original object + self.s3_client.delete_object( + Bucket=self.bucket_name, + Key=current_key + ) + + print(f"Successfully renamed {current_key} to {new_key}") + return True + + except ClientError as e: + print(f"Error renaming key {current_key} to {new_key}: {e}") + return False + + def put_object(self, key, file_object): + """ + Upload a file object to S3 with the specified key. + + Args: + key (str): Key name for the S3 object + file_object: File-like object to upload (must support read()) + + Returns: + bool: True if successful, False otherwise + """ + try: + self.s3_client.put_object( + Bucket=self.bucket_name, + Key=key, + Body=file_object + ) + + print(f"Successfully uploaded object to {key}") + return True + + except ClientError as e: + print(f"Error uploading object to {key}: {e}") + return False + + def get_object(self, key): + """ + Get an object from S3 with the specified key. + + Args: + key (str): Key name of the S3 object to retrieve + + Returns: + bytes: File content as bytes, or None if error + """ + try: + response = self.s3_client.get_object( + Bucket=self.bucket_name, + Key=key + ) + + file_content = response['Body'].read() + print(f"Successfully retrieved object {key}") + return file_content + + except ClientError as e: + print(f"Error retrieving object {key}: {e}") + return None + + def object_exists(self, key): + """ + Check if an object exists in S3 with the specified key. + + Args: + key (str): Key name of the S3 object to check + + Returns: + bool: True if object exists, False otherwise + """ + try: + self.s3_client.head_object( + Bucket=self.bucket_name, + Key=key + ) + return True + + except ClientError as e: + if e.response['Error']['Code'] == '404': + return False + else: + print(f"Error checking if object {key} exists: {e}") + return False + + def delete_object(self, key): + """ + Delete an object from S3 with the specified key. + + Args: + key (str): Key name of the S3 object to delete + + Returns: + bool: True if successful, False otherwise + """ + try: + self.s3_client.delete_object( + Bucket=self.bucket_name, + Key=key + ) + + print(f"Successfully deleted object {key}") + return True + + except ClientError as e: + print(f"Error deleting object {key}: {e}") + return False + + def download_to_disk(self, s3_key, local_path): + """ + Downloads a file from S3 directly to the local filesystem. + + Args: + s3_key (str): The key of the object in S3. + local_path (str): Where to save the file locally. + """ + try: + # Create the local directory if it doesn't exist + local_dir = os.path.dirname(local_path) + if local_dir and not os.path.exists(local_dir): + os.makedirs(local_dir) + + self.s3_client.download_file(self.bucket_name, s3_key, local_path) + print(f"Successfully downloaded {s3_key} to {local_path}") + return True + except ClientError as e: + print(f"Error downloading {s3_key}: {e}") + return False + + def upload_from_disk(self, local_path, s3_key): + """ + Uploads a file from the local filesystem to S3. + + Args: + local_path (str): Path to the local file. + s3_key (str): The destination key in S3. + """ + if not os.path.isfile(local_path): + print(f"Error: Local file {local_path} does not exist.") + return False + + try: + self.s3_client.upload_file(local_path, self.bucket_name, s3_key) + print(f"Successfully uploaded {local_path} to {s3_key}") + return True + except ClientError as e: + print(f"Error uploading {local_path}: {e}") + return False diff --git a/tools/splicer.py b/tools/splicer.py new file mode 100755 index 0000000..5c605c7 --- /dev/null +++ b/tools/splicer.py @@ -0,0 +1,227 @@ +import cv2 +import os +import hashlib +import argparse +import random +from multiprocessing import Pool, cpu_count +from functools import partial +from PIL import Image + +WORKER_COUNT = cpu_count() + + +def is_video_readable(v_path): + """ Verifies if a file is not corrupted """ + cap = cv2.VideoCapture(v_path) + if not cap.isOpened(): + return False + + # Try to grab just the first frame to see if the decoder barfs + ret, frame = cap.read() + cap.release() + + return ret and frame is not None + + +def get_file_md5(file_path): + hash_md5 = hashlib.md5() + try: + with open(file_path, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + hash_md5.update(chunk) + return hash_md5.hexdigest() + except Exception: + return None + + +def resize_maintain_aspect(frame, short_side_target=200): + h, w = frame.shape[:2] + if h < w: + ratio = short_side_target / float(h) + new_dim = (int(w * ratio), short_side_target) + else: + ratio = short_side_target / float(w) + new_dim = (short_side_target, int(h * ratio)) + return cv2.resize(frame, new_dim, interpolation=cv2.INTER_AREA) + + +def process_video_worker(absolute_video_path, output_folder, density=5): + """The function each CPU core will run.""" + if not is_video_readable(absolute_video_path): + return f"Error: {os.path.basename(absolute_video_path)}\ + is not readable!" + cap = cv2.VideoCapture(absolute_video_path) + if not cap.isOpened(): + return f"Error: {os.path.basename(absolute_video_path)}" + + fps = cap.get(cv2.CAP_PROP_FPS) + total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + + if fps <= 0: + cap.release() + return f"Bad FPS: {os.path.basename(absolute_video_path)}" + + duration_seconds = total_frames / fps + interval_seconds = 1 if duration_seconds < 60 else density + capture_step = int(fps * interval_seconds) + + file_hash = get_file_md5(absolute_video_path) + saved_count = 0 + current_frame_idx = 0 + + while True: + ret, frame = cap.read() + if not ret: + break + + if current_frame_idx % capture_step == 0: + saved_count += 1 + filename = f"{file_hash}-{saved_count:04d}.png" + processed_frame = resize_maintain_aspect(frame, + short_side_target=200) + save_path = os.path.join(output_folder, filename) + cv2.imwrite(save_path, processed_frame) + + current_frame_idx += 1 + + cap.release() + return f"Done: {os.path.basename(absolute_video_path)} \ + ({saved_count} frames)" + + +def process_image(image_path, double=True, + output_dir="/mnt/ebs/frames"): + + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + try: + with Image.open(image_path) as img: + # Ensure RGB mode + if img.mode != 'RGB': + img = img.convert('RGB') + + width, height = img.size + short_side = min(width, height) + is_square = (width == height) + + # --- Crop 1: Upper Left --- + upper_left_box = (0, 0, short_side, short_side) + _save_processed_crop(img.crop(upper_left_box), output_dir) + + # --- Crop 2: Lower Right (If source not square) --- + if double and not is_square: + lower_right_box = (width - short_side, + height - short_side, + width, height) + _save_processed_crop(img.crop(lower_right_box), + output_dir) + + except Exception as e: + print(f"Error processing {image_path}: {e}") + + +def _save_processed_crop(crop_img, output_dir): + """Internal helper to resize, hash, and save the image.""" + resized = crop_img.resize((200, 200), Image.Resampling.LANCZOS) + hash_name = hashlib.md5(resized.tobytes()).hexdigest() + save_path = os.path.join(output_dir, f"{hash_name}.png") + resized.save(save_path, "PNG") + # way to much logging. Images get into thousands. + # print(f" [#] Saved: {hash_name}.png") + + +def main(): + parser = argparse.ArgumentParser( + description="Multi-threaded Media Processor") + + mode_group = parser.add_mutually_exclusive_group(required=True) + + mode_group.add_argument("-video-file", + help="Path to a single video file.") + mode_group.add_argument("-video-folder", + help="Path to folder for recursive \ + video processing.") + mode_group.add_argument("-image-folder", + help="Path to folder containing \ + images for processing.") + + parser.add_argument("-out-dir", + help="Override default output directory \ + (/mnt/ebs/frames)") + parser.add_argument("-limit", type=int, + help="Limit number of files processed \ + (for testing)") + parser.add_argument("-density", type=int, default=5, + help="In seconds, frequeence \ + of frame capture \ + (> 60s always 1 second)") + + args = parser.parse_args() + + # Decalare output folder + output_folder = args.out_dir or "/mnt/ebs/frames" + density = args.density + if not os.path.exists(output_folder): + os.makedirs(output_folder) + + # --- BRANCH 1: IMAGE PROCESSING --- + if args.image_folder: + print(f"Starting Image Processing mode on: {args.image_folder}") + abs_folder = os.path.abspath(args.image_folder) + photos = [] + for root, _, files in os.walk(abs_folder): + for file in files: + extension = file.split(".")[-1] + if extension.lower() in ["png", "jpg", "jpeg"]: + photos.append(os.path.join(root, file)) + + if args.limit: + random.shuffle(photos) + photos = photos[:args.limit] + + if len(photos) == 0: + print(f'No photos found {abs_folder}') + for photo in photos: + process_image(photo, double=True, + output_dir=output_folder) + return + + # --- BRANCH 2: VIDEO PROCESSING --- + video_files = [] + + if args.video_file: + video_files.append(os.path.abspath(args.video_file)) + + elif args.video_folder: + abs_folder = os.path.abspath(args.video_folder) + for root, _, files in os.walk(abs_folder): + for file in files: + if file.lower().endswith(".mp4"): + video_files.append(os.path.join(root, file)) + + if args.limit: + random.shuffle(video_files) + video_files = video_files[:args.limit] + + if not video_files: + print("No videos found.") + return + + print(f"Distributing {len(video_files)} \ + videos across {WORKER_COUNT} CPUs...") + + # --- THE MULTIPROCESSING MAGIC --- + worker_func = partial(process_video_worker, + output_folder=output_folder, + density=density) + + with Pool(processes=WORKER_COUNT) as pool: + for result in pool.imap_unordered(worker_func, video_files): + print(f" [+] {result}") + + print(f"\nProcessing complete. All frames in: {output_folder}") + + +if __name__ == "__main__": + main()