From 17eec04a35a128d111d8bf20cdc49f779cd5f539 Mon Sep 17 00:00:00 2001 From: Sakib Hossain Date: Fri, 3 Apr 2026 22:32:02 +0600 Subject: [PATCH 01/15] Restructure the project by moving files into proper subdirectories that follows the best practices --- app.py => app/app.py | 2 +- app/schema/validation.py | 0 conf_mat.py => static/conf_mat.py | 0 3 files changed, 1 insertion(+), 1 deletion(-) rename app.py => app/app.py (99%) create mode 100644 app/schema/validation.py rename conf_mat.py => static/conf_mat.py (100%) diff --git a/app.py b/app/app.py similarity index 99% rename from app.py rename to app/app.py index 452e7a0..a0dc7c8 100644 --- a/app.py +++ b/app/app.py @@ -1,7 +1,7 @@ from flask import Flask, render_template, request, jsonify import joblib import numpy as np -from fit import main +from models.fit import main import os app = Flask(__name__) diff --git a/app/schema/validation.py b/app/schema/validation.py new file mode 100644 index 0000000..e69de29 diff --git a/conf_mat.py b/static/conf_mat.py similarity index 100% rename from conf_mat.py rename to static/conf_mat.py From ceb7cce7115b6f27be9cf8726abadc3cb6058776 Mon Sep 17 00:00:00 2001 From: Sakib Hossain Date: Fri, 3 Apr 2026 22:33:02 +0600 Subject: [PATCH 02/15] Clean all mess caused by the restructure and move frontend to `/app` --- app/__init__.py | 0 {static => app/static}/conf_mat.py | 2 +- {static => app/static}/confusion_matrix.png | Bin {static => app/static}/script.js | 0 {static => app/static}/style.css | 0 {templates => app/templates}/index.html | 0 models/__init__.py | 0 fit.py => models/fit.py | 0 notebooks/__init__.py | 0 research.py => notebooks/research.py | 0 research_2.py => notebooks/research_2.py | 0 11 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 app/__init__.py rename {static => app/static}/conf_mat.py (99%) rename {static => app/static}/confusion_matrix.png (100%) rename {static => app/static}/script.js (100%) rename {static => app/static}/style.css (100%) 
rename {templates => app/templates}/index.html (100%) create mode 100644 models/__init__.py rename fit.py => models/fit.py (100%) create mode 100644 notebooks/__init__.py rename research.py => notebooks/research.py (100%) rename research_2.py => notebooks/research_2.py (100%) diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/static/conf_mat.py b/app/static/conf_mat.py similarity index 99% rename from static/conf_mat.py rename to app/static/conf_mat.py index ce3e27f..afbcad1 100644 --- a/static/conf_mat.py +++ b/app/static/conf_mat.py @@ -152,7 +152,7 @@ def plotting(x, y) -> None: # ── Save ───────────────────────────────────────────────────────────────── plt.tight_layout() plt.savefig( - "static/confusion_matrix.png", + "app/static/confusion_matrix.png", dpi=150, facecolor=BG_COLOR, edgecolor='none', diff --git a/static/confusion_matrix.png b/app/static/confusion_matrix.png similarity index 100% rename from static/confusion_matrix.png rename to app/static/confusion_matrix.png diff --git a/static/script.js b/app/static/script.js similarity index 100% rename from static/script.js rename to app/static/script.js diff --git a/static/style.css b/app/static/style.css similarity index 100% rename from static/style.css rename to app/static/style.css diff --git a/templates/index.html b/app/templates/index.html similarity index 100% rename from templates/index.html rename to app/templates/index.html diff --git a/models/__init__.py b/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fit.py b/models/fit.py similarity index 100% rename from fit.py rename to models/fit.py diff --git a/notebooks/__init__.py b/notebooks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/research.py b/notebooks/research.py similarity index 100% rename from research.py rename to notebooks/research.py diff --git a/research_2.py b/notebooks/research_2.py similarity index 100% rename from research_2.py 
rename to notebooks/research_2.py From af74fbc61b17aa1a0e5f316f3fdcf55bab51a1d8 Mon Sep 17 00:00:00 2001 From: Sakib Hossain Date: Fri, 3 Apr 2026 22:33:52 +0600 Subject: [PATCH 03/15] Fix Dockerfile `CMD` and `ENV` command --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 85950e7..fc438bd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,8 +14,8 @@ RUN pip install --upgrade pip && pip install -r requirements.txt COPY . . # example env and port -ENV FLASK_APP=app.py +ENV FLASK_APP=app.app EXPOSE 5000 # run the flask app -CMD ["gunicorn", "--bind", "0.0.0.0:5000", "app:app", "--workers", "2"] +CMD ["gunicorn", "--bind", "0.0.0.0:5000", "app.app:app", "--workers", "2"] From b04cb585dca2529f0f784f0a162cec3d12be0c78 Mon Sep 17 00:00:00 2001 From: Sakib Hossain Date: Fri, 3 Apr 2026 22:34:29 +0600 Subject: [PATCH 04/15] Modify CI scripts to work properly with the restructure --- .github/workflows/docker.yml | 6 ++---- .github/workflows/python-app.yml | 12 +++++------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index d0b1504..215d04b 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -19,14 +19,12 @@ jobs: src: - 'Datasets/**' - '.github/workflows/docker.yml' - - 'app.py' + - 'app/**' - 'Dockerfile' - 'requirements.txt' - - 'templates/**' - - 'static/**' - 'models/**' + - 'notebooks/**' - 'scripts/**' - - 'fit.py' - '.dockerignore' docker-build: diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 72ef93e..ceba3ea 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -24,12 +24,10 @@ jobs: with: filters: | src: - - 'app.py' - - 'fit.py' + - 'app/**' + - 'models/fit.py' - 'requirements.txt' - - 'templates/**' - - 'static/**' - - 'models/**' + - 'notebooks/**' - 'scripts/**' - 'Datasets/**' - '.github/workflows/python-app.yml' @@ -58,8 
+56,8 @@ jobs: flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: Run Flask app and test with Curl run: | - nohup python app.py & + nohup python -m app.app & sleep 10 curl -I http://127.0.0.1:5000 - pkill -f "python app.py" + pkill -f "python -m app.app" From 0f98b2e11fe881475dbebb577356c170aefd00b4 Mon Sep 17 00:00:00 2001 From: Sakib Hossain Date: Fri, 3 Apr 2026 22:35:05 +0600 Subject: [PATCH 05/15] Update README with updated instructions and file tree --- README.md | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 45e5cf2..afd07fa 100644 --- a/README.md +++ b/README.md @@ -85,37 +85,43 @@ CosmoClassifier/ │ ├─ check.yml │ ├─ docker.yml │ └─ python-app.yml +├─ app/ +│ ├─ schema/ +│ │ └─ validation.py +│ ├─ static/ +│ │ ├─ confusion_matrix.png +│ │ ├─ conf_mat.py +│ │ ├─ script.js +│ │ └─ style.css +│ ├─ templates/ +│ │ └─ index.html +│ ├─ app.py +│ └─ __init__.py ├─ Datasets/ │ └─ SDSS_DR18.csv ├─ models/ +│ ├─ __init__.py +│ ├─ fit.py │ ├─ column_names.pkl │ └─ estimator.pkl +├─ notebooks/ +│ ├─ __init__.py +│ ├─ research_2.py +│ └─ research.py ├─ reports/ │ ├─ research_2.html │ └─ research.html ├─ screenshots/ │ ├─ ss_filled.png │ └─ ss_home.png -├─ static/ -│ ├─ confusion_matrix.png -│ ├─ script.js -│ └─ style.css -├─ templates/ -│ └─ index.html ├─ .dockerignore ├─ .env.example ├─ .gitattributes ├─ .gitignore -├─ app.py -├─ conf_mat.py ├─ Dockerfile -├─ fit.py -├─ health_checker.py ├─ LICENSE ├─ README.md -├─ requirements.txt -├─ research_2.py -└─ research.py +└─ requirements.txt ``` --- @@ -132,12 +138,13 @@ pip install -r requirements.txt ``` ### 3️⃣ Run the App ```bash -python app.py +python -m app.app ``` ### 4️⃣ Run Marimo Notebooks (Optional) To explore the research notebooks interactively: ```bash +cd notebooks/ marimo edit research.py # OR marimo edit research_2.py From d6c61d150a1ef13e64d582e6674037a2405aa7e1 Mon Sep 17 00:00:00 
2001 From: Sakib Hossain Date: Fri, 3 Apr 2026 22:44:53 +0600 Subject: [PATCH 06/15] Fix repo name issue in README --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index afd07fa..7d9b278 100644 --- a/README.md +++ b/README.md @@ -129,8 +129,8 @@ CosmoClassifier/ ### 1️⃣ Clone the Repository ```bash -git clone https://github.com/ByteBard58/The_CosmoClassifier -cd The_CosmoClassifier +git clone https://github.com/ByteBard58/CosmoClassifier +cd CosmoClassifier ``` ### 2️⃣ Install Dependencies ```bash From 2b72f5d297dfda8f3d7ce782970faf2a68c754ae Mon Sep 17 00:00:00 2001 From: Sakib Hossain Date: Sat, 4 Apr 2026 07:53:25 +0600 Subject: [PATCH 07/15] Create new `app.py` to get started with FastAPI integration --- app/app.py | 117 ------------------------------------------------- app/prv_app.py | 117 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+), 117 deletions(-) create mode 100644 app/prv_app.py diff --git a/app/app.py b/app/app.py index a0dc7c8..e69de29 100644 --- a/app/app.py +++ b/app/app.py @@ -1,117 +0,0 @@ -from flask import Flask, render_template, request, jsonify -import joblib -import numpy as np -from models.fit import main -import os - -app = Flask(__name__) - -# Check for model and column names files, create them if they don't exist -def load_or_create_models(): - global pipe, column_names - model_path = "models/estimator.pkl" - columns_path = "models/column_names.pkl" - - # Create models directory if it doesn't exist - os.makedirs("models", exist_ok=True) - - # Check if both files exist - if not (os.path.exists(model_path) and os.path.exists(columns_path)): - print("Model or column names file not found. Running fit.py...") - main() # Call the dumping method to create the .pkl files - else: - print("All artifacts are found! 
Loading them now....") - - # Load model and column names - try: - pipe = joblib.load(model_path) - column_names = joblib.load(columns_path) - print("Artifacts are loaded successfully! Ready for prediction....") - except Exception as e: - print(f"Artifacts could not be loaded ! Error: {e}") - -# Load or create models at startup -load_or_create_models() - -# Human-readable labels for inputs -# Human-readable labels for inputs -feature_labels = { - "ra": "Right Ascension (degrees)", - "dec": "Declination (degrees)", - "redshift": "Redshift Value", - "psfMag_r": "PSF Magnitude (r band)", - "u": "u (Ultraviolet Band)", - "g": "g (Green Band)", - "r": "r (Red Band)", - "i": "i (Near Infrared Band)", - "z": "z (Infrared Band)" -} - -# Define the fields we want the user to see, in order -DISPLAY_COLUMNS = ["ra", "dec", "redshift", "psfMag_r", "u", "g", "r", "i", "z"] - -@app.route("/") -def home(): - # Pass the display columns and their labels to the template - readable_names = [feature_labels.get(col, col) for col in DISPLAY_COLUMNS] - return render_template("index.html", columns=DISPLAY_COLUMNS, labels=readable_names, zip=zip) - -@app.route("/predict", methods=["POST"]) -def predict(): - # 1. Collect raw inputs from the form - raw_input = {} - for col in DISPLAY_COLUMNS: - val = request.form.get(col) - try: - raw_input[col] = float(val) if val is not None and str(val).strip() != "" else np.nan - except ValueError: - raw_input[col] = np.nan - - # 2. Compute the derived color features - # logic: u_g_color = u - g, etc. - # Note: If any operand is NaN, the result will be NaN, which the imputer handles. 
- derived_data = {} - derived_data["ra"] = raw_input.get("ra", np.nan) - derived_data["dec"] = raw_input.get("dec", np.nan) - derived_data["redshift"] = raw_input.get("redshift", np.nan) - derived_data["psfMag_r"] = raw_input.get("psfMag_r", np.nan) - - # helper to safely subtract - def safe_sub(a, b): - return raw_input.get(a, np.nan) - raw_input.get(b, np.nan) - - derived_data["u_g_color"] = safe_sub("u", "g") - derived_data["g_r_color"] = safe_sub("g", "r") - derived_data["r_i_color"] = safe_sub("r", "i") - derived_data["i_z_color"] = safe_sub("i", "z") - - # 3. Assemble the final feature vector in the order the model expects - # column_names contains the features used during training (loaded from pickle) - final_features = [] - # We iterate over column_names from the loaded model to ensure correct order - # column_names includes "class", which we skip. - for col in column_names: - if col == "class": - continue - final_features.append(derived_data.get(col, np.nan)) - - # 4. Predict - # Reshape to 2D array: (1, n_features) - arr = np.array([final_features]) - - pred_class = pipe.predict(arr)[0] - probs = pipe.predict_proba(arr)[0] - - classes = list(pipe.classes_) - # map numeric classes to human-readable labels - label_map = {0: "GALAXY", 1: "STAR", 2: "QSO"} - # predicted class label - pred_label = label_map.get(int(pred_class), str(pred_class)) - # probabilities mapped to label names - probs_by_label = {label_map.get(int(cls), str(cls)): round(float(prob), 3) - for cls, prob in zip(classes, probs)} - response = {"prediction": pred_label, "probabilities": probs_by_label} - return jsonify(response) - -if __name__ == "__main__": - app.run() \ No newline at end of file diff --git a/app/prv_app.py b/app/prv_app.py new file mode 100644 index 0000000..a0dc7c8 --- /dev/null +++ b/app/prv_app.py @@ -0,0 +1,117 @@ +from flask import Flask, render_template, request, jsonify +import joblib +import numpy as np +from models.fit import main +import os + +app = 
Flask(__name__) + +# Check for model and column names files, create them if they don't exist +def load_or_create_models(): + global pipe, column_names + model_path = "models/estimator.pkl" + columns_path = "models/column_names.pkl" + + # Create models directory if it doesn't exist + os.makedirs("models", exist_ok=True) + + # Check if both files exist + if not (os.path.exists(model_path) and os.path.exists(columns_path)): + print("Model or column names file not found. Running fit.py...") + main() # Call the dumping method to create the .pkl files + else: + print("All artifacts are found! Loading them now....") + + # Load model and column names + try: + pipe = joblib.load(model_path) + column_names = joblib.load(columns_path) + print("Artifacts are loaded successfully! Ready for prediction....") + except Exception as e: + print(f"Artifacts could not be loaded ! Error: {e}") + +# Load or create models at startup +load_or_create_models() + +# Human-readable labels for inputs +# Human-readable labels for inputs +feature_labels = { + "ra": "Right Ascension (degrees)", + "dec": "Declination (degrees)", + "redshift": "Redshift Value", + "psfMag_r": "PSF Magnitude (r band)", + "u": "u (Ultraviolet Band)", + "g": "g (Green Band)", + "r": "r (Red Band)", + "i": "i (Near Infrared Band)", + "z": "z (Infrared Band)" +} + +# Define the fields we want the user to see, in order +DISPLAY_COLUMNS = ["ra", "dec", "redshift", "psfMag_r", "u", "g", "r", "i", "z"] + +@app.route("/") +def home(): + # Pass the display columns and their labels to the template + readable_names = [feature_labels.get(col, col) for col in DISPLAY_COLUMNS] + return render_template("index.html", columns=DISPLAY_COLUMNS, labels=readable_names, zip=zip) + +@app.route("/predict", methods=["POST"]) +def predict(): + # 1. 
Collect raw inputs from the form + raw_input = {} + for col in DISPLAY_COLUMNS: + val = request.form.get(col) + try: + raw_input[col] = float(val) if val is not None and str(val).strip() != "" else np.nan + except ValueError: + raw_input[col] = np.nan + + # 2. Compute the derived color features + # logic: u_g_color = u - g, etc. + # Note: If any operand is NaN, the result will be NaN, which the imputer handles. + derived_data = {} + derived_data["ra"] = raw_input.get("ra", np.nan) + derived_data["dec"] = raw_input.get("dec", np.nan) + derived_data["redshift"] = raw_input.get("redshift", np.nan) + derived_data["psfMag_r"] = raw_input.get("psfMag_r", np.nan) + + # helper to safely subtract + def safe_sub(a, b): + return raw_input.get(a, np.nan) - raw_input.get(b, np.nan) + + derived_data["u_g_color"] = safe_sub("u", "g") + derived_data["g_r_color"] = safe_sub("g", "r") + derived_data["r_i_color"] = safe_sub("r", "i") + derived_data["i_z_color"] = safe_sub("i", "z") + + # 3. Assemble the final feature vector in the order the model expects + # column_names contains the features used during training (loaded from pickle) + final_features = [] + # We iterate over column_names from the loaded model to ensure correct order + # column_names includes "class", which we skip. + for col in column_names: + if col == "class": + continue + final_features.append(derived_data.get(col, np.nan)) + + # 4. 
Predict + # Reshape to 2D array: (1, n_features) + arr = np.array([final_features]) + + pred_class = pipe.predict(arr)[0] + probs = pipe.predict_proba(arr)[0] + + classes = list(pipe.classes_) + # map numeric classes to human-readable labels + label_map = {0: "GALAXY", 1: "STAR", 2: "QSO"} + # predicted class label + pred_label = label_map.get(int(pred_class), str(pred_class)) + # probabilities mapped to label names + probs_by_label = {label_map.get(int(cls), str(cls)): round(float(prob), 3) + for cls, prob in zip(classes, probs)} + response = {"prediction": pred_label, "probabilities": probs_by_label} + return jsonify(response) + +if __name__ == "__main__": + app.run() \ No newline at end of file From 57746d743996340c070cfd3c95335b42ffe16cbc Mon Sep 17 00:00:00 2001 From: Sakib Hossain Date: Sat, 4 Apr 2026 11:23:54 +0600 Subject: [PATCH 08/15] Update `requirements.txt` with newer list --- requirements.txt | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 6048bf8..609c7d4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,6 @@ +annotated-doc==0.0.4 +annotated-types==0.7.0 +anyio==4.13.0 beautifulsoup4==4.14.3 blinker==1.9.0 bs4==0.0.2 @@ -7,9 +10,11 @@ click==8.3.1 contourpy==1.3.3 cycler==0.12.1 dotenv==0.9.9 +fastapi==0.135.3 Flask==3.1.2 fonttools==4.61.1 gunicorn==23.0.0 +h11==0.16.0 idna==3.11 imbalanced-learn==0.14.1 itsdangerous==2.2.0 @@ -23,6 +28,9 @@ numpy==2.4.1 packaging==25.0 pandas==2.3.3 pillow==12.1.0 +pyarrow==18.0.0 +pydantic==2.12.5 +pydantic_core==2.41.5 pyparsing==3.3.1 python-dateutil==2.9.0.post0 python-dotenv==1.2.1 @@ -35,10 +43,13 @@ setuptools==80.9.0 six==1.17.0 sklearn-compat==0.1.5 soupsieve==2.8.3 +starlette==1.0.0 threadpoolctl==3.6.0 +typing-inspection==0.4.2 typing_extensions==4.15.0 tzdata==2025.3 urllib3==2.6.3 +uvicorn==0.43.0 Werkzeug==3.1.5 wheel==0.45.1 -xgboost==3.1.3 \ No newline at end of file +xgboost==3.1.3 From 
c2f9742983ae4c323f33374bc8503cbbadf06712 Mon Sep 17 00:00:00 2001 From: Sakib Hossain Date: Sat, 4 Apr 2026 11:53:39 +0600 Subject: [PATCH 09/15] Rename older app to `prv_app.py` --- app/prv_app.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/app/prv_app.py b/app/prv_app.py index a0dc7c8..80030f7 100644 --- a/app/prv_app.py +++ b/app/prv_app.py @@ -1,14 +1,16 @@ from flask import Flask, render_template, request, jsonify +from sklearn.pipeline import Pipeline +import numpy as np import joblib import numpy as np from models.fit import main +from typing import Tuple import os app = Flask(__name__) # Check for model and column names files, create them if they don't exist -def load_or_create_models(): - global pipe, column_names +def load_or_create_models() -> Tuple[Pipeline,np.ndarray]: model_path = "models/estimator.pkl" columns_path = "models/column_names.pkl" @@ -27,11 +29,12 @@ def load_or_create_models(): pipe = joblib.load(model_path) column_names = joblib.load(columns_path) print("Artifacts are loaded successfully! Ready for prediction....") + return pipe,column_names except Exception as e: print(f"Artifacts could not be loaded ! 
Error: {e}") # Load or create models at startup -load_or_create_models() +pipe, column_names = load_or_create_models() # Human-readable labels for inputs # Human-readable labels for inputs From ca323dcd830b6c5d9292d31895659ae3cbb414b4 Mon Sep 17 00:00:00 2001 From: Sakib Hossain Date: Sat, 4 Apr 2026 11:53:56 +0600 Subject: [PATCH 10/15] Add new FastAPI-powered `app.py` --- app/app.py | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/app/app.py b/app/app.py index e69de29..324fc43 100644 --- a/app/app.py +++ b/app/app.py @@ -0,0 +1,98 @@ +from fastapi import FastAPI, Depends, Request +from fastapi.responses import JSONResponse +from typing import Tuple,List +from sklearn.pipeline import Pipeline +from models.fit import main +from .schema.validation import UserInput +from pathlib import Path +from contextlib import asynccontextmanager +import joblib +import numpy as np +import os + +def load_or_create_models() -> Tuple[Pipeline,np.ndarray]: + model_path = Path("models","estimator.pkl") + columns_path = Path("models","column_names.pkl") + + # Create models directory if it doesn't exist + os.makedirs("models", exist_ok=True) + + # Check if both files exist + if not (os.path.exists(model_path) and os.path.exists(columns_path)): + print("Model or column names file not found. Running fit.py...") + main() # Call the dumping method to create the .pkl files + else: + print("All artifacts are found! Loading them now....") + + # Load model and column names + try: + pipe = joblib.load(model_path) + column_names = joblib.load(columns_path) + print("Artifacts are loaded successfully! 
Ready for prediction....") + return pipe,column_names + except Exception as e: + raise RuntimeError(f"Artifacts could not be loaded: {e}") + +@asynccontextmanager +async def lifespan(app:FastAPI): + # Load pipeline at start + pipe,column_names = load_or_create_models() + + app.state.pipe = pipe + app.state.column_names = column_names + + yield + +app = FastAPI(title="CosmoClassifier", version="2.0(FastAPI)", lifespan=lifespan) + +# Helper for providing the pipeline and column names +def get_model(request:Request) -> Tuple[Pipeline,np.ndarray]: + return request.app.state.pipe, request.app.state.column_names + +# Helper for subtraction in post route +def safe_sub(a:float,b:float,val:dict): + return val.get(a,None) - val.get(b,None) + +@app.get("/") +def home(): + msg = "Welcome to CosmoClassifier API. Provide the designated inputs " \ + "in the `predict` route to run predictions." \ + " Check the GitHub Repository for more." + return msg + +@app.post("/predict",status_code=201) +def prediction_ops(value:UserInput, dep:Tuple[Pipeline,np.ndarray] = Depends(get_model)): + pipe, column_names = dep + column_names:List[str] = column_names.tolist() + + # Preprocessing + value:dict = value.model_dump(mode="json") + kick = ["u","g","r","i","z"] + final_value = {key:val for key,val in value.items() if key not in kick} + final_value["u_g_color"] = safe_sub("u","g",value) + final_value["g_r_color"] = safe_sub("g","r",value) + final_value["r_i_color"] = safe_sub("r","i",value) + final_value["i_z_color"] = safe_sub("i","z",value) + + # Order Check and running prediction + final_res = [] + for col in column_names: + if col == "class": + continue + else: + final_res.append(final_value.get(col,None)) + final_res = np.array(final_res).reshape(1,-1) + + pred_label = int(pipe.predict(final_res)[0]) + pred_proba = pipe.predict_proba(final_res)[0].tolist() + + # Postprocessing + label_map = {0: "GALAXY", 1: "STAR", 2: "QSO"} + pred_label = label_map.get(pred_label) + pred_proba = 
{lmv:round(proba,3) for lmv,proba in zip(label_map.values(), pred_proba)} + + msg = {"message":"prediction successful","predicted_class":pred_label, "prediction_probability":pred_proba} + return JSONResponse( + status_code=201, content=msg + ) + From 258d3c4e98f8eca0eec6a98ba6efcfd598726847 Mon Sep 17 00:00:00 2001 From: Sakib Hossain Date: Sat, 4 Apr 2026 11:56:02 +0600 Subject: [PATCH 11/15] Add pydantic type validation script --- app/schema/validation.py | 42 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/app/schema/validation.py b/app/schema/validation.py index e69de29..914c9c8 100644 --- a/app/schema/validation.py +++ b/app/schema/validation.py @@ -0,0 +1,42 @@ +from pydantic import BaseModel, Field +from typing import Annotated + +class UserInput(BaseModel): + ra : Annotated[float,Field( + ..., description="Right Ascension (required)", + ge=0, le=360,examples=[179.632888,183.3014244], allow_inf_nan=False + )] + dec : Annotated[float,Field( + ..., description="Declination (required)", + ge= -90, le=90, examples=[1.185939326,1.192324477], allow_inf_nan=False + )] + redshift : Annotated[float, Field( + ..., description="Redshift Value (required)", + ge=-2,le=10, examples=[-0.000133202, 0.02130958], allow_inf_nan=False + )] + psfMag_r : Annotated[float, Field( + ..., description="PSF Magnitude r band (required)", + ge = -30, le = 30, examples=[16.77047, 18.43308], allow_inf_nan=False + )] + u : Annotated[float, Field( + ..., description="Ultraviolet Band (required)", + ge = -30, le = 30, examples=[16.77047, 18.43308], allow_inf_nan=False + )] + g : Annotated[float, Field( + ..., description="Green Band (required)", + ge = -30, le = 30, examples=[16.77047, 18.43308], allow_inf_nan=False + )] + r : Annotated[float, Field( + ..., description="Red Band (required)", + ge = -30, le = 30, examples=[16.77047, 18.43308], allow_inf_nan=False + )] + i : Annotated[float, Field( + ..., description="Near-infrared Band (required)", + ge = 
-30, le = 30, examples=[16.77047, 18.43308], allow_inf_nan=False + )] + z : Annotated[float, Field( + ..., description="Infrared Band (required)", + ge = -30, le = 30, examples=[16.77047, 18.43308], allow_inf_nan=False + )] + + \ No newline at end of file From ac1e8a54087be27620f2578696f8002ada46bb22 Mon Sep 17 00:00:00 2001 From: Sakib Hossain Date: Sat, 4 Apr 2026 11:59:47 +0600 Subject: [PATCH 12/15] Add `/health` route --- app/app.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/app/app.py b/app/app.py index 324fc43..d57a0b3 100644 --- a/app/app.py +++ b/app/app.py @@ -53,6 +53,15 @@ def get_model(request:Request) -> Tuple[Pipeline,np.ndarray]: def safe_sub(a:float,b:float,val:dict): return val.get(a,None) - val.get(b,None) +@app.get("/health",status_code=200) +def health(): + msg = { + "title":"CosmoClassifier", + "version":"2.0(FastAPI)", + "status":"All systems operational" + } + return JSONResponse(status_code=200,content=msg) + @app.get("/") def home(): msg = "Welcome to CosmoClassifier API. 
Provide the designated inputs " \ From b803a11d43453e5f113b8f10db7452fa2280b104 Mon Sep 17 00:00:00 2001 From: Sakib Hossain Date: Sat, 4 Apr 2026 12:22:34 +0600 Subject: [PATCH 13/15] Remove old flask app --- app/prv_app.py | 120 ------------------------------------------------- 1 file changed, 120 deletions(-) delete mode 100644 app/prv_app.py diff --git a/app/prv_app.py b/app/prv_app.py deleted file mode 100644 index 80030f7..0000000 --- a/app/prv_app.py +++ /dev/null @@ -1,120 +0,0 @@ -from flask import Flask, render_template, request, jsonify -from sklearn.pipeline import Pipeline -import numpy as np -import joblib -import numpy as np -from models.fit import main -from typing import Tuple -import os - -app = Flask(__name__) - -# Check for model and column names files, create them if they don't exist -def load_or_create_models() -> Tuple[Pipeline,np.ndarray]: - model_path = "models/estimator.pkl" - columns_path = "models/column_names.pkl" - - # Create models directory if it doesn't exist - os.makedirs("models", exist_ok=True) - - # Check if both files exist - if not (os.path.exists(model_path) and os.path.exists(columns_path)): - print("Model or column names file not found. Running fit.py...") - main() # Call the dumping method to create the .pkl files - else: - print("All artifacts are found! Loading them now....") - - # Load model and column names - try: - pipe = joblib.load(model_path) - column_names = joblib.load(columns_path) - print("Artifacts are loaded successfully! Ready for prediction....") - return pipe,column_names - except Exception as e: - print(f"Artifacts could not be loaded ! 
Error: {e}") - -# Load or create models at startup -pipe, column_names = load_or_create_models() - -# Human-readable labels for inputs -# Human-readable labels for inputs -feature_labels = { - "ra": "Right Ascension (degrees)", - "dec": "Declination (degrees)", - "redshift": "Redshift Value", - "psfMag_r": "PSF Magnitude (r band)", - "u": "u (Ultraviolet Band)", - "g": "g (Green Band)", - "r": "r (Red Band)", - "i": "i (Near Infrared Band)", - "z": "z (Infrared Band)" -} - -# Define the fields we want the user to see, in order -DISPLAY_COLUMNS = ["ra", "dec", "redshift", "psfMag_r", "u", "g", "r", "i", "z"] - -@app.route("/") -def home(): - # Pass the display columns and their labels to the template - readable_names = [feature_labels.get(col, col) for col in DISPLAY_COLUMNS] - return render_template("index.html", columns=DISPLAY_COLUMNS, labels=readable_names, zip=zip) - -@app.route("/predict", methods=["POST"]) -def predict(): - # 1. Collect raw inputs from the form - raw_input = {} - for col in DISPLAY_COLUMNS: - val = request.form.get(col) - try: - raw_input[col] = float(val) if val is not None and str(val).strip() != "" else np.nan - except ValueError: - raw_input[col] = np.nan - - # 2. Compute the derived color features - # logic: u_g_color = u - g, etc. - # Note: If any operand is NaN, the result will be NaN, which the imputer handles. - derived_data = {} - derived_data["ra"] = raw_input.get("ra", np.nan) - derived_data["dec"] = raw_input.get("dec", np.nan) - derived_data["redshift"] = raw_input.get("redshift", np.nan) - derived_data["psfMag_r"] = raw_input.get("psfMag_r", np.nan) - - # helper to safely subtract - def safe_sub(a, b): - return raw_input.get(a, np.nan) - raw_input.get(b, np.nan) - - derived_data["u_g_color"] = safe_sub("u", "g") - derived_data["g_r_color"] = safe_sub("g", "r") - derived_data["r_i_color"] = safe_sub("r", "i") - derived_data["i_z_color"] = safe_sub("i", "z") - - # 3. 
Assemble the final feature vector in the order the model expects - # column_names contains the features used during training (loaded from pickle) - final_features = [] - # We iterate over column_names from the loaded model to ensure correct order - # column_names includes "class", which we skip. - for col in column_names: - if col == "class": - continue - final_features.append(derived_data.get(col, np.nan)) - - # 4. Predict - # Reshape to 2D array: (1, n_features) - arr = np.array([final_features]) - - pred_class = pipe.predict(arr)[0] - probs = pipe.predict_proba(arr)[0] - - classes = list(pipe.classes_) - # map numeric classes to human-readable labels - label_map = {0: "GALAXY", 1: "STAR", 2: "QSO"} - # predicted class label - pred_label = label_map.get(int(pred_class), str(pred_class)) - # probabilities mapped to label names - probs_by_label = {label_map.get(int(cls), str(cls)): round(float(prob), 3) - for cls, prob in zip(classes, probs)} - response = {"prediction": pred_label, "probabilities": probs_by_label} - return jsonify(response) - -if __name__ == "__main__": - app.run() \ No newline at end of file From 1b642fb6dec40fb5ba0f7ca7c897f234c89cbd03 Mon Sep 17 00:00:00 2001 From: Sakib Hossain Date: Sat, 4 Apr 2026 15:53:50 +0600 Subject: [PATCH 14/15] Link the existing vanilla frontend with the new fastapi backend --- README.md | 8 +++--- app/app.py | 34 +++++++++++++++++++---- app/static/script.js | 22 +++++++++++++-- app/templates/index.html | 60 ++++++++++++++++++++++++++++++++-------- 4 files changed, 99 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 7d9b278..726bfdc 100644 --- a/README.md +++ b/README.md @@ -138,7 +138,7 @@ pip install -r requirements.txt ``` ### 3️⃣ Run the App ```bash -python -m app.app +uvicorn app.app:app --reload ``` ### 4️⃣ Run Marimo Notebooks (Optional) @@ -166,14 +166,14 @@ The image is built on both ARM64 and AMD64 architectures, so that it can run on 2. 
Open Terminal and run: ```bash docker pull bytebard101/cosmoclassifier:latest -docker run --rm -p 5000:5000 bytebard101/cosmoclassifier:latest +docker run --rm -p 8000:8000 bytebard101/cosmoclassifier:latest ``` 3. If your machine faces a port conflict, you will need to assign another port. Try to run this: ```bash -docker run --rm -p 5001:5000 bytebard101/cosmoclassifier:latest +docker run --rm -p 8001:8000 bytebard101/cosmoclassifier:latest ``` > If you followed Step 2 and the command ran successfully, then **DO NOT** follow this step. -4. The app will be live at localhost:5000. Open your browser and navigate to [http://127.0.0.1:5000](http://127.0.0.1:5000/) (or [http://127.0.0.1:5001](http://127.0.0.1:5000/) if you followed Step 3). +4. The app will be live at localhost:8000. Open your browser and navigate to [http://127.0.0.1:8000](http://127.0.0.1:8000/) (or [http://127.0.0.1:8001](http://127.0.0.1:8001/) if you followed Step 3). Check [Docker Documentation](https://docs.docker.com/) to learn more about Docker and it's commands. 
diff --git a/app/app.py b/app/app.py index d57a0b3..d9569a8 100644 --- a/app/app.py +++ b/app/app.py @@ -1,5 +1,7 @@ from fastapi import FastAPI, Depends, Request -from fastapi.responses import JSONResponse +from fastapi.responses import JSONResponse, FileResponse +from fastapi.staticfiles import StaticFiles +from fastapi.exceptions import RequestValidationError from typing import Tuple,List from sklearn.pipeline import Pipeline from models.fit import main @@ -45,6 +47,24 @@ async def lifespan(app:FastAPI): app = FastAPI(title="CosmoClassifier", version="2.0(FastAPI)", lifespan=lifespan) +@app.exception_handler(RequestValidationError) +async def validation_exception_handler(request: Request, exc: RequestValidationError): + errors = exc.errors() + # Flatten error messages for the frontend + error_msgs = [] + for err in errors: + field = err['loc'][-1] + msg = err['msg'] + error_msgs.append(f"Invalid {field}: {msg}") + + return JSONResponse( + status_code=422, + content={"message": "validation failed", "error": "; ".join(error_msgs)}, + ) + +# Mount static files +app.mount("/static", StaticFiles(directory=Path(__file__).parent / "static"), name="static") + # Helper for providing the pipeline and column names def get_model(request:Request) -> Tuple[Pipeline,np.ndarray]: return request.app.state.pipe, request.app.state.column_names @@ -64,10 +84,8 @@ def health(): @app.get("/") def home(): - msg = "Welcome to CosmoClassifier API. Provide the designated inputs " \ - "in the `predict` route to run predictions." \ - " Check the GitHub Repository for more." 
- return msg + index_path = Path(__file__).parent / "templates" / "index.html" + return FileResponse(index_path) @app.post("/predict",status_code=201) def prediction_ops(value:UserInput, dep:Tuple[Pipeline,np.ndarray] = Depends(get_model)): @@ -100,7 +118,11 @@ def prediction_ops(value:UserInput, dep:Tuple[Pipeline,np.ndarray] = Depends(get pred_label = label_map.get(pred_label) pred_proba = {lmv:round(proba,3) for lmv,proba in zip(label_map.values(), pred_proba)} - msg = {"message":"prediction successful","predicted_class":pred_label, "prediction_probability":pred_proba} + msg = { + "message": "prediction successful", + "prediction": pred_label, + "probabilities": pred_proba + } return JSONResponse( status_code=201, content=msg ) diff --git a/app/static/script.js b/app/static/script.js index 1352dc1..ccd2f7a 100644 --- a/app/static/script.js +++ b/app/static/script.js @@ -63,12 +63,28 @@ function initPredictForm() { try { const formData = new FormData(form); + const dataObj = Object.fromEntries(formData.entries()); + + // Convert numeric strings to numbers + for (let key in dataObj) { + if (!isNaN(dataObj[key]) && dataObj[key] !== "") { + dataObj[key] = parseFloat(dataObj[key]); + } + } + const response = await fetch("/predict", { method: "POST", - body: formData + headers: { + "Content-Type": "application/json" + }, + body: JSON.stringify(dataObj) }); - if (!response.ok) throw new Error("Prediction failed"); + if (!response.ok) { + const errorData = await response.json(); + const errorMessage = errorData.error || errorData.message || "Prediction failed"; + throw new Error(errorMessage); + } const data = await response.json(); @@ -89,7 +105,7 @@ function initPredictForm() { } catch (error) { console.error(error); - showToast('Error: Could not connect to the prediction service.', 'error'); + showToast(`Error: ${error.message}`, 'error'); // Show awaiting state again on error awaitingDiv.style.display = 'flex'; resultDiv.classList.add("hidden"); diff --git 
a/app/templates/index.html b/app/templates/index.html index 2bbd368..b306408 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -10,7 +10,7 @@ - + @@ -68,15 +68,51 @@

Classify Celestial Objects

- {% for col, label in zip(columns, labels) %} - {% if col != 'class' %} -
- - -
-
- {% endif %} - {% endfor %} +
+ + +
+
+
+ + +
+
+
+ + +
+
+
+ + +
+
+
+ + +
+
+
+ + +
+
+
+ + +
+
+
+ + +
+
+
+ + +
+
@@ -157,7 +193,7 @@

Model Statistics

Confusion Matrix

- Confusion Matrix + Confusion Matrix
@@ -257,6 +293,6 @@

~100K Training Samples

- + From e576c2d81441c38d9e68fc07ff80cef9dbaabce4 Mon Sep 17 00:00:00 2001 From: Sakib Hossain Date: Sat, 4 Apr 2026 15:54:56 +0600 Subject: [PATCH 15/15] Remove Flask code and add fastapi configuration in `Dockerfile` and `python-app.yml` --- .github/workflows/python-app.yml | 8 ++++---- Dockerfile | 7 +++---- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index ceba3ea..f7fbfbd 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -54,10 +54,10 @@ jobs: flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Run Flask app and test with Curl + - name: Run FastAPI app and test with Curl run: | - nohup python -m app.app & + nohup uvicorn app.app:app & sleep 10 - curl -I http://127.0.0.1:5000 - pkill -f "python -m app.app" + curl -I http://127.0.0.1:8000 + pkill -f uvicorn diff --git a/Dockerfile b/Dockerfile index fc438bd..be0ab51 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,8 +14,7 @@ RUN pip install --upgrade pip && pip install -r requirements.txt COPY . . # example env and port -ENV FLASK_APP=app.app -EXPOSE 5000 +EXPOSE 8000 -# run the flask app -CMD ["gunicorn", "--bind", "0.0.0.0:5000", "app.app:app", "--workers", "2"] +# run the fastapi app with gunicorn and uvicorn workers +CMD ["gunicorn", "-w", "2", "-k", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:8000", "app.app:app"]