From af4e907ebe86708e379cc84db087678d67bd9d4c Mon Sep 17 00:00:00 2001
From: Aleksandr <105745366+AlexTkDev@users.noreply.github.com>
Date: Sun, 19 Apr 2026 23:34:51 +0300
Subject: [PATCH] Switch local LLM to Gemma 4 with env-based config; run pylint on PRs only

---
 .github/workflows/pylint.yml |   6 +-
 README.md                    |  38 +++++-----
 config.py                    |  23 ++++++
 models/README.md             | 134 +++++++++--------------------------
 services/llm.py              |   2 +-
 services/local_llm.py        |  63 ++++++++++++----
 tests/test_config.py         |  27 +++++++
 tests/test_local_llm.py      |  29 ++++++++
 8 files changed, 185 insertions(+), 137 deletions(-)
 create mode 100644 tests/test_config.py
 create mode 100644 tests/test_local_llm.py

diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
index d29f439..132c312 100644
--- a/.github/workflows/pylint.yml
+++ b/.github/workflows/pylint.yml
@@ -1,17 +1,17 @@
 name: Pylint
 
-on: [ push ]
+on: [ pull_request ]
 
 jobs:
   build:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.10", "3.11", "3.12", "3.13"]
+        python-version: ["3.10", "3.14"]
     steps:
       - uses: actions/checkout@v4
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v3
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
       - name: Install dependencies
diff --git a/README.md b/README.md
index c6292b3..79b8b6a 100644
--- a/README.md
+++ b/README.md
@@ -4,10 +4,10 @@
 
 > **Note:** All code comments and docstrings are in English for international collaboration and code clarity. All user-facing messages and buttons are automatically translated to the user's selected language.
 
-## 🚀 What's New in v4.0.0
+## 🚀 What's New in v4.1.0
 
 - **🆕 Multi-Level LLM Architecture**: OpenAI → Groq → Local LLM → Fallback Plan
-- **🆕 Local LLM Integration**: TinyLlama 1.1B model for offline operation
+- **🆕 Local LLM Integration**: Google Gemma 4 model for offline operation
 - **🆕 Guaranteed Availability**: Bot works even without internet connection
 - **🆕 Enhanced Fallback System**: Robust error handling and service switching
 - **🆕 Improved Plan Quality**: Professional-grade study plan templates
@@ -37,7 +37,7 @@ The bot features a sophisticated 4-tier fallback system that ensures reliable se
 |----------|---------|-------------|----------|
 | **1** | **OpenAI GPT** | Primary model for high-quality plans | Best quality, when available |
 | **2** | **Groq** | Secondary model, OpenAI alternative | Fast fallback, reliable service |
-| **3** | **Local LLM** | TinyLlama 1.1B local model | Offline operation, privacy |
+| **3** | **Local LLM** | Google Gemma 4 local model | Offline operation, privacy |
 | **4** | **Fallback Plan** | Predefined professional template | Guaranteed availability |
 
 ### ⚡ How It Works
@@ -46,7 +46,7 @@ The bot automatically attempts to generate study plans using available services
 
 1. **Primary**: OpenAI API (if `OPENAI_API_KEY` is set and quota available)
 2. **Fallback 1**: [Groq](https://groq.com/) (if `GROQ_API_KEY` is set)
-3. **Fallback 2**: Local LLM (TinyLlama 1.1B model)
+3. **Fallback 2**: Local LLM (Google Gemma 4 model)
 4. **Last Resort**: Local plan generator (comprehensive template)
 
 ### 🔄 Translation Fallback
@@ -131,24 +131,24 @@ pip install -r requirements.txt
 ```
 
 ### 3. Set up Local LLM (Recommended)
-The bot includes a local TinyLlama 1.1B model for offline operation:
+The bot includes a local Google Gemma 4 model for offline operation:
 
-- **Model**: TinyLlama 1.1B Chat v1.0 (Q4_K_M quantized)
+- **Model**: Google Gemma 4 Instruct (GGUF, quantized)
 - **Format**: GGUF format
-- **Size**: ~1.1GB
-- **Requirements**: ~2GB RAM for optimal performance
+- **Size**: depends on variant/quantization (typically several GB)
+- **Requirements**: depends on variant (recommended 8GB+ RAM for 4B class models)
 
 **Important**: The model file is not included in the repository due to size limitations. You must download it separately:
 
 ```bash
 # Download the model (choose one method)
 # Option 1: Using wget
-wget -O models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf \
-    "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
+wget -O models/google-gemma-4b-it-Q4_K_M.gguf \
+    "<YOUR_GEMMA4_GGUF_DOWNLOAD_URL>"
 
 # Option 2: Using curl
-curl -L -o models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf \
-    "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
+curl -L -o models/google-gemma-4b-it-Q4_K_M.gguf \
+    "<YOUR_GEMMA4_GGUF_DOWNLOAD_URL>"
 ```
 
 See [models/README.md](models/README.md) for detailed download instructions and troubleshooting.
@@ -161,6 +161,10 @@ Create a `.env` file in the root directory or rename `.env.example` to `.env` an
 BOT_TOKEN=your_telegram_bot_token
 OPENAI_API_KEY=your_openai_api_key
 GROQ_API_KEY=your_groq_api_key
+LOCAL_LLM_MODEL_PATH=models/google-gemma-4b-it-Q4_K_M.gguf
+LOCAL_LLM_CONTEXT=4096
+LOCAL_LLM_THREADS=4
+LOCAL_LLM_MAX_TOKENS=512
 ```
 All environment variables are loaded from `.env` automatically.
 
@@ -200,7 +204,7 @@ EduPlannerBotAI/
 │   └── language.py         # Language selection and filter
 ├── services/               # Core logic and helper functions
 │   ├── llm.py              # Multi-level LLM integration (OpenAI → Groq → Local LLM → Fallback)
-│   ├── local_llm.py        # Local TinyLlama model integration
+│   ├── local_llm.py        # Local Google Gemma 4 model integration
 │   ├── pdf.py              # PDF export
 │   ├── txt.py              # TXT export
 │   ├── reminders.py        # Reminder simulation
@@ -221,7 +225,7 @@ EduPlannerBotAI/
 | **aiogram** | Telegram Bot Framework | 3.x |
 | **OpenAI API** | Primary LLM provider | Latest |
 | **Groq API** | Secondary LLM provider | Latest |
-| **Local LLM** | TinyLlama 1.1B offline | GGUF |
+| **Local LLM** | Google Gemma 4 offline | GGUF |
 | **llama-cpp-python** | Local LLM inference | Latest |
 | **fpdf** | PDF file generation | Latest |
 | **TinyDB** | Lightweight NoSQL database | Latest |
@@ -236,11 +240,11 @@ EduPlannerBotAI/
 - **Testing**: pytest with 100% coverage
 - **Style**: PEP8 compliant
 
-## 📝 Release 4.0.0 Highlights
+## 📝 Release 4.1.0 Highlights
 
 ### 🆕 Major Features
 - **Multi-Level LLM Architecture**: OpenAI → Groq → Local LLM → Fallback Plan
-- **Local LLM Integration**: TinyLlama 1.1B model for offline operation
+- **Local LLM Integration**: Google Gemma 4 model for offline operation
 - **Guaranteed Availability**: Bot works even without internet connection
 - **Enhanced Fallback System**: Robust error handling and service switching
 
@@ -309,4 +313,4 @@ MIT License - see [LICENSE](LICENSE) file for details.
 
 ---
 
-**EduPlannerBotAI v4.0.0** represents a significant milestone, transforming the bot from a simple OpenAI-dependent service into a robust, enterprise-grade system with guaranteed availability and offline operation capabilities. This release sets the foundation for future enhancements while maintaining backward compatibility and improving overall user experience.
\ No newline at end of file
+**EduPlannerBotAI v4.1.0** represents a significant milestone, transforming the bot from a simple OpenAI-dependent service into a robust, enterprise-grade system with guaranteed availability and offline operation capabilities. This release sets the foundation for future enhancements while maintaining backward compatibility and improving overall user experience.
\ No newline at end of file
diff --git a/config.py b/config.py
index abd373b..a400f5a 100644
--- a/config.py
+++ b/config.py
@@ -3,8 +3,31 @@
 
 load_dotenv()
 
+
+def _get_int_env(var_name: str, default: int, min_value: int = 1) -> int:
+    """Read an integer setting from the environment, tolerating bad input.
+
+    Returns ``default`` when ``var_name`` is unset, is not a valid integer
+    literal, or parses to a value below ``min_value``.
+    """
+    raw = os.getenv(var_name)
+    if raw is None:
+        return default
+    try:
+        value = int(raw)
+    except (TypeError, ValueError):
+        return default
+    # Below-minimum values are replaced by the default, not clamped.
+    return value if value >= min_value else default
+
+
 TOKEN = os.getenv("BOT_TOKEN")
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 GROQ_API_KEY = os.getenv("GROQ_API_KEY")
 OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
 GROQ_MODEL = os.getenv("GROQ_MODEL", "llama-3.1-8b-instant")
+
+LOCAL_LLM_MODEL_PATH = os.getenv("LOCAL_LLM_MODEL_PATH", "models/google-gemma-4b-it-Q4_K_M.gguf")
+LOCAL_LLM_CONTEXT = _get_int_env("LOCAL_LLM_CONTEXT", default=4096, min_value=512)
+LOCAL_LLM_THREADS = _get_int_env("LOCAL_LLM_THREADS", default=4, min_value=1)
+LOCAL_LLM_MAX_TOKENS = _get_int_env("LOCAL_LLM_MAX_TOKENS", default=512, min_value=32)
diff --git a/models/README.md b/models/README.md
index 3c508d6..b947acd 100644
--- a/models/README.md
+++ b/models/README.md
@@ -1,130 +1,60 @@
 # Local LLM Models
 
-This directory contains the local language model used by EduPlannerBotAI for offline operation.
+This directory stores the local language model used by EduPlannerBotAI for offline mode.
 
-## Required Model
+## Default model
 
-**Model**: TinyLlama 1.1B Chat v1.0  
-**Format**: GGUF (quantized)  
-**Size**: ~1.1GB  
-**Quantization**: Q4_K_M (4-bit, optimized for memory and speed)
+**Model family**: Google Gemma 4 (instruction-tuned, GGUF)  
+**Recommended file name**: `google-gemma-4b-it-Q4_K_M.gguf`  
+**Expected path**: `models/google-gemma-4b-it-Q4_K_M.gguf`
 
-## Download Instructions
+> If your GGUF file has a different name, set `LOCAL_LLM_MODEL_PATH` in `.env`.
 
-### Option 1: Direct Download from Hugging Face
+## Quick setup
 
-1. Visit the model page: [TinyLlama-1.1B-Chat-v1.0-GGUF](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF)
-2. Download the file: `tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf`
-3. Place it in this `models/` directory
-4. Ensure the filename matches exactly: `tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf`
+1. Download a Gemma 4 GGUF file from your preferred source.
+2. Put it into the `models/` folder.
+3. Set `.env`:
 
-### Option 2: Using Hugging Face CLI
-
-```bash
-# Install huggingface-hub if not already installed
-pip install huggingface-hub
-
-# Download the model
-huggingface-cli download TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF \
-    tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf \
-    --local-dir models/
-```
-
-### Option 3: Using wget/curl
-
-```bash
-# Using wget
-wget -O models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf \
-    "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
-
-# Using curl
-curl -L -o models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf \
-    "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
+```env
+LOCAL_LLM_MODEL_PATH=models/google-gemma-4b-it-Q4_K_M.gguf
+LOCAL_LLM_CONTEXT=4096
+LOCAL_LLM_THREADS=4
+LOCAL_LLM_MAX_TOKENS=512
 ```
 
-## File Structure
-
-After downloading, your directory should look like this:
+## File structure
 
-```
+```text
 models/
 ├── README.md
-└── tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf  # ~1.1GB
+└── google-gemma-4b-it-Q4_K_M.gguf
 ```
 
 ## Verification
 
-Verify the model is correctly downloaded:
-
 ```bash
-# Check file exists and size
-ls -lh models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf
-
-# Expected output:
-# -rw-r--r-- 1 user user 1.1G Jan 1 12:00 tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf
-
-# Check file integrity (optional)
-file models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf
+ls -lh models/google-gemma-4b-it-Q4_K_M.gguf
+file models/google-gemma-4b-it-Q4_K_M.gguf
 ```
 
-## Model Specifications
+## Troubleshooting
 
-- **Architecture**: TinyLlama 1.1B (Llama architecture)
-- **Training Data**: Chat/instruction fine-tuned
-- **Context Length**: 2048 tokens
-- **Quantization**: Q4_K_M (4-bit, optimized)
-- **Memory Usage**: ~2GB RAM during inference
-- **Performance**: Good quality for study plan generation
+### Model not loaded
 
-## Troubleshooting
+If you see:
 
-### Model Not Found Error
-```
+```text
 [Local LLM error: Model not loaded]
 ```
-**Solution**: Ensure the model file is in the correct location with the exact filename.
-
-### Memory Issues
-```
-[Local LLM error: Out of memory]
-```
-**Solution**: 
-- Ensure you have at least 2GB RAM available
-- Close other memory-intensive applications
-- Consider using a smaller model variant
-
-### Slow Performance
-**Solutions**:
-- Ensure you have a multi-core CPU
-- Close unnecessary background processes
-- The first request may be slower due to model loading
-
-## Alternative Models
-
-If you prefer a different model, you can use any GGUF format model:
-
-1. **Llama 2 7B**: Better quality, larger size (~4GB)
-2. **Mistral 7B**: Excellent performance, medium size (~4GB)
-3. **Phi-2**: Good quality, smaller size (~1.4GB)
-
-**Note**: Update the model path in `services/local_llm.py` if using a different model.
-
-## Performance Tips
-
-- **First Run**: The first request will be slower as the model loads into memory
-- **Subsequent Requests**: Much faster after initial loading
-- **Memory**: Keep at least 2GB RAM free for optimal performance
-- **CPU**: Multi-core processors will improve inference speed
-
-## Support
-
-If you encounter issues with the local LLM:
 
-1. Check the bot logs for detailed error messages
-2. Verify the model file is correctly placed
-3. Ensure sufficient system resources
-4. Open an issue on GitHub with error details
+Check:
+- file path in `LOCAL_LLM_MODEL_PATH`
+- read permissions for the model file
+- available RAM/CPU resources
 
-## License
+### Out-of-memory or slow responses
 
-The TinyLlama model is licensed under Apache 2.0. See the [Hugging Face page](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF) for full license details.
+- Reduce context: `LOCAL_LLM_CONTEXT=2048`
+- Use lower-bit quantization if available
+- Close other heavy processes
diff --git a/services/llm.py b/services/llm.py
index 163d507..da87536 100644
--- a/services/llm.py
+++ b/services/llm.py
@@ -8,7 +8,7 @@
 from config import GROQ_API_KEY
 from config import OPENAI_MODEL
 from config import GROQ_MODEL
-from .local_llm import ask_local_llm
+from services.local_llm import ask_local_llm
 
 
 # Configure logging
diff --git a/services/local_llm.py b/services/local_llm.py
index ce0df8c..dad6114 100644
--- a/services/local_llm.py
+++ b/services/local_llm.py
@@ -1,46 +1,81 @@
 import logging
 from llama_cpp import Llama
+from config import (
+    LOCAL_LLM_MODEL_PATH,
+    LOCAL_LLM_CONTEXT,
+    LOCAL_LLM_THREADS,
+    LOCAL_LLM_MAX_TOKENS,
+)
 
 # Configure logging
 logger = logging.getLogger(__name__)
 
+
+def _normalize_max_tokens(max_tokens: int) -> int:
+    """Cap max_tokens at LOCAL_LLM_CONTEXT; use LOCAL_LLM_MAX_TOKENS if it is unusable."""
+    try:
+        requested = int(max_tokens)
+    except (TypeError, ValueError):
+        return LOCAL_LLM_MAX_TOKENS
+    if requested < 1:
+        return LOCAL_LLM_MAX_TOKENS
+    return min(requested, LOCAL_LLM_CONTEXT)
+
+
 # Load model once at startup
 try:
     LLM_MODEL = Llama(
-        model_path="models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
-        n_ctx=2048,  # Context window
-        n_threads=4,  # Number of CPU threads
-        verbose=False  # Reduce output noise
+        model_path=LOCAL_LLM_MODEL_PATH,
+        n_ctx=LOCAL_LLM_CONTEXT,
+        n_threads=LOCAL_LLM_THREADS,
+        verbose=False,
+    )
+    logger.info(
+        "Local LLM model loaded successfully from: %s (ctx=%s, threads=%s)",
+        LOCAL_LLM_MODEL_PATH,
+        LOCAL_LLM_CONTEXT,
+        LOCAL_LLM_THREADS,
     )
-    logger.info("Local LLM model loaded successfully")
 except Exception as e:
-    logger.error("Failed to load Local LLM model: %s", e)
+    logger.error("Failed to load Local LLM model from %s: %s", LOCAL_LLM_MODEL_PATH, e)
     LLM_MODEL = None
 
-def ask_local_llm(prompt: str, max_tokens: int = 512) -> str:
-    """Ask local LLM (offline fallback)"""
+
+# pylint: disable=too-many-return-statements
+def ask_local_llm(prompt: str, max_tokens: int = LOCAL_LLM_MAX_TOKENS) -> str:
+    """Ask local LLM (offline fallback)."""
     if LLM_MODEL is None:
         return "[Local LLM error: Model not loaded]"
 
+    if prompt is None or str(prompt).strip() == "":
+        return "[Local LLM error: Empty prompt]"
+
+    safe_max_tokens = _normalize_max_tokens(max_tokens)
+
     try:
-        # Format prompt for better results
-        formatted_prompt = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
+        formatted_prompt = (
+            "You are an educational planning assistant. "
+            "Provide a concise, practical response.\n\n"
+            f"User request:\n{prompt}\n\nAssistant response:\n"
+        )
 
         output = LLM_MODEL(
             formatted_prompt,
-            max_tokens=max_tokens,
+            max_tokens=safe_max_tokens,
             temperature=0.7,
             top_p=0.9,
-            stop=["<|im_end|>", "\n\n"]
+            stop=["\n\nUser request:", "<end_of_turn>"],
         )
 
-        if output and "choices" in output and len(output["choices"]) > 0:
-            response = output["choices"][0]["text"].strip()
+        choices = output.get("choices", []) if isinstance(output, dict) else []
+        if choices:
+            response = str(choices[0].get("text", "")).strip()
             if response:
                 logger.info("Local LLM generated response successfully")
                 return response
             logger.warning("Local LLM returned empty response")
             return "[Local LLM error: Empty response]"
+
         logger.warning("Local LLM returned invalid output format")
         return "[Local LLM error: Invalid output format]"
 
diff --git a/tests/test_config.py b/tests/test_config.py
new file mode 100644
index 0000000..b7a9c23
--- /dev/null
+++ b/tests/test_config.py
@@ -0,0 +1,45 @@
+"""Tests for integer environment parsing in ``config``."""
+import importlib
+
+import pytest
+
+import config
+
+
+@pytest.fixture(autouse=True)
+def _restore_config(monkeypatch):
+    """Reload ``config`` after each test so patched values do not leak.
+
+    ``importlib.reload`` rebinds the module-level constants from the patched
+    environment; undoing the env changes alone would leave them in place for
+    every test that runs later in the same session.
+    """
+    yield
+    monkeypatch.undo()
+    importlib.reload(config)
+
+
+def test_local_llm_int_env_fallbacks(monkeypatch):
+    """Invalid or below-minimum values fall back to the built-in defaults."""
+    monkeypatch.setenv("LOCAL_LLM_CONTEXT", "invalid")
+    monkeypatch.setenv("LOCAL_LLM_THREADS", "0")
+    monkeypatch.setenv("LOCAL_LLM_MAX_TOKENS", "-10")
+
+    importlib.reload(config)
+
+    assert config.LOCAL_LLM_CONTEXT == 4096
+    assert config.LOCAL_LLM_THREADS == 4
+    assert config.LOCAL_LLM_MAX_TOKENS == 512
+
+
+def test_local_llm_int_env_valid(monkeypatch):
+    """In-range integer values are used as given."""
+    monkeypatch.setenv("LOCAL_LLM_CONTEXT", "8192")
+    monkeypatch.setenv("LOCAL_LLM_THREADS", "8")
+    monkeypatch.setenv("LOCAL_LLM_MAX_TOKENS", "1024")
+
+    importlib.reload(config)
+
+    assert config.LOCAL_LLM_CONTEXT == 8192
+    assert config.LOCAL_LLM_THREADS == 8
+    assert config.LOCAL_LLM_MAX_TOKENS == 1024
diff --git a/tests/test_local_llm.py b/tests/test_local_llm.py
new file mode 100644
index 0000000..48f486e
--- /dev/null
+++ b/tests/test_local_llm.py
@@ -0,0 +1,29 @@
+from services import local_llm
+
+
+class DummyModel:
+    """Callable stand-in for ``Llama`` that records the kwargs of its last call."""
+
+    def __init__(self):
+        self.last_kwargs = {}
+
+    def __call__(self, *_args, **kwargs):
+        self.last_kwargs = kwargs
+        return {"choices": [{"text": "ok"}]}
+
+
+def test_ask_local_llm_empty_prompt(monkeypatch):
+    """A whitespace-only prompt yields the empty-prompt error string."""
+    stub = DummyModel()
+    monkeypatch.setattr(local_llm, "LLM_MODEL", stub)
+    assert local_llm.ask_local_llm("   ") == "[Local LLM error: Empty prompt]"
+
+
+def test_ask_local_llm_normalizes_max_tokens(monkeypatch):
+    """A negative max_tokens is replaced by the configured default."""
+    stub = DummyModel()
+    monkeypatch.setattr(local_llm, "LLM_MODEL", stub)
+
+    reply = local_llm.ask_local_llm("build plan", max_tokens=-1)
+    assert reply == "ok"
+    assert stub.last_kwargs["max_tokens"] == local_llm.LOCAL_LLM_MAX_TOKENS