From af4e907ebe86708e379cc84db087678d67bd9d4c Mon Sep 17 00:00:00 2001 From: Aleksandr <105745366+AlexTkDev@users.noreply.github.com> Date: Sun, 19 Apr 2026 23:34:51 +0300 Subject: [PATCH] Run pylint workflow only on pull requests --- .github/workflows/pylint.yml | 6 +- README.md | 38 +++++----- config.py | 23 ++++++ models/README.md | 134 +++++++++-------------------------- services/llm.py | 2 +- services/local_llm.py | 63 ++++++++++++---- tests/test_config.py | 27 +++++++ tests/test_local_llm.py | 29 ++++++++ 8 files changed, 185 insertions(+), 137 deletions(-) create mode 100644 tests/test_config.py create mode 100644 tests/test_local_llm.py diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index d29f439..132c312 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -1,17 +1,17 @@ name: Pylint -on: [ push ] +on: [ pull_request ] jobs: build: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.10", "3.11", "3.12", "3.13"] + python-version: ["3.10", "3.14"] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies diff --git a/README.md b/README.md index c6292b3..79b8b6a 100644 --- a/README.md +++ b/README.md @@ -4,10 +4,10 @@ > **Note:** All code comments and docstrings are in English for international collaboration and code clarity. All user-facing messages and buttons are automatically translated to the user's selected language. 
-## 🚀 What's New in v4.0.0 +## 🚀 What's New in v4.1.0 - **🆕 Multi-Level LLM Architecture**: OpenAI → Groq → Local LLM → Fallback Plan -- **🆕 Local LLM Integration**: TinyLlama 1.1B model for offline operation +- **🆕 Local LLM Integration**: Google Gemma 4 model for offline operation - **🆕 Guaranteed Availability**: Bot works even without internet connection - **🆕 Enhanced Fallback System**: Robust error handling and service switching - **🆕 Improved Plan Quality**: Professional-grade study plan templates @@ -37,7 +37,7 @@ The bot features a sophisticated 4-tier fallback system that ensures reliable se |----------|---------|-------------|----------| | **1** | **OpenAI GPT** | Primary model for high-quality plans | Best quality, when available | | **2** | **Groq** | Secondary model, OpenAI alternative | Fast fallback, reliable service | -| **3** | **Local LLM** | TinyLlama 1.1B local model | Offline operation, privacy | +| **3** | **Local LLM** | Google Gemma 4 local model | Offline operation, privacy | | **4** | **Fallback Plan** | Predefined professional template | Guaranteed availability | ### ⚡ How It Works @@ -46,7 +46,7 @@ The bot automatically attempts to generate study plans using available services 1. **Primary**: OpenAI API (if `OPENAI_API_KEY` is set and quota available) 2. **Fallback 1**: [Groq](https://groq.com/) (if `GROQ_API_KEY` is set) -3. **Fallback 2**: Local LLM (TinyLlama 1.1B model) +3. **Fallback 2**: Local LLM (Google Gemma 4 model) 4. **Last Resort**: Local plan generator (comprehensive template) ### 🔄 Translation Fallback @@ -131,24 +131,24 @@ pip install -r requirements.txt ``` ### 3. 
Set up Local LLM (Recommended) -The bot includes a local TinyLlama 1.1B model for offline operation: +The bot includes a local Google Gemma 4 model for offline operation: -- **Model**: TinyLlama 1.1B Chat v1.0 (Q4_K_M quantized) +- **Model**: Google Gemma 4 Instruct (GGUF, quantized) - **Format**: GGUF format -- **Size**: ~1.1GB -- **Requirements**: ~2GB RAM for optimal performance +- **Size**: depends on variant/quantization (typically several GB) +- **Requirements**: depends on variant (recommended 8GB+ RAM for 4B class models) **Important**: The model file is not included in the repository due to size limitations. You must download it separately: ```bash # Download the model (choose one method) # Option 1: Using wget -wget -O models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf \ - "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf" +wget -O models/google-gemma-4b-it-Q4_K_M.gguf \ + "<GEMMA_GGUF_DOWNLOAD_URL>" # Option 2: Using curl -curl -L -o models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf \ - "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf" +curl -L -o models/google-gemma-4b-it-Q4_K_M.gguf \ + "<GEMMA_GGUF_DOWNLOAD_URL>" ``` See [models/README.md](models/README.md) for detailed download instructions and troubleshooting. @@ -161,6 +161,10 @@ Create a `.env` file in the root directory or rename `.env.example` to `.env` an BOT_TOKEN=your_telegram_bot_token OPENAI_API_KEY=your_openai_api_key GROQ_API_KEY=your_groq_api_key +LOCAL_LLM_MODEL_PATH=models/google-gemma-4b-it-Q4_K_M.gguf +LOCAL_LLM_CONTEXT=4096 +LOCAL_LLM_THREADS=4 +LOCAL_LLM_MAX_TOKENS=512 ``` All environment variables are loaded from `.env` automatically.
@@ -200,7 +204,7 @@ EduPlannerBotAI/ │ └── language.py # Language selection and filter ├── services/ # Core logic and helper functions │ ├── llm.py # Multi-level LLM integration (OpenAI → Groq → Local LLM → Fallback) -│ ├── local_llm.py # Local TinyLlama model integration +│ ├── local_llm.py # Local Google Gemma 4 model integration │ ├── pdf.py # PDF export │ ├── txt.py # TXT export │ ├── reminders.py # Reminder simulation @@ -221,7 +225,7 @@ EduPlannerBotAI/ | **aiogram** | Telegram Bot Framework | 3.x | | **OpenAI API** | Primary LLM provider | Latest | | **Groq API** | Secondary LLM provider | Latest | -| **Local LLM** | TinyLlama 1.1B offline | GGUF | +| **Local LLM** | Google Gemma 4 offline | GGUF | | **llama-cpp-python** | Local LLM inference | Latest | | **fpdf** | PDF file generation | Latest | | **TinyDB** | Lightweight NoSQL database | Latest | @@ -236,11 +240,11 @@ EduPlannerBotAI/ - **Testing**: pytest with 100% coverage - **Style**: PEP8 compliant -## 📝 Release 4.0.0 Highlights +## 📝 Release 4.1.0 Highlights ### 🆕 Major Features - **Multi-Level LLM Architecture**: OpenAI → Groq → Local LLM → Fallback Plan -- **Local LLM Integration**: TinyLlama 1.1B model for offline operation +- **Local LLM Integration**: Google Gemma 4 model for offline operation - **Guaranteed Availability**: Bot works even without internet connection - **Enhanced Fallback System**: Robust error handling and service switching @@ -309,4 +313,4 @@ MIT License - see [LICENSE](LICENSE) file for details. --- -**EduPlannerBotAI v4.0.0** represents a significant milestone, transforming the bot from a simple OpenAI-dependent service into a robust, enterprise-grade system with guaranteed availability and offline operation capabilities. This release sets the foundation for future enhancements while maintaining backward compatibility and improving overall user experience. 
\ No newline at end of file +**EduPlannerBotAI v4.1.0** represents a significant milestone, transforming the bot from a simple OpenAI-dependent service into a robust, enterprise-grade system with guaranteed availability and offline operation capabilities. This release sets the foundation for future enhancements while maintaining backward compatibility and improving overall user experience. \ No newline at end of file diff --git a/config.py b/config.py index abd373b..a400f5a 100644 --- a/config.py +++ b/config.py @@ -3,8 +3,31 @@ load_dotenv() + +def _get_int_env(var_name: str, default: int, min_value: int = 1) -> int: + """Parse integer env var safely with fallback to default. + + Invalid or out-of-range values are ignored to keep startup stable. + """ + raw_value = os.getenv(var_name) + if raw_value is None: + return default + try: + parsed_value = int(raw_value) + if parsed_value < min_value: + return default + return parsed_value + except (TypeError, ValueError): + return default + + TOKEN = os.getenv("BOT_TOKEN") OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") GROQ_API_KEY = os.getenv("GROQ_API_KEY") OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini") GROQ_MODEL = os.getenv("GROQ_MODEL", "llama-3.1-8b-instant") + +LOCAL_LLM_MODEL_PATH = os.getenv("LOCAL_LLM_MODEL_PATH", "models/google-gemma-4b-it-Q4_K_M.gguf") +LOCAL_LLM_CONTEXT = _get_int_env("LOCAL_LLM_CONTEXT", default=4096, min_value=512) +LOCAL_LLM_THREADS = _get_int_env("LOCAL_LLM_THREADS", default=4, min_value=1) +LOCAL_LLM_MAX_TOKENS = _get_int_env("LOCAL_LLM_MAX_TOKENS", default=512, min_value=32) diff --git a/models/README.md b/models/README.md index 3c508d6..b947acd 100644 --- a/models/README.md +++ b/models/README.md @@ -1,130 +1,60 @@ # Local LLM Models -This directory contains the local language model used by EduPlannerBotAI for offline operation. +This directory stores the local language model used by EduPlannerBotAI for offline mode. 
-## Required Model +## Default model (updated) -**Model**: TinyLlama 1.1B Chat v1.0 -**Format**: GGUF (quantized) -**Size**: ~1.1GB -**Quantization**: Q4_K_M (4-bit, optimized for memory and speed) +**Model family**: Google Gemma 4 (instruction-tuned, GGUF) +**Recommended file name**: `google-gemma-4b-it-Q4_K_M.gguf` +**Expected path**: `models/google-gemma-4b-it-Q4_K_M.gguf` -## Download Instructions +> If your GGUF file has a different name, set `LOCAL_LLM_MODEL_PATH` in `.env`. -### Option 1: Direct Download from Hugging Face +## Quick setup -1. Visit the model page: [TinyLlama-1.1B-Chat-v1.0-GGUF](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF) -2. Download the file: `tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf` -3. Place it in this `models/` directory -4. Ensure the filename matches exactly: `tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf` +1. Download a Gemma 4 GGUF file from your preferred source. +2. Put it into the `models/` folder. +3. Set `.env`: -### Option 2: Using Hugging Face CLI - -```bash -# Install huggingface-hub if not already installed -pip install huggingface-hub - -# Download the model -huggingface-cli download TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF \ - tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf \ - --local-dir models/ -``` - -### Option 3: Using wget/curl - -```bash -# Using wget -wget -O models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf \ - "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf" - -# Using curl -curl -L -o models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf \ - "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf" +```env +LOCAL_LLM_MODEL_PATH=models/google-gemma-4b-it-Q4_K_M.gguf +LOCAL_LLM_CONTEXT=4096 +LOCAL_LLM_THREADS=4 +LOCAL_LLM_MAX_TOKENS=512 ``` -## File Structure - -After downloading, your directory should look like this: +## File structure -``` +```text models/ ├── README.md -└── tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf # 
~1.1GB +└── google-gemma-4b-it-Q4_K_M.gguf ``` ## Verification -Verify the model is correctly downloaded: - ```bash -# Check file exists and size -ls -lh models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf - -# Expected output: -# -rw-r--r-- 1 user user 1.1G Jan 1 12:00 tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf - -# Check file integrity (optional) -file models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf +ls -lh models/google-gemma-4b-it-Q4_K_M.gguf +file models/google-gemma-4b-it-Q4_K_M.gguf ``` -## Model Specifications +## Troubleshooting -- **Architecture**: TinyLlama 1.1B (Llama architecture) -- **Training Data**: Chat/instruction fine-tuned -- **Context Length**: 2048 tokens -- **Quantization**: Q4_K_M (4-bit, optimized) -- **Memory Usage**: ~2GB RAM during inference -- **Performance**: Good quality for study plan generation +### Model not loaded -## Troubleshooting +If you see: -### Model Not Found Error -``` +```text [Local LLM error: Model not loaded] ``` -**Solution**: Ensure the model file is in the correct location with the exact filename. - -### Memory Issues -``` -[Local LLM error: Out of memory] -``` -**Solution**: -- Ensure you have at least 2GB RAM available -- Close other memory-intensive applications -- Consider using a smaller model variant - -### Slow Performance -**Solutions**: -- Ensure you have a multi-core CPU -- Close unnecessary background processes -- The first request may be slower due to model loading - -## Alternative Models - -If you prefer a different model, you can use any GGUF format model: - -1. **Llama 2 7B**: Better quality, larger size (~4GB) -2. **Mistral 7B**: Excellent performance, medium size (~4GB) -3. **Phi-2**: Good quality, smaller size (~1.4GB) - -**Note**: Update the model path in `services/local_llm.py` if using a different model. 
- -## Performance Tips - -- **First Run**: The first request will be slower as the model loads into memory -- **Subsequent Requests**: Much faster after initial loading -- **Memory**: Keep at least 2GB RAM free for optimal performance -- **CPU**: Multi-core processors will improve inference speed - -## Support - -If you encounter issues with the local LLM: -1. Check the bot logs for detailed error messages -2. Verify the model file is correctly placed -3. Ensure sufficient system resources -4. Open an issue on GitHub with error details +Check: +- file path in `LOCAL_LLM_MODEL_PATH` +- read permissions for the model file +- available RAM/CPU resources -## License +### Out-of-memory or slow responses -The TinyLlama model is licensed under Apache 2.0. See the [Hugging Face page](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF) for full license details. +- Reduce context: `LOCAL_LLM_CONTEXT=2048` +- Use lower-bit quantization if available +- Close other heavy processes diff --git a/services/llm.py b/services/llm.py index 163d507..da87536 100644 --- a/services/llm.py +++ b/services/llm.py @@ -8,7 +8,7 @@ from config import GROQ_API_KEY from config import OPENAI_MODEL from config import GROQ_MODEL -from .local_llm import ask_local_llm +from services.local_llm import ask_local_llm # Configure logging diff --git a/services/local_llm.py b/services/local_llm.py index ce0df8c..dad6114 100644 --- a/services/local_llm.py +++ b/services/local_llm.py @@ -1,46 +1,81 @@ import logging from llama_cpp import Llama +from config import ( + LOCAL_LLM_MODEL_PATH, + LOCAL_LLM_CONTEXT, + LOCAL_LLM_THREADS, + LOCAL_LLM_MAX_TOKENS, +) # Configure logging logger = logging.getLogger(__name__) + +def _normalize_max_tokens(max_tokens: int) -> int: + """Ensure max_tokens is a safe positive integer for local inference.""" + try: + parsed = int(max_tokens) + if parsed < 1: + return LOCAL_LLM_MAX_TOKENS + return min(parsed, LOCAL_LLM_CONTEXT) + except (TypeError, ValueError): + return 
LOCAL_LLM_MAX_TOKENS + + # Load model once at startup try: LLM_MODEL = Llama( - model_path="models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf", - n_ctx=2048, # Context window - n_threads=4, # Number of CPU threads - verbose=False # Reduce output noise + model_path=LOCAL_LLM_MODEL_PATH, + n_ctx=LOCAL_LLM_CONTEXT, + n_threads=LOCAL_LLM_THREADS, + verbose=False, + ) + logger.info( + "Local LLM model loaded successfully from: %s (ctx=%s, threads=%s)", + LOCAL_LLM_MODEL_PATH, + LOCAL_LLM_CONTEXT, + LOCAL_LLM_THREADS, ) - logger.info("Local LLM model loaded successfully") except Exception as e: - logger.error("Failed to load Local LLM model: %s", e) + logger.error("Failed to load Local LLM model from %s: %s", LOCAL_LLM_MODEL_PATH, e) LLM_MODEL = None -def ask_local_llm(prompt: str, max_tokens: int = 512) -> str: - """Ask local LLM (offline fallback)""" + +# pylint: disable=too-many-return-statements +def ask_local_llm(prompt: str, max_tokens: int = LOCAL_LLM_MAX_TOKENS) -> str: + """Ask local LLM (offline fallback).""" if LLM_MODEL is None: return "[Local LLM error: Model not loaded]" + if prompt is None or str(prompt).strip() == "": + return "[Local LLM error: Empty prompt]" + + safe_max_tokens = _normalize_max_tokens(max_tokens) + try: - # Format prompt for better results - formatted_prompt = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n" + formatted_prompt = ( + "You are an educational planning assistant. 
" + "Provide a concise, practical response.\n\n" + f"User request:\n{prompt}\n\nAssistant response:\n" + ) output = LLM_MODEL( formatted_prompt, - max_tokens=max_tokens, + max_tokens=safe_max_tokens, temperature=0.7, top_p=0.9, - stop=["<|im_end|>", "\n\n"] + stop=["\n\nUser request:"], ) - if output and "choices" in output and len(output["choices"]) > 0: - response = output["choices"][0]["text"].strip() + choices = output.get("choices", []) if isinstance(output, dict) else [] + if choices: + response = str(choices[0].get("text", "")).strip() if response: logger.info("Local LLM generated response successfully") return response logger.warning("Local LLM returned empty response") return "[Local LLM error: Empty response]" + logger.warning("Local LLM returned invalid output format") return "[Local LLM error: Invalid output format]" diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 0000000..b7a9c23 --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,27 @@ +import importlib + + +def test_local_llm_int_env_fallbacks(monkeypatch): + monkeypatch.setenv("LOCAL_LLM_CONTEXT", "invalid") + monkeypatch.setenv("LOCAL_LLM_THREADS", "0") + monkeypatch.setenv("LOCAL_LLM_MAX_TOKENS", "-10") + + import config + importlib.reload(config) + + assert config.LOCAL_LLM_CONTEXT == 4096 + assert config.LOCAL_LLM_THREADS == 4 + assert config.LOCAL_LLM_MAX_TOKENS == 512 + + +def test_local_llm_int_env_valid(monkeypatch): + monkeypatch.setenv("LOCAL_LLM_CONTEXT", "8192") + monkeypatch.setenv("LOCAL_LLM_THREADS", "8") + monkeypatch.setenv("LOCAL_LLM_MAX_TOKENS", "1024") + + import config + importlib.reload(config) + + assert config.LOCAL_LLM_CONTEXT == 8192 + assert config.LOCAL_LLM_THREADS == 8 + assert config.LOCAL_LLM_MAX_TOKENS == 1024 diff --git a/tests/test_local_llm.py b/tests/test_local_llm.py new file mode 100644 index 0000000..48f486e --- /dev/null +++ b/tests/test_local_llm.py @@ -0,0 +1,29 @@ +from services import local_llm + + +class DummyModel: 
+ """Simple callable model stub for local LLM tests.""" + + def __init__(self): + self.last_kwargs = {} + + def __call__(self, *_args, **kwargs): + self.last_kwargs = kwargs + return {"choices": [{"text": "ok"}]} + + +def test_ask_local_llm_empty_prompt(monkeypatch): + dummy_model = DummyModel() + monkeypatch.setattr(local_llm, "LLM_MODEL", dummy_model) + result = local_llm.ask_local_llm(" ") + assert result == "[Local LLM error: Empty prompt]" + + +def test_ask_local_llm_normalizes_max_tokens(monkeypatch): + dummy_model = DummyModel() + monkeypatch.setattr(local_llm, "LLM_MODEL", dummy_model) + + result = local_llm.ask_local_llm("build plan", max_tokens=-1) + + assert result == "ok" + assert dummy_model.last_kwargs["max_tokens"] == local_llm.LOCAL_LLM_MAX_TOKENS