diff --git a/.github/workflows/autoblack.yml b/.github/workflows/autoblack.yml index 7e754a2..f7ba73f 100644 --- a/.github/workflows/autoblack.yml +++ b/.github/workflows/autoblack.yml @@ -1,23 +1,23 @@ -# GitHub Action that uses Black to reformat the Python code in an incoming pull request. -# If all Python code in the pull request is compliant with Black then this Action does nothing. -# Othewrwise, Black is run and its changes are committed back to the incoming pull request. -# https://github.com/cclauss/autoblack +# GitHub Action that uses Black to reformat the Python code in an +# incoming pull request. If all Python code in the pull request is +# compliant with Black then this Action does nothing. Otherwise, +# Black is run and its changes are committed back to the incoming pull +# request. https://github.com/cclauss/autoblack +--- name: autoblack on: [pull_request] jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 - - name: Set up Python 3.13 - uses: actions/setup-python@v5 + - uses: actions/checkout@v6 + - name: Set up Python 3.14 + uses: actions/setup-python@v6 with: python-version: 3.14 - name: Install click, black and isort - run: pip install 'click==8.2.1' 'black==25.11.0' 'isort==5.13.2' - - name: Run isort --check . - run: isort --check . + run: pip install 'click==8.2.1' 'black==25.11.0' 'isort==8.0.1' - name: Run black --check --diff . run: black --check --diff . - name: If needed, commit black changes to the pull request diff --git a/.github/workflows/isort-and-black-checks.yml b/.github/workflows/isort-and-black-checks.yml deleted file mode 100644 index 1273de1..0000000 --- a/.github/workflows/isort-and-black-checks.yml +++ /dev/null @@ -1,32 +0,0 @@ -# GitHub Action that uses Black to reformat the Python code in an incoming pull request. -# If all Python code in the pull request is compliant with Black then this Action does nothing.
-# Othewrwise, Black is run and its changes are committed back to the incoming pull request. -# https://github.com/cclauss/autoblack - -name: isort and black check -on: [pull_request] -jobs: - build: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v5 - - name: Set up Python 3.14 - uses: actions/setup-python@v5 - with: - python-version: 3.13 - - name: Install click, black and isort - run: pip install 'click==8.0.4' 'black==25.1.0' 'isort==5.13.2' - - name: Run isort --check . - run: isort --check . - - name: Run black --check . - run: black --check . - # - name: If needed, commit black changes to the pull request - # if: failure() - # run: | - # black . - # git config --global user.name 'autoblack' - # git config --global user.email 'rocky@users.noreply.github.com' - # git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY - # git checkout $GITHUB_HEAD_REF - # git commit -am "fixup: Format Python code with Black" - # git push diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 8f23e58..cb5e006 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -1,8 +1,9 @@ +--- name: Mathics3 Module PyICU (macOS) on: push: - branches: [ master ] + branches: [master] pull_request: branches: '**' @@ -14,29 +15,30 @@ jobs: os: [macOS] python-version: ['3.13', '3.14'] steps: - - uses: actions/checkout@v5 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Set ICU version - run: | - echo "PKG_CONFIG_PATH=$(brew --prefix icu4c)/lib/pkgconfig" >> $GITHUB_ENV - - name: Install dependencies - run: | - brew install llvm - python -m pip install --upgrade pip - python -m pip install pytest - # # Go over and comment out stuff when next Mathics3 core and Mathics-scanner are released - # python -m pip install -e git+https://github.com/Mathics3/mathics-scanner#egg=Mathics-Scanner[full] - 
# git clone https://github.com/Mathics3/mathics-core - # (cd mathics-core && pip3 install -e .[full]) - # (cd mathics-core && bash ./admin-tools/make-JSON-tables.sh) - # python -m pip install -e git+https://github.com/Mathics3/Mathics3-Module-Base#egg=Mathics3-Module-Base - - name: Install Mathic3 PyICU Module - run: | - python -m pip install Mathics3 PyICU - python -m pip install --no-build-isolation -e . - - name: Test Mathics3 Module PyICU - run: | - make check + - uses: actions/checkout@v5 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Set ICU version + run: | + echo "PKG_CONFIG_PATH=$(brew --prefix icu4c)/lib/pkgconfig" >> $GITHUB_ENV + - name: Install dependencies + run: | + brew install llvm + python -m pip install --upgrade pip + python -m pip install pytest + # # Go over and comment out stuff when next Mathics3 core and Mathics-scanner are released + # python -m pip install -e git+https://github.com/Mathics3/mathics-scanner#egg=Mathics-Scanner[full] + # We use recently-added message tags from mathics core. + # Until next mathics-core release... + git clone https://github.com/Mathics3/mathics-core + (cd mathics-core && pip3 install -e .[full]) + # python -m pip install -e git+https://github.com/Mathics3/Mathics3-Module-Base#egg=Mathics3-Module-Base + - name: Install Mathic3 PyICU Module + run: | + python -m pip install Mathics3 PyICU + python -m pip install --no-build-isolation -e . 
+ - name: Test Mathics3 PyICU Module + run: | + make check diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 54a53a8..3ee28e2 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -1,8 +1,9 @@ +--- name: Mathics3-Module-pyICU (ubuntu) on: push: - branches: [ master ] + branches: [master] pull_request: branches: '**' @@ -13,26 +14,27 @@ jobs: matrix: python-version: ['3.12', '3.13', '3.14'] steps: - - uses: actions/checkout@v5 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - sudo apt install libicu-dev - python -m pip install --upgrade pip - python -m pip install pytest - # # Go over and comment out stuff when next Mathics3 core and Mathics-scanner are released - # python -m pip install -e git+https://github.com/Mathics3/mathics-scanner#egg=Mathics-Scanner[full] - # git clone https://github.com/Mathics3/mathics-core - # (cd mathics-core && pip3 install -e .[full]) - # (cd mathics-core && bash ./admin-tools/make-JSON-tables.sh) - # python -m pip install -e git+https://github.com/Mathics3/Mathics3-Module-Base#egg=Mathics3-Module-Base - - name: install Mathic3 PyICU Module - run: | - python -m pip install Mathics3 PyICU - python -m pip install --no-build-isolation -e . - - name: Test Mathics3 PyICU Module - run: | - make check + - uses: actions/checkout@v5 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + sudo apt install libicu-dev + python -m pip install --upgrade pip + python -m pip install pytest + # # Go over and comment out stuff when next Mathics3 core and Mathics-scanner are released + # python -m pip install -e git+https://github.com/Mathics3/mathics-scanner#egg=Mathics-Scanner[full] + # We use recently-added message tags from mathics core. 
+ # Until next mathics-core release... + git clone https://github.com/Mathics3/mathics-core + (cd mathics-core && pip3 install -e .[full]) + # python -m pip install -e git+https://github.com/Mathics3/Mathics3-Module-Base#egg=Mathics3-Module-Base + - name: Install Mathic3 PyICU Module + run: | + python -m pip install Mathics3 PyICU + python -m pip install --no-build-isolation -e . + - name: Test Mathics3 PyICU Module + run: | + make check diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7e4d9cc..cc6612c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,22 +1,23 @@ +--- default_language_version: python: python repos: -- repo: https://github.com/pre-commit/pre-commit-hooks + - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.0.1 hooks: - - id: check-merge-conflict - - id: debug-statements - stages: [commit] - - id: end-of-file-fixer - stages: [commit] - - id: trailing-whitespace -- repo: https://github.com/psf/black + - id: check-merge-conflict + - id: debug-statements + stages: [commit] + - id: end-of-file-fixer + stages: [commit] + - id: trailing-whitespace + - repo: https://github.com/psf/black rev: 25.11.0 hooks: - - id: black - language_version: python3 - exclude: 'mathicsscript/version.py' -- repo: https://github.com/pycqa/flake8 + - id: black + language_version: python3 + exclude: 'pymathics/icu/version.py' + - repo: https://github.com/pycqa/flake8 rev: 3.9.2 hooks: - id: flake8 diff --git a/pymathics/icu/__init__.py b/pymathics/icu/__init__.py index 5e78af5..74fe247 100644 --- a/pymathics/icu/__init__.py +++ b/pymathics/icu/__init__.py @@ -26,7 +26,7 @@ = {ʼ, а, б, в, г, д, е, ж, з, и, й, к, л, м, н, о, п, р, с, т, у, ф, х, ц, ч, ш, щ, ь, ю, я, є, і, ї, ґ} """ -from pymathics.icu.__main__ import Alphabet, AlphabeticOrder, Language +from pymathics.icu.__main__ import Alphabet, AlphabeticOrder from pymathics.icu.version import __version__ pymathics_version_data = { @@ -39,7 +39,6 @@ __all__ = [ "Alphabet", 
"AlphabeticOrder", - "Language", "pymathics_version_data", "__version__", ] diff --git a/pymathics/icu/__main__.py b/pymathics/icu/__main__.py index 45aac4c..cf4d779 100644 --- a/pymathics/icu/__main__.py +++ b/pymathics/icu/__main__.py @@ -4,13 +4,17 @@ Languages - Human-Language Alphabets and Locales via PyICU. """ -from typing import List, Optional +from dataclasses import dataclass +from typing import Any, Final, Optional -from icu import Collator, Locale, LocaleData +from icu import Collator, Locale, LocaleData, UCollAttribute, UCollAttributeValue +from mathics.builtin.system import LANGUAGE from mathics.core.atoms import Integer, String -from mathics.core.builtin import Builtin, Predefined +from mathics.core.builtin import Builtin from mathics.core.convert.expression import to_mathics_list from mathics.core.evaluation import Evaluation +from mathics.core.symbols import Symbol, SymbolFalse, SymbolTrue, strip_context +from mathics.core.systemsymbols import SymbolAutomatic available_locales = Locale.getAvailableLocales() language2locale = { @@ -18,11 +22,120 @@ for locale_name, availableLocale in available_locales.items() } -# The current value of $Language -LANGUAGE = "English" +LowerFirstSet: Final[set[String]] = {String("System`LowerFirst"), String("LowerFirst")} +StringAutomatic: Final[String] = String("System`Automatic") +StringLanguage: Final[String] = String("Language") +StringUpperFirst: Final[String] = String("UpperFirst") +SymbolLanguage: Final[String] = Symbol("System`$Language") -def eval_alphabet(language_name: String) -> Optional[List[String]]: +@dataclass(frozen=True) +class AlphabeticOrderOptions: + """ + Stores options associated with AlphbeticOrder[] builtin. + + One initialized, this structure is immutable or frozen. 
+ """ + + lowercase_ordering: Optional[bool] = None + """'True' if ordering should be lowercase first, 'False' if it should be uppercase first, + and 'None' if we should use the natural alphabet ordering case.""" + + ignore_case: bool = False + """whether to ignore upper versus lower case""" + + ignore_diacritics: bool = False + """whether to ignore diacritics for ordering""" + + ignore_punctuation: bool = False + """whether to ignore punctuation for ordering""" + + language: str = LANGUAGE + """what language or alphabet to assume""" + + @classmethod + def from_dict( + cls, options: dict[str, Any], evaluation: Evaluation + ) -> Optional["AlphabeticOrderOptions"]: + """Factory method that normalizes, type-checks, and builds the frozen structure + from a raw dict[str, Any]. + """ + key_mapping = { + "System`CaseOrdering": "lowercase_ordering", + "System`IgnoreCase": "ignore_case", + "System`IgnoreDiacritics": "ignore_diacritics", + "System`IgnorePunctuation": "ignore_punctuation", + "System`Language": "language", + } + + # This will hold our cleaned, type-converted parameters + processed_args: dict[str, Any] = { + "lowercase_ordering": None, + "ignore_case": False, + "ignore_diacritics": False, + "ignore_punctuation": False, + "language": LANGUAGE, + } + + # Iterate through the user-provided options dictionary + for raw_key, option_value in options.items(): + normalized_key = key_mapping.get(raw_key) + + if not normalized_key: + evaluation.message( + "AlphabeticOrder", + "nodef", + String(strip_context(raw_key)), + String("AlphabeticOrder"), + ) + return + + # Type parsing and validation based on the target field name + if normalized_key in ( + "ignore_case", + "ignore_diacritics", + "ignore_punctuation", + ): + if option_value not in (SymbolTrue, SymbolFalse): + evaluation.message( + "AlphabeticOrder", + "opttf", + String(strip_context(raw_key)), + option_value, + ) + return + processed_args[normalized_key] = option_value.value + + elif normalized_key == "language": + if
option_value is SymbolLanguage: + option_value = String(LANGUAGE) + + # In contrast to True/False values for other options, + # if the Language option is not a string, WMA just ignores the option. + if isinstance(option_value, String): + processed_args[normalized_key] = option_value + + elif normalized_key == "lowercase_ordering": + if (option_value is SymbolAutomatic) or option_value == "Automatic": + processed_args[normalized_key] = None + elif option_value in LowerFirstSet: + processed_args[normalized_key] = True + elif option_value == StringUpperFirst: + processed_args[normalized_key] = False + else: + evaluation.message( + "AlphabeticOrder", + "nodef", + Symbol(raw_key), + String("AlphabeticOrder"), + ) + return + + # Initialize and return the frozen dataclass using our verified arguments + return cls(**processed_args) + + +def eval_alphabet(language_name: String) -> Optional[list[String]]: py_language_name = language_name.value locale = language2locale.get(py_language_name, py_language_name) @@ -32,7 +145,9 @@ def eval_alphabet(language_name: String) -> Optional[List[String]]: return to_mathics_list(*alphabet_set, elements_conversion_fn=String) -def eval_alphabetic_order(string1: str, string2: str, language_name=LANGUAGE) -> int: +def eval_alphabetic_order( + string1: str, string2: str, language_name, options: AlphabeticOrderOptions +) -> int: """ Compare two strings using locale-sensitive alphabetic order. @@ -43,6 +158,52 @@ def eval_alphabetic_order(string1: str, string2: str, language_name=LANGUAGE) -> """ locale_str = language_to_locale(language_name) collator = Collator.createInstance(Locale(locale_str)) + + # Configure Case and Diacritic (Accent) rules via Collator Strength + # - PRIMARY: Only looks at the base letter (ignores case AND accents). + # - SECONDARY: Looks at base letters + accents (ignores case). + # - TERTIARY: Looks at base letters + accents + case (Default strict sorting). 
+ + if options.ignore_case and options.ignore_diacritics: + # Ignore both accent variations and case sizes + collator.setStrength(Collator.PRIMARY) + + elif options.ignore_case and not options.ignore_diacritics: + # Ignore upper vs lower case, but treat 'e' and 'é' as different letters + collator.setStrength(Collator.SECONDARY) + + elif not options.ignore_case and options.ignore_diacritics: + # Ignore accents, but treat 'A' and 'a' as different letters. + # ICU handles this by setting strength to PRIMARY but turning on Case Level. + collator.setStrength(Collator.PRIMARY) + collator.setAttribute(UCollAttribute.CASE_LEVEL, UCollAttributeValue.ON) + + else: + # Default: strict matching on both case and diacritics + collator.setStrength(Collator.TERTIARY) + + # Configure Punctuation ignoring + # In ICU, ignoring punctuation is called "Alternate Handling". Turning it + # to SHIFTED moves punctuation tokens to the very end of the weight table, + # effectively ignoring them during normal alphanumeric string comparison. + if options.ignore_punctuation: + collator.setAttribute( + UCollAttribute.ALTERNATE_HANDLING, UCollAttributeValue.SHIFTED + ) + else: + collator.setAttribute( + UCollAttribute.ALTERNATE_HANDLING, UCollAttributeValue.NON_IGNORABLE + ) + + if options.lowercase_ordering: + collator.setAttribute( + UCollAttribute.CASE_FIRST, UCollAttributeValue.LOWER_FIRST + ) + elif options.lowercase_ordering is False: + collator.setAttribute( + UCollAttribute.CASE_FIRST, UCollAttributeValue.UPPER_FIRST + ) + comparison = collator.compare(string1, string2) if comparison < 0: return 1 @@ -107,11 +268,11 @@ class Alphabet(Builtin): """ messages = { - "nalph": "The alphabet `` is not known or not available.", + "nalph": "The alphabet `1` is not known or not available.", } rules = { - "Alphabet[]": """Alphabet[Pymathics`$Language]""", + "Alphabet[]": """Alphabet[$Language]""", } summary_text = "lowercase letters in an alphabet" @@ -133,16 +294,35 @@ class AlphabeticOrder(Builtin):