From 189e60d66060e0160e2ed923a3868d5cd776c673 Mon Sep 17 00:00:00 2001 From: Sam Hickey Date: Wed, 21 Feb 2024 15:20:36 +0000 Subject: [PATCH 1/8] Base Minimal Entity Support --- .../date_time/base_minimal_configs.py | 2 +- .../date_time/base_minimal_merged.py | 466 +------------- .../date_time/catalan/common_configs.py | 6 +- .../date_time/date_time_recognizer.py | 22 +- .../date_time/minimal/__init__.py | 0 .../date_time/minimal/base_configs.py | 57 ++ .../date_time/minimal/base_minimal_date.py | 58 ++ .../date_time/minimal/base_minimal_merged.py | 57 ++ .../date_time/minimal/base_minimal_time.py | 42 ++ .../date_time/minimal/common_configs.py | 114 ++++ .../minimal/date_extractor_config.py | 161 +++++ .../date_time/minimal/date_parser_config.py | 187 ++++++ .../minimal/merged_extractor_config.py | 63 ++ .../date_time/minimal/merged_parser_config.py | 61 ++ .../minimal/time_extractor_config.py | 48 ++ .../date_time/minimal/time_parser_config.py | 40 ++ .../resources/minimal_date_time.py | 58 ++ .../number_with_unit/minimal/__init__.py | 2 + .../number_with_unit/minimal/extractors.py | 85 +++ .../number_with_unit/minimal/parsers.py | 41 ++ .../number_with_unit_recognizer.py | 11 + .../resources/__init__.py | 1 + .../resources/minimal_numeric_with_unit.py | 570 ++++++++++++++++++ .../recognizers_number/culture.py | 4 +- .../recognizers_number/number/__init__.py | 4 +- .../recognizers_number/number/extractors.py | 3 - .../number/minimal/__init__.py | 2 + .../number/minimal/extractors.py | 206 +++++++ .../number/minimal/parsers.py | 219 +++++++ .../number/number_recognizer.py | 23 +- .../number/parser_factory.py | 10 +- .../recognizers_number/resources/__init__.py | 3 +- .../resources/minimal_numeric.py | 42 ++ .../recognizers_text/culture.py | 7 +- 34 files changed, 2182 insertions(+), 493 deletions(-) create mode 100644 Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/__init__.py create mode 100644 
Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/base_configs.py create mode 100644 Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/base_minimal_date.py create mode 100644 Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/base_minimal_merged.py create mode 100644 Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/base_minimal_time.py create mode 100644 Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/common_configs.py create mode 100644 Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/date_extractor_config.py create mode 100644 Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/date_parser_config.py create mode 100644 Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/merged_extractor_config.py create mode 100644 Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/merged_parser_config.py create mode 100644 Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/time_extractor_config.py create mode 100644 Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/time_parser_config.py create mode 100644 Python/libraries/recognizers-date-time/recognizers_date_time/resources/minimal_date_time.py create mode 100644 Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/number_with_unit/minimal/__init__.py create mode 100644 Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/number_with_unit/minimal/extractors.py create mode 100644 Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/number_with_unit/minimal/parsers.py create mode 100644 Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/resources/minimal_numeric_with_unit.py create mode 100644 
Python/libraries/recognizers-number/recognizers_number/number/minimal/__init__.py create mode 100644 Python/libraries/recognizers-number/recognizers_number/number/minimal/extractors.py create mode 100644 Python/libraries/recognizers-number/recognizers_number/number/minimal/parsers.py create mode 100644 Python/libraries/recognizers-number/recognizers_number/resources/minimal_numeric.py diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/base_minimal_configs.py b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/base_minimal_configs.py index 45ca9fe97b..b02e1ac5e3 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/base_minimal_configs.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/base_minimal_configs.py @@ -8,7 +8,7 @@ from .utilities import DateTimeUtilityConfiguration -class MinimalBaseDateParserConfiguration(ABC): +class BaseMinimalDateParserConfiguration(ABC): @property @abstractmethod def cardinal_extractor(self) -> BaseNumberExtractor: diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/base_minimal_merged.py b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/base_minimal_merged.py index 9b9bdaad27..8c5c773cce 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/base_minimal_merged.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/base_minimal_merged.py @@ -1,22 +1,16 @@ from abc import abstractmethod, ABC -from typing import List, Optional, Pattern, Dict +from typing import List, Pattern from datetime import datetime -from collections import namedtuple -import regex from recognizers_text.extractor import Extractor, ExtractResult -from .constants import Constants, TimeTypeConstants from .extractors import DateTimeExtractor -from .parsers import DateTimeParser, DateTimeParseResult from .base_date import BaseDateParser from 
.base_time import BaseTimeParser -from .utilities import Token, merge_all_tokens, DateTimeOptions, DateTimeFormatUtil, DateUtils, RegExpUtility, TimexUtil - -MatchedIndex = namedtuple('MatchedIndex', ['matched', 'index']) +from .utilities import DateTimeOptions +from recognizers_date_time.date_time.base_merged import BaseMergedExtractor, BaseMergedParser class MinimalMergedExtractorConfiguration: - @property @abstractmethod def ambiguous_range_modifier_prefix(self) -> Pattern: @@ -58,14 +52,10 @@ def check_both_before_after(self): raise NotImplementedError -class MinimalMergedExtractor(DateTimeExtractor): - @property - def extractor_type_name(self) -> str: - return Constants.SYS_DATETIME_MERGED +class MinimalMergedExtractor(BaseMergedExtractor): def __init__(self, config: MinimalMergedExtractorConfiguration, options: DateTimeOptions): - self.config = config - self.options = options + super().__init__(config, options) def extract(self, source: str, reference: datetime = None) -> List[ExtractResult]: if reference is None: @@ -89,58 +79,6 @@ def extract(self, source: str, reference: datetime = None) -> List[ExtractResult return result - def add_to(self, destinations: List[ExtractResult], source: List[ExtractResult], text: str) -> List[ExtractResult]: - for value in source: - is_found = False - overlap_indexes: List[int] = list() - first_index = -1 - - for index, destination in enumerate(destinations): - if destination.overlap(value): - is_found = True - if destination.cover(value): - if first_index == -1: - first_index = index - overlap_indexes.append(index) - else: - continue - - if not is_found: - destinations.append(value) - elif overlap_indexes: - temp_dst: List[ExtractResult] = list() - - for index, destination in enumerate(destinations): - if index not in overlap_indexes: - temp_dst.append(destination) - - # insert at the first overlap occurence to keep the order - temp_dst.insert(first_index, value) - destinations = temp_dst - return destinations - - def 
number_ending_regex_match(self, source: str, extract_results: List[ExtractResult]) -> List[ExtractResult]: - tokens: List[Token] = list() - - for extract_result in extract_results: - if extract_result.type in [Constants.SYS_DATETIME_TIME, Constants.SYS_DATETIME_DATETIME]: - after_str = source[extract_result.start + - extract_result.length:] - match = regex.search( - self.config.number_ending_pattern, after_str) - if match: - new_time = RegExpUtility.get_group(match, Constants.NEW_TIME) - num_res = self.config.integer_extractor.extract(new_time) - if not num_res: - continue - - start_position = extract_result.start + \ - extract_result.length + match.group().index(new_time) - tokens.append( - Token(start_position, start_position + len(new_time))) - - return merge_all_tokens(tokens, source, Constants.SYS_DATETIME_TIME) - class MinimalMergedParserConfiguration(ABC): @property @@ -194,397 +132,7 @@ def date_time_parser(self) -> BaseTimeParser: raise NotImplementedError -class MinimalMergedParser(DateTimeParser): - @property - def parser_type_name(self) -> str: - return Constants.SYS_DATETIME_MERGED +class MinimalMergedParser(BaseMergedParser): def __init__(self, config: MinimalMergedParserConfiguration, options: DateTimeOptions): - self.__date_min_value = DateTimeFormatUtil.format_date( - DateUtils.min_value) - self.__date_time_min_value = DateTimeFormatUtil.format_date_time( - DateUtils.min_value) - self.config = config - self.options = options - - def parse(self, source: ExtractResult, reference: datetime = None) -> Optional[DateTimeParseResult]: - if not reference: - reference = datetime.now() - - # Push, save the MOD string - has_before = False - has_after = False - has_since = False - has_around = False - has_equal = False - has_date_after = False - match_is_after = False - - # "inclusive_mod" means MOD should include the start/end time - # For example, cases like "on or later than", "earlier than or in" have inclusive modifier - has_inclusive_mod = False - 
mod_str = '' - - result = self.parse_result(source, reference) - if not result: - return None - - # Pop, restore the MOD string - if has_before and result.value: - result.length += len(mod_str) - result.start -= 0 if match_is_after else len(mod_str) - result.text = result.text + mod_str if match_is_after else mod_str + result.text - val = result.value - - val.mod = self.combine_mod(val.mod, TimeTypeConstants.BEFORE_MOD if not has_inclusive_mod else - TimeTypeConstants.UNTIL_MOD) - if has_around: - val.mod = self.combine_mod(TimeTypeConstants.APPROX_MOD, val.mod) - has_around = False - result.value = val - - if has_after and result.value: - result.length += len(mod_str) - result.start -= len(mod_str) - result.text = mod_str + result.text - val = result.value - - val.mod = self.combine_mod(val.mod, TimeTypeConstants.AFTER_MOD if not has_inclusive_mod else - TimeTypeConstants.SINCE_MOD) - if has_around: - val.mod = self.combine_mod(TimeTypeConstants.APPROX_MOD, val.mod) - has_around = False - result.value = val - - if has_since and result.value: - result.length += len(mod_str) - result.start -= len(mod_str) - result.text = mod_str + result.text - val = result.value - val.mod = TimeTypeConstants.SINCE_MOD - if has_around: - val.mod = self.combine_mod(TimeTypeConstants.APPROX_MOD, val.mod) - has_around = False - result.value = val - - if has_around and result.value: - result.length += len(mod_str) - result.start -= len(mod_str) - result.text = mod_str + result.text - val = result.value - val.mod = TimeTypeConstants.APPROX_MOD - result.value = val - - if has_equal and result.value: - result.length += len(mod_str) - result.start -= len(mod_str) - result.text = mod_str + result.text - - if has_date_after and result.value: - result.length += len(mod_str) - result.text = result.text + mod_str - val = result.value - val.mod = self.combine_mod(val.mod, TimeTypeConstants.SINCE_MOD) - result.value = val - has_since = True - - # For cases like "3 pm or later on monday" - match = 
self.config.suffix_after.match(result.text) - if result.value and (match.start() != 0 if match else match) and \ - result.type == Constants.SYS_DATETIME_DATETIME: - val = result.value - val.mod = self.combine_mod(val.mod, TimeTypeConstants.SINCE_MOD) - result.value = val - has_since = True - - if self.options & DateTimeOptions.SPLIT_DATE_AND_TIME and result.value and result.value.sub_date_time_entities: - result.value = self._date_time_resolution_for_split(result) - else: - result = self.set_parse_result( - result, has_before, has_after, has_since) - - return result - - def parse_result(self, source: ExtractResult, reference: datetime): - if source.type == Constants.SYS_DATETIME_DATE: - result = self.config.date_parser.parse(source, reference) - elif source.type == Constants.SYS_DATETIME_TIME: - result = self.config.time_parser.parse(source, reference) - elif source.type == Constants.SYS_DATETIME_DATETIME: - result = self.config.date_time_parser.parse(source, reference) - else: - return None - - return result - - @staticmethod - def combine_mod(original_mod: str, new_mod: str): - combined_mod = new_mod - - if original_mod: - combined_mod = f"{new_mod}-{original_mod}" - - return combined_mod - - def set_parse_result(self, slot: DateTimeParseResult, has_before: bool, has_after: bool, has_since: bool)\ - -> DateTimeParseResult: - slot.value = self._date_time_resolution( - slot, has_before, has_after, has_since) - slot.type = f'{self.parser_type_name}.' 
\ - f'{self._determine_date_time_types(slot.type, has_before, has_after, has_since)}' - return slot - - def _get_parse_result(self, extractor_result: Extractor, reference: datetime) -> DateTimeParseResult: - extractor_type = extractor_result.type - if extractor_type == Constants.SYS_DATETIME_DATE: - result = self.config.date_parser.parse(extractor_result, reference) - return result - elif extractor_type == Constants.SYS_DATETIME_TIME: - return self.config.time_parser.parse(extractor_result, reference) - else: - return None - - def _determine_date_time_types(self, dtype: str, has_before: bool, has_after: bool, has_since: bool) -> str: - if self.options & DateTimeOptions.SPLIT_DATE_AND_TIME: - if dtype == Constants.SYS_DATETIME_DATETIME: - return Constants.SYS_DATETIME_TIME - else: - if has_before or has_after or has_since: - if dtype == Constants.SYS_DATETIME_DATE: - return Constants.SYS_DATETIME_DATEPERIOD - - if dtype == Constants.SYS_DATETIME_TIME: - return Constants.SYS_DATETIME_TIMEPERIOD - - if dtype == Constants.SYS_DATETIME_DATETIME: - return Constants.SYS_DATETIME_DATETIMEPERIOD - return dtype - - def _determine_source_entity_type(self, source_type: str, new_type: str, has_mod: bool) -> Optional[str]: - if not has_mod: - return None - - if new_type != source_type: - return Constants.SYS_DATETIME_DATETIMEPOINT - - if new_type == Constants.SYS_DATETIME_DATEPERIOD: - return Constants.SYS_DATETIME_DATETIMEPERIOD - - def _date_time_resolution_for_split(self, slot: DateTimeParseResult) -> List[DateTimeParseResult]: - results = [] - if slot.value.sub_date_time_entities: - sub_entities = slot.value.sub_date_time_entities - - for sub_entity in sub_entities: - result = sub_entity - result.start += slot.start - results += self._date_time_resolution_for_split(result) - else: - slot.value = self._date_time_resolution(slot, False, False, False) - slot.type = f'{self.parser_type_name}.{self._determine_date_time_types(slot.type, False, False, False)}' - 
results.append(slot) - - return results - - def _date_time_resolution(self, slot: DateTimeParseResult, has_before, has_after, has_since) ->\ - List[Dict[str, str]]: - if not slot: - return None - - result = dict() - resolutions = [] - - dtype = slot.type - output_type = self._determine_date_time_types(dtype, has_before, has_after, has_since) - source_entity = self._determine_source_entity_type(dtype, output_type, has_before or has_after or has_since) - - timex = slot.timex_str - - value = slot.value - if not value: - return None - - is_lunar = value.is_lunar - mod = value.mod - comment = value.comment - - self._add_resolution_fields_any(result, Constants.TIMEX_KEY, timex) - self._add_resolution_fields_any(result, Constants.COMMENT_KEY, comment) - self._add_resolution_fields_any(result, Constants.MOD_KEY, mod) - self._add_resolution_fields_any(result, Constants.TYPE_KEY, output_type) - self._add_resolution_fields_any( - result, Constants.IS_LUNAR_KEY, str(is_lunar).lower() if is_lunar else '') - - future_resolution = value.future_resolution - past_resolution = value.past_resolution - - future = self._generate_from_resolution(dtype, future_resolution, mod) - past = self._generate_from_resolution(dtype, past_resolution, mod) - - future_values = sorted(future.values()) - past_values = sorted(past.values()) - intersect_values = [i for i, j in zip( - future_values, past_values) if i == j] - - if len(intersect_values) == len(past_values) and len(intersect_values) == len(future_values): - if past_values: - self._add_resolution_fields_any( - result, Constants.RESOLVE_KEY, past) - else: - if past_values: - self._add_resolution_fields_any( - result, Constants.RESOLVE_TO_PAST_KEY, past) - if future_values: - self._add_resolution_fields_any( - result, Constants.RESOLVE_TO_FUTURE_KEY, future) - - if comment == Constants.AM_PM_GROUP_NAME: - if Constants.RESOLVE_KEY in result: - self._resolve_ampm(result, Constants.RESOLVE_KEY) - else: - self._resolve_ampm(result, 
Constants.RESOLVE_TO_PAST_KEY) - self._resolve_ampm(result, Constants.RESOLVE_TO_FUTURE_KEY) - - if TimexUtil._has_double_timex(comment): - TimexUtil._process_double_timex(result, Constants.RESOLVE_TO_FUTURE_KEY, Constants.RESOLVE_TO_PAST_KEY, timex) - - for value in result.values(): - if isinstance(value, dict): - new_values = {} - self._add_resolution_fields( - new_values, Constants.TIMEX_KEY, timex) - self._add_resolution_fields(new_values, Constants.MOD_KEY, mod) - - self._add_resolution_fields(new_values, Constants.TYPE_KEY, output_type) - self._add_resolution_fields(new_values, Constants.IS_LUNAR_KEY, - str(is_lunar).lower() if is_lunar else '') - self._add_resolution_fields(new_values, Constants.SOURCE_TYPE, source_entity) - - for inner_key in value: - new_values[inner_key] = value[inner_key] - - resolutions.append(new_values) - - if not past and not future: - o = {} - o['timex'] = timex - o['type'] = output_type - o['value'] = 'not resolved' - resolutions.append(o) - - return {'values': resolutions} - - def _add_resolution_fields_any(self, dic: Dict[str, str], key: str, value: object): - if isinstance(value, str): - if value: - dic[key] = value - else: - dic[key] = value - - def _add_resolution_fields(self, dic: [str, str], key: str, value: str): - if value: - dic[key] = value - - def _generate_from_resolution(self, dtype: str, resolution: Dict[str, str], mod: str) -> Dict[str, str]: - result = {} - - if dtype == Constants.SYS_DATETIME_DATETIME: - self.__add_single_date_time_to_resolution( - resolution, TimeTypeConstants.DATETIME, mod, result) - elif dtype == Constants.SYS_DATETIME_TIME: - self.__add_single_date_time_to_resolution( - resolution, TimeTypeConstants.TIME, mod, result) - elif dtype == Constants.SYS_DATETIME_DATE: - self.__add_single_date_time_to_resolution( - resolution, TimeTypeConstants.DATE, mod, result) - - return result - - def __add_single_date_time_to_resolution(self, resolutions: Dict[str, str], dtype: str, - mod: str, result: Dict[str, 
str]): - key = TimeTypeConstants.VALUE - value = resolutions[dtype] - if not value or value.startswith(self.__date_min_value): - return - - if mod: - if mod.startswith(TimeTypeConstants.BEFORE_MOD): - key = TimeTypeConstants.END - elif mod.startswith(TimeTypeConstants.AFTER_MOD): - key = TimeTypeConstants.START - elif mod.startswith(TimeTypeConstants.SINCE_MOD): - key = TimeTypeConstants.START - elif mod.startswith(TimeTypeConstants.UNTIL_MOD): - key = TimeTypeConstants.END - - result[key] = value - - def __add_period_to_resolution(self, resolutions: Dict[str, str], start_type: str, - end_type: str, mod: str, result: Dict[str, str]): - start = resolutions.get(start_type, None) - end = resolutions.get(end_type, None) - if mod: - if mod.startswith(TimeTypeConstants.BEFORE_MOD): - if mod.endswith(TimeTypeConstants.LATE_MOD): - result[TimeTypeConstants.END] = end - else: - result[TimeTypeConstants.END] = start - return - if mod.startswith(TimeTypeConstants.AFTER_MOD): - if mod.endswith(TimeTypeConstants.EARLY_MOD): - result[TimeTypeConstants.START] = start - else: - result[TimeTypeConstants.START] = end - return - if mod == TimeTypeConstants.SINCE_MOD: - result[TimeTypeConstants.START] = start - return - - if not (start and end): - return - - if start.startswith(Constants.INVALID_DATE_STRING) or end.startswith(Constants.INVALID_DATE_STRING): - return - - result[TimeTypeConstants.START] = start - result[TimeTypeConstants.END] = end - - def _resolve_ampm(self, values_map: Dict[str, str], key_name: str): - if key_name not in values_map: - return - resolution = values_map[key_name] - if Constants.TIMEX_KEY not in values_map: - return - timex = values_map[Constants.TIMEX_KEY] - values_map.pop(key_name, None) - values_map[key_name + Constants.AM_GROUP_NAME] = resolution - - resolution_pm = {} - if values_map[Constants.TYPE_KEY] == Constants.SYS_DATETIME_TIME: - resolution_pm[TimeTypeConstants.VALUE] = DateTimeFormatUtil.to_pm( - resolution[TimeTypeConstants.VALUE]) - 
resolution_pm[Constants.TIMEX_KEY] = DateTimeFormatUtil.to_pm(timex) - elif values_map[Constants.TYPE_KEY] == Constants.SYS_DATETIME_DATETIME: - split_value = resolution[TimeTypeConstants.VALUE].split(' ') - resolution_pm[ - TimeTypeConstants.VALUE] = f'{split_value[0]} {DateTimeFormatUtil.to_pm(split_value[1])}' - resolution_pm[Constants.TIMEX_KEY] = DateTimeFormatUtil.all_str_to_pm(timex) - elif values_map[Constants.TYPE_KEY] == Constants.SYS_DATETIME_TIMEPERIOD: - if TimeTypeConstants.START in resolution: - resolution_pm[TimeTypeConstants.START] = DateTimeFormatUtil.to_pm( - resolution[TimeTypeConstants.START]) - if TimeTypeConstants.END in resolution: - resolution_pm[TimeTypeConstants.END] = DateTimeFormatUtil.to_pm( - resolution[TimeTypeConstants.END]) - resolution_pm[Constants.TIMEX_KEY] = DateTimeFormatUtil.all_str_to_pm(timex) - elif values_map[Constants.TYPE_KEY] == Constants.SYS_DATETIME_DATETIMEPERIOD: - if TimeTypeConstants.START in resolution: - split_value = resolution[TimeTypeConstants.START].split(' ') - resolution_pm[ - TimeTypeConstants.START] = f'{split_value[0]} {DateTimeFormatUtil.to_pm(split_value[1])}' - if TimeTypeConstants.END in resolution: - split_value = resolution[TimeTypeConstants.END].split(' ') - resolution_pm[ - TimeTypeConstants.END] = f'{split_value[0]} {DateTimeFormatUtil.to_pm(split_value[1])}' - resolution_pm[Constants.TIMEX_KEY] = DateTimeFormatUtil.all_str_to_pm(timex) - values_map[key_name + Constants.PM_GROUP_NAME] = resolution_pm \ No newline at end of file + super().__init__(config, options) diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/catalan/common_configs.py b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/catalan/common_configs.py index 8b0d9bcaf1..adbbc234d2 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/catalan/common_configs.py +++ 
b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/catalan/common_configs.py @@ -11,7 +11,7 @@ from ..extractors import DateTimeExtractor from ..parsers import DateTimeParser from ..base_configs import DateTimeUtilityConfiguration -from ..base_minimal_configs import MinimalBaseDateParserConfiguration +from ..base_minimal_configs import BaseMinimalDateParserConfiguration from ..base_date import BaseDateExtractor, DateExtractorConfiguration, BaseDateParser from ..base_time import BaseTimeExtractor, BaseTimeParser from ..base_timezone import BaseTimeZoneParser @@ -42,7 +42,7 @@ def extract(self, source: str, reference: datetime = None) -> List[ExtractResult return result -class CatalanCommonDateTimeParserConfiguration(MinimalBaseDateParserConfiguration): +class CatalanCommonDateTimeParserConfiguration(BaseMinimalDateParserConfiguration): @property def time_zone_parser(self) -> DateTimeParser: return self._time_zone_parser @@ -120,7 +120,7 @@ def utility_configuration(self) -> DateTimeUtilityConfiguration: return self._utility_configuration def __init__(self): - MinimalBaseDateParserConfiguration.__init__(self) + BaseMinimalDateParserConfiguration.__init__(self) self._utility_configuration = CatalanDateTimeUtilityConfiguration() self._time_zone_parser = BaseTimeZoneParser() diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/date_time_recognizer.py b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/date_time_recognizer.py index 1b7298eb99..96ac42c205 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/date_time_recognizer.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/date_time_recognizer.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. 
- from datetime import datetime from typing import List from recognizers_text import Culture, Recognizer @@ -9,6 +6,7 @@ from .models import DateTimeModel from .base_merged import BaseMergedExtractor, BaseMergedParser from .base_minimal_merged import MinimalMergedExtractor, MinimalMergedParser +from .minimal.base_minimal_merged import BaseMinimalMergedExtractor, BaseMinimalMergedParser from .english.common_configs import EnglishCommonDateTimeParserConfiguration from .english.merged_extractor_config import EnglishMergedExtractorConfiguration from .english.merged_parser_config import EnglishMergedParserConfiguration @@ -42,12 +40,16 @@ from .catalan.common_configs import CatalanCommonDateTimeParserConfiguration from .catalan.merged_extractor_config import CatalanMergedExtractorConfiguration from .catalan.merged_parser_config import CatalanMergedParserConfiguration +from .minimal.common_configs import MinimalCommonDateTimeParserConfiguration +from .minimal.merged_extractor_config import BaseMinimalMergedExtractorConfiguration +from .minimal.merged_parser_config import BaseMinimalMergedParserConfiguration class DateTimeRecognizer(Recognizer[DateTimeOptions]): def __init__(self, target_culture: str = None, options: DateTimeOptions = DateTimeOptions.NONE, - lazy_initialization: bool = True): + lazy_initialization: bool = True, dmyDateFormat: bool = True): + self.dmyDateFormat = dmyDateFormat if options < DateTimeOptions.NONE or options > DateTimeOptions.CALENDAR: raise ValueError() super().__init__(target_culture, options, lazy_initialization) @@ -131,12 +133,20 @@ def initialize_configuration(self): MinimalMergedExtractor(CatalanMergedExtractorConfiguration(), options) )) + self.register_model('DateTimeModel', Culture.Minimal, lambda options: DateTimeModel( + BaseMinimalMergedParser(BaseMinimalMergedParserConfiguration( + MinimalCommonDateTimeParserConfiguration(dmyDateFormat=self.dmyDateFormat), + dmyDateFormat=self.dmyDateFormat), options), + 
BaseMinimalMergedExtractor(BaseMinimalMergedExtractorConfiguration(), options) + )) + def get_datetime_model(self, culture: str = None, fallback_to_default_culture: bool = True) -> Model: return self.get_model('DateTimeModel', culture, fallback_to_default_culture) def recognize_datetime(query: str, culture: str, options: DateTimeOptions = DateTimeOptions.NONE, - reference: datetime = None, fallback_to_default_culture: bool = True) -> List[ModelResult]: - recognizer = DateTimeRecognizer(culture, options) + reference: datetime = None, fallback_to_default_culture: bool = True, + dmyDateFormat: bool = True) -> List[ModelResult]: + recognizer = DateTimeRecognizer(culture, options, dmyDateFormat=dmyDateFormat) model = recognizer.get_datetime_model(culture, fallback_to_default_culture) return model.parse(query, reference) diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/__init__.py b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/base_configs.py b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/base_configs.py new file mode 100644 index 0000000000..3f4171593c --- /dev/null +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/base_configs.py @@ -0,0 +1,57 @@ +from typing import Pattern +from recognizers_text.utilities import RegExpUtility +from recognizers_date_time.date_time.base_date import DateTimeUtilityConfiguration +from recognizers_date_time.resources.minimal_date_time import MinimalDateTime + + +class MinimalDateTimeUtilityConfiguration(DateTimeUtilityConfiguration): + + @property + def date_unit_regex(self) -> Pattern: + return None + + @property + def ago_regex(self) -> Pattern: + return None + + @property + def later_regex(self) -> Pattern: + return None + + @property + def 
in_connector_regex(self) -> Pattern: + return None + + @property + def range_unit_regex(self) -> Pattern: + return None + + @property + def check_both_before_after(self) -> Pattern: + return None + + @property + def range_prefix_regex(self) -> Pattern: + return None + + @property + def am_desc_regex(self) -> Pattern: + return self._am_desc_regex + + @property + def pm_desc__regex(self) -> Pattern: + return self._pm_desc__regex + + @property + def am_pm_desc_regex(self) -> Pattern: + return self._am_pm_desc_regex + + def __init__(self): + + self._am_desc_regex = RegExpUtility.get_safe_reg_exp( + MinimalDateTime.AmDescRegex) + self._pm_desc__regex = RegExpUtility.get_safe_reg_exp( + MinimalDateTime.PmDescRegex) + self._am_pm_desc_regex = RegExpUtility.get_safe_reg_exp( + MinimalDateTime.AmPmDescRegex) + self._check_both_before_after = MinimalDateTime.CheckBothBeforeAfter diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/base_minimal_date.py b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/base_minimal_date.py new file mode 100644 index 0000000000..42b11ac830 --- /dev/null +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/base_minimal_date.py @@ -0,0 +1,58 @@ +from typing import List, Optional, Dict +from datetime import datetime + +from recognizers_text.extractor import ExtractResult +from ..constants import TimeTypeConstants +from ..parsers import DateTimeParseResult +from recognizers_date_time.date_time.base_date import DateExtractorConfiguration, BaseDateExtractor, \ + DateParserConfiguration, BaseDateParser + + +class BaseMinimalDateExtractor(BaseDateExtractor): + def __init__(self, config: DateExtractorConfiguration): + super().__init__(config) + + def extract(self, source: str, reference: datetime = None) -> List[ExtractResult]: + from ..utilities import merge_all_tokens + + tokens = [] + tokens.extend(self.basic_regex_match(source)) + + result = 
merge_all_tokens(tokens, source, self.extractor_type_name) + return result + + +class BaseMinimalDateParser(BaseDateParser): + + def __init__(self, config: DateParserConfiguration): + super().__init__(config) + + def parse(self, source: ExtractResult, reference: datetime = None) -> Optional[DateTimeParseResult]: + from ..utilities import DateTimeFormatUtil + if reference is None: + reference = datetime.now() + + result_value: DateTimeParseResult = None + + if source.type is self.parser_type_name: + source_text = source.text.lower() + inner_result = self.parse_basic_regex_match(source_text, reference) + + if not inner_result.success: + inner_result = self.parse_single_number(source_text, reference) + + if inner_result.success: + inner_result.future_resolution: Dict[str, str] = dict() + inner_result.future_resolution[TimeTypeConstants.DATE] = DateTimeFormatUtil.format_date( + inner_result.future_value) + inner_result.past_resolution: Dict[str, str] = dict() + inner_result.past_resolution[TimeTypeConstants.DATE] = DateTimeFormatUtil.format_date( + inner_result.past_value) + result_value = inner_result + + result = DateTimeParseResult(source) + result.value = result_value + result.timex_str = result_value.timex if result_value is not None else '' + result.resolution_str = '' + + return result diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/base_minimal_merged.py b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/base_minimal_merged.py new file mode 100644 index 0000000000..838978f897 --- /dev/null +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/base_minimal_merged.py @@ -0,0 +1,57 @@ +from typing import List +from datetime import datetime + +from recognizers_text.extractor import ExtractResult +from recognizers_date_time.date_time.constants import Constants +from recognizers_date_time.date_time.utilities import DateTimeOptions +from 
class BaseMinimalMergedExtractor(BaseMergedExtractor):
    """Merged extractor for the minimal culture: dates, times, then number endings."""

    def __init__(self, config: MinimalMergedExtractorConfiguration, options: DateTimeOptions):
        super().__init__(config, options)

    @property
    def extractor_type_name(self) -> str:
        return Constants.SYS_DATETIME_MERGED

    def extract(self, source: str, reference: datetime = None) -> List[ExtractResult]:
        """Run the date and time extractors over ``source`` and merge their output.

        :param source: text to scan.
        :param reference: reference date; defaults to ``datetime.now()``.
        :return: the merged results ordered by their ``start`` offset.
        """
        if reference is None:
            reference = datetime.now()

        merged: List[ExtractResult] = []

        # Order matters: earlier results take part in conflict resolution
        # for the later ones, so dates are added before times.
        date_results = self.config.date_extractor.extract(source, reference)
        merged = self.add_to(merged, date_results, source)

        time_results = self.config.time_extractor.extract(source, reference)
        merged = self.add_to(merged, time_results, source)

        # Must stay last: the number-ending match receives everything
        # collected so far.
        ending_results = self.number_ending_regex_match(source, merged)
        merged = self.add_to(merged, ending_results, source)

        return sorted(merged, key=lambda item: item.start)


class BaseMinimalMergedParser(BaseMergedParser):
    """Merged parser for the minimal culture; dispatches to the date or time parser."""

    def __init__(self, config: MinimalMergedParserConfiguration, options: DateTimeOptions):
        super().__init__(config, options)

    @property
    def parser_type_name(self) -> str:
        return Constants.SYS_DATETIME_MERGED

    def parse_result(self, source: ExtractResult, reference: datetime):
        """Parse ``source`` with the parser matching its type.

        :return: the date or time parser's result, or ``None`` when
            ``source.type`` is neither a date nor a time.
        """
        if source.type == Constants.SYS_DATETIME_DATE:
            return self.config.date_parser.parse(source, reference)
        if source.type == Constants.SYS_DATETIME_TIME:
            return self.config.time_parser.parse(source, reference)
        return None
class BaseMinimalTimeExtractor(BaseTimeExtractor):
    """Time extractor for the minimal culture that uses basic regex matching only."""

    @property
    def extractor_type_name(self) -> str:
        return Constants.SYS_DATETIME_TIME

    def __init__(self, config: TimeExtractorConfiguration):
        super().__init__(config)

    def extract(self, source: str, reference: datetime = None) -> List[ExtractResult]:
        """Extract time mentions from ``source``.

        :param source: text to scan.
        :param reference: reference date; defaults to ``datetime.now()``.
        :return: merged time results; when ``ENABLE_PREVIEW`` is set and the
            configuration provides a time-zone extractor, time zones are
            merged into them as well.
        """
        if reference is None:
            reference = datetime.now()

        tokens = self.basic_regex_match(source)
        result = merge_all_tokens(tokens, source, self.extractor_type_name)

        if (self.config.options & DateTimeOptions.ENABLE_PREVIEW) != 0:
            # The minimal time extractor configuration returns None for
            # time_zone_extractor, so guard before calling into it instead of
            # failing with AttributeError when preview mode is enabled.
            time_zone_extractor = self.config.time_zone_extractor
            if time_zone_extractor is not None:
                result = TimeZoneUtility().merge_time_zones(
                    result,
                    time_zone_extractor.extract(source, reference),
                    source
                )

        return result


class BaseMinimalTimeParser(BaseTimeParser):
    """Time parser for the minimal culture; behaviour comes from ``BaseTimeParser``."""

    def __init__(self, config: TimeParserConfiguration):
        super().__init__(config)
        # Kept from the original: the configuration is (re)bound on the instance.
        self.config = config
class MinimalCommonDateTimeParserConfiguration(BaseMinimalDateParserConfiguration):
    """Shared parser configuration for the minimal date/time recognizer.

    Wires together the number extractors/parser and the minimal date and time
    extractors/parsers. Components the minimal culture does not provide are
    exposed as ``None`` or as empty dictionaries.
    """

    @property
    def month_of_year(self) -> Dict[str, int]:
        return self._month_of_year

    @property
    def time_zone_parser(self) -> DateTimeParser:
        return self._time_zone_parser

    @property
    def check_both_before_after(self) -> bool:
        return self._check_both_before_after

    @property
    def cardinal_extractor(self) -> MinimalNumberExtractor:
        return self._cardinal_extractor

    @property
    def integer_extractor(self) -> MinimalNumberExtractor:
        return self._integer_extractor

    @property
    def ordinal_extractor(self) -> MinimalNumberExtractor:
        return None

    @property
    def number_parser(self) -> MinimalNumberParser:
        return self._number_parser

    @property
    def date_extractor(self) -> DateTimeExtractor:
        return self._date_extractor

    @property
    def time_extractor(self) -> DateTimeExtractor:
        return self._time_extractor

    @property
    def date_time_extractor(self) -> DateTimeExtractor:
        return None

    @property
    def date_parser(self) -> DateTimeParser:
        return self._date_parser

    @property
    def time_parser(self) -> DateTimeParser:
        return self._time_parser

    @property
    def date_time_parser(self) -> DateTimeParser:
        return None

    @property
    def numbers(self) -> Dict[str, int]:
        return {}

    @property
    def day_of_month(self) -> Dict[str, int]:
        # NOTE(review): _day_of_month is not assigned in this class's __init__;
        # presumably the base class __init__ provides it - confirm.
        return self._day_of_month

    @property
    def day_of_week(self) -> Dict[str, int]:
        return None

    @property
    def unit_map(self):
        return {}

    @property
    def cardinal_map(self):
        return {}

    @property
    def utility_configuration(self) -> DateTimeUtilityConfiguration:
        return self._utility_configuration

    def __init__(self, dmyDateFormat: bool = True):
        """Build the shared configuration.

        :param dmyDateFormat: forwarded to ``MinimalDateExtractorConfiguration``
            to choose the order of the date patterns.
        """
        # Local import: the extractor class lives next to BaseMinimalDateParser
        # in the same module, which this file already imports from.
        from recognizers_date_time.date_time.minimal.base_minimal_date import BaseMinimalDateExtractor

        BaseMinimalDateParserConfiguration.__init__(self)

        self._month_of_year = MinimalDateTime.MonthOfYear
        self._utility_configuration = MinimalDateTimeUtilityConfiguration()
        self._time_zone_parser = BaseTimeZoneParser()
        self._check_both_before_after = MinimalDateTime.CheckBothBeforeAfter
        self._cardinal_extractor = MinimalCardinalExtractor()
        self._integer_extractor = MinimalIntegerExtractor()
        self._number_parser = MinimalNumberParser(
            MinimalNumberParserConfiguration())
        # Fix: the date *extractor* slot was populated with a date *parser*
        # wrapped around an extractor configuration.
        self._date_extractor = BaseMinimalDateExtractor(
            MinimalDateExtractorConfiguration(dmyDateFormat))
        self._time_extractor = BaseMinimalTimeExtractor(MinimalTimeExtractorConfiguration())
        # The parser configurations read from ``self``, so they are built last.
        self._date_parser = BaseMinimalDateParser(
            MinimalDateParserConfiguration(self))
        self._time_parser = BaseMinimalTimeParser(MinimalTimeParserConfiguration(self))


class MinimalDateExtractorConfiguration(DateExtractorConfiguration):
    """Date extractor configuration for the minimal culture.

    Only the numeric date patterns, the number helpers and the month map are
    real; every other pattern is the placeholder regex ``'^[.]'``.
    """

    @staticmethod
    def _placeholder_regex() -> Pattern:
        """Return the stand-in pattern used for regexes this culture does not define.

        NOTE(review): ``'^[.]'`` still matches text that starts with a literal
        dot; presumably it is meant as a pattern that rarely matches - confirm.
        """
        return RegExpUtility.get_safe_reg_exp('^[.]')

    @property
    def month_end(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def week_day_end(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def week_day_start(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def of_month(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def date_unit_regex(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def for_the_regex(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def week_day_and_day_of_month_regex(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def week_day_and_day_regex(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def relative_month_regex(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def week_day_regex(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def prefix_article_regex(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def day_of_week(self) -> Dict[str, int]:
        # The minimal culture has no weekday names.
        return None

    @property
    def month_of_year(self) -> Dict[str, int]:
        # Fix: the value was evaluated but never returned.
        return self._month_of_year

    @property
    def ordinal_extractor(self):
        return self._ordinal_extractor

    @property
    def utility_configuration(self):
        # Fix: __init__ builds this object, but the property never returned it.
        return self._utility_configuration

    @property
    def strict_relative_regex(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def year_suffix(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def more_than_regex(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def less_than_regex(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def in_connector_regex(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def range_unit_regex(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def since_year_suffix_regex(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def check_both_before_after(self) -> bool:
        return self._check_both_before_after

    @property
    def date_regex_list(self) -> List[Pattern]:
        return self._date_regex_list

    @property
    def implicit_date_list(self) -> List[Pattern]:
        return self._implicit_date_list

    @property
    def integer_extractor(self) -> BaseNumberExtractor:
        return self._integer_extractor

    @property
    def number_parser(self) -> MinimalNumberParser:
        return self._number_parser

    @property
    def duration_extractor(self) -> DateTimeExtractor:
        return None

    @property
    def range_connector_symbol_regex(self) -> Pattern:
        return self._range_connector_symbol_regex

    def __init__(self, dmyDateFormat: bool = True):
        """Compile the date patterns in the order selected by ``dmyDateFormat``.

        :param dmyDateFormat: when true, ``DateExtractor5``/``DateExtractor9S``
            are tried before ``DateExtractor4``/``DateExtractor7S``; otherwise
            the two pairs are swapped.
        """
        if dmyDateFormat:
            ordered_patterns = (
                MinimalDateTime.DateExtractor5,
                MinimalDateTime.DateExtractor9S,
                MinimalDateTime.DateExtractor4,
                MinimalDateTime.DateExtractor7S,
            )
        else:
            ordered_patterns = (
                MinimalDateTime.DateExtractor4,
                MinimalDateTime.DateExtractor7S,
                MinimalDateTime.DateExtractor5,
                MinimalDateTime.DateExtractor9S,
            )

        self._date_regex_list = [
            RegExpUtility.get_safe_reg_exp(pattern) for pattern in ordered_patterns
        ]

        self._implicit_date_list = []
        self._integer_extractor = MinimalIntegerExtractor()
        self._number_parser = MinimalNumberParser(
            MinimalNumberParserConfiguration())
        self._utility_configuration = MinimalDateTimeUtilityConfiguration()
        self._range_connector_symbol_regex = RegExpUtility.get_safe_reg_exp(
            BaseDateTime.RangeConnectorSymbolRegex
        )
        self._check_both_before_after = MinimalDateTime.CheckBothBeforeAfter
        self._ordinal_extractor = MinimalOrdinalExtractor()
        self._month_of_year = MinimalDateTime.MonthOfYear
class MinimalDateParserConfiguration(DateParserConfiguration):
    """Date parser configuration for the minimal culture.

    Extractors, parsers and dictionaries are copied from the shared
    configuration passed to ``__init__``; every culture-specific pattern is
    the placeholder regex ``'^[.]'``.
    """

    @staticmethod
    def _placeholder_regex() -> Pattern:
        """Return the stand-in pattern used for regexes this culture does not define.

        NOTE(review): ``'^[.]'`` still matches text that starts with a literal
        dot; presumably it is meant as a pattern that rarely matches - confirm.
        """
        return RegExpUtility.get_safe_reg_exp('^[.]')

    @property
    def on_regex(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def special_day_regex(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def special_day_with_num_regex(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def next_regex(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def unit_regex(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def month_regex(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def week_day_regex(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def last_regex(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def this_regex(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def week_day_of_month_regex(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def for_the_regex(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def week_day_and_day_of_month_regex(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def week_day_and_day_regex(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def relative_month_regex(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def relative_week_day_regex(self) -> Pattern:
        return self._placeholder_regex()

    @property
    def date_token_prefix(self) -> str:
        return ""

    def get_swift_day(self, source: str) -> int:
        """Return the day offset implied by ``source``.

        The minimal culture has no relative-day words, so the offset is always
        0. (Previously this returned a compiled regex, contradicting the
        declared ``int`` return type.)
        """
        return 0

    def get_swift_month(self, source: str) -> int:
        """Return the month offset implied by ``source``; always 0 here."""
        return 0

    def is_cardinal_last(self, source: str) -> bool:
        """Tell whether ``source`` means "last"; never true for the minimal culture."""
        return False

    @property
    def check_both_before_after(self) -> bool:
        return self._check_both_before_after

    @property
    def ordinal_extractor(self) -> BaseNumberExtractor:
        return self._ordinal_extractor

    @property
    def integer_extractor(self) -> BaseNumberExtractor:
        return self._integer_extractor

    @property
    def cardinal_extractor(self) -> BaseNumberExtractor:
        return self._cardinal_extractor

    @property
    def date_extractor(self) -> DateTimeExtractor:
        return self._date_extractor

    @property
    def duration_extractor(self) -> DateTimeExtractor:
        return None

    @property
    def duration_parser(self) -> DateTimeParser:
        return None

    @property
    def number_parser(self) -> BaseNumberParser:
        return self._number_parser

    @property
    def month_of_year(self) -> Dict[str, int]:
        return self._month_of_year

    @property
    def day_of_month(self) -> Dict[str, int]:
        return self._day_of_month

    @property
    def day_of_week(self) -> Dict[str, int]:
        return self._day_of_week

    @property
    def unit_map(self) -> Dict[str, str]:
        return self._unit_map

    @property
    def cardinal_map(self) -> Dict[str, int]:
        return self._cardinal_map

    @property
    def date_regex(self) -> List[Pattern]:
        return self._date_regex

    @property
    def utility_configuration(self) -> DateTimeUtilityConfiguration:
        return self._utility_configuration

    # Class-level placeholders kept from the original; nothing in this class
    # assigns them a pattern.
    _relative_day_regex = None

    _next_prefix_regex = None

    _past_prefix_regex = None

    def __init__(self, config: BaseDateParserConfiguration):
        """Copy the shared components from ``config``.

        :param config: shared date parser configuration; its
            ``date_extractor`` must expose ``config.date_regex_list``.
        """
        self._ordinal_extractor = config.ordinal_extractor
        self._integer_extractor = config.integer_extractor
        self._cardinal_extractor = config.cardinal_extractor
        self._date_extractor = config.date_extractor
        self._number_parser = config.number_parser
        self._month_of_year = config.month_of_year
        self._day_of_month = config.day_of_month
        self._day_of_week = config.day_of_week
        self._unit_map = config.unit_map
        self._cardinal_map = config.cardinal_map
        # The parser reuses the exact pattern list compiled for the extractor.
        self._date_regex = config.date_extractor.config.date_regex_list
        self._utility_configuration = config.utility_configuration
        self._check_both_before_after = MinimalDateTime.CheckBothBeforeAfter

    def __normalize(self, source: str) -> str:
        """Replace a fixed set of accented vowels in ``source`` with plain ones."""
        return source.replace('á', 'a').replace('é', 'e').replace('í', 'i').replace('ó', 'o').\
            replace('ú', 'u').replace('à', 'a')


class BaseMinimalMergedExtractorConfiguration(MinimalMergedExtractorConfiguration):
    """Merged extractor configuration for the minimal culture (dates and times only)."""

    @property
    def check_both_before_after(self):
        return self._check_both_before_after

    @property
    def date_extractor(self) -> DateTimeExtractor:
        return self._date_extractor

    @property
    def time_extractor(self) -> DateTimeExtractor:
        return self._time_extractor

    @property
    def date_time_extractor(self) -> DateTimeExtractor:
        return None

    @property
    def integer_extractor(self) -> Extractor:
        return self._integer_extractor

    @property
    def equal_regex(self) -> Pattern:
        return self._equal_regex

    @property
    def ambiguous_range_modifier_prefix(self) -> None:
        return None

    @property
    def potential_ambiguous_range_regex(self) -> None:
        return None

    @property
    def number_ending_pattern(self) -> Pattern:
        # NOTE(review): placeholder pattern; it matches only text starting
        # with a literal dot - confirm this is the intended "no match" stand-in.
        return RegExpUtility.get_safe_reg_exp('^[.]')

    @property
    def filter_word_regex_list(self) -> List[Pattern]:
        return self._filter_word_regex_list

    def __init__(self):
        self._date_extractor = BaseMinimalDateExtractor(
            MinimalDateExtractorConfiguration())
        self._time_extractor = BaseMinimalTimeExtractor(MinimalTimeExtractorConfiguration())
        self._integer_extractor = MinimalIntegerExtractor()
        self._filter_word_regex_list = []
        # Fix: compile the pattern so the property really returns a Pattern,
        # matching how the merged parser configuration treats EqualRegex.
        self._equal_regex = RegExpUtility.get_safe_reg_exp(BaseDateTime.EqualRegex)
        self._check_both_before_after = MinimalDateTime.CheckBothBeforeAfter
class BaseMinimalMergedParserConfiguration(MinimalCommonDateTimeParserConfiguration, MinimalMergedParserConfiguration):
    """Merged parser configuration for the minimal culture.

    Builds on the shared minimal configuration, takes the time-zone parser
    from the supplied ``config`` and adds the compiled equal and year patterns.
    """

    def __init__(self, config, dmyDateFormat=True):
        MinimalCommonDateTimeParserConfiguration.__init__(self, dmyDateFormat)
        # Overrides the time-zone parser created by the shared initializer.
        self._time_zone_parser = config.time_zone_parser
        self._equal_regex = RegExpUtility.get_safe_reg_exp(BaseDateTime.EqualRegex)
        self._year_regex = RegExpUtility.get_safe_reg_exp(MinimalDateTime.YearRegex)

    # --- patterns backed by real resources ---------------------------------

    @property
    def equal_regex(self) -> Pattern:
        return self._equal_regex

    @property
    def year_regex(self) -> Pattern:
        return self._year_regex

    # --- placeholder patterns ('^[.]') --------------------------------------

    @property
    def around_regex(self) -> Pattern:
        placeholder = RegExpUtility.get_safe_reg_exp(f'^[.]')
        return placeholder

    @property
    def suffix_after(self) -> Pattern:
        placeholder = RegExpUtility.get_safe_reg_exp(f'^[.]')
        return placeholder

    @property
    def before_regex(self) -> Pattern:
        placeholder = RegExpUtility.get_safe_reg_exp(f'^[.]')
        return placeholder

    @property
    def after_regex(self) -> Pattern:
        placeholder = RegExpUtility.get_safe_reg_exp(f'^[.]')
        return placeholder

    @property
    def since_regex(self) -> Pattern:
        placeholder = RegExpUtility.get_safe_reg_exp(f'^[.]')
        return placeholder

    # --- parsers --------------------------------------------------------------

    @property
    def date_parser(self) -> BaseDateParser:
        return self._date_parser

    @property
    def time_parser(self) -> BaseTimeParser:
        return self._time_parser

    @property
    def date_time_parser(self) -> BaseTimeParser:
        # No combined date-time parser is provided.
        return None

    @property
    def time_zone_parser(self) -> DateTimeParser:
        return self._time_zone_parser
class MinimalTimeExtractorConfiguration(TimeExtractorConfiguration):
    """Time extractor configuration for the minimal culture.

    Provides the compiled time patterns; the time-zone extractor and the
    ``at``/``ish``/before-after patterns are ``None``.
    """

    @staticmethod
    def get_time_regex_list() -> List[Pattern]:
        """Compile and return the minimal time patterns in matching order."""
        pattern_sources = (
            MinimalDateTime.TimeRegex1,
            MinimalDateTime.TimeRegex2,
            MinimalDateTime.TimeRegex3,
            MinimalDateTime.TimeRegex12,
            MinimalDateTime.ConnectNumRegex,
        )
        return [RegExpUtility.get_safe_reg_exp(text) for text in pattern_sources]

    def __init__(self):
        super().__init__()
        compiled: List[Pattern] = MinimalTimeExtractorConfiguration.get_time_regex_list()
        self._time_regex_list = compiled

    @property
    def time_regex_list(self) -> List[Pattern]:
        return self._time_regex_list

    @property
    def options(self):
        # NOTE(review): _options is not assigned in this class; presumably the
        # base initializer sets it - confirm.
        return self._options

    @property
    def dmy_date_format(self) -> bool:
        # NOTE(review): _dmy_date_format is not assigned in this class;
        # presumably the base initializer sets it - confirm.
        return self._dmy_date_format

    @property
    def time_zone_extractor(self):
        return None

    @property
    def at_regex(self) -> Pattern:
        return None

    @property
    def ish_regex(self) -> Pattern:
        return None

    @property
    def time_before_after_regex(self) -> Pattern:
        return None
class MinimalTimeParserConfiguration(TimeParserConfiguration):
    """Time parser configuration for the minimal culture.

    Reuses the extractor's compiled time patterns and copies the utility
    configuration, number map and time-zone parser from the shared
    configuration given to ``__init__``.
    """

    @property
    def time_token_prefix(self) -> str:
        return ""

    @property
    def at_regex(self) -> Pattern:
        # Placeholder pattern; it matches only text starting with a literal dot.
        return RegExpUtility.get_safe_reg_exp(f'^[.]')

    @property
    def time_regexes(self) -> List[Pattern]:
        return self._time_regexes

    @property
    def numbers(self) -> Dict[str, int]:
        return self._numbers

    @property
    def utility_configuration(self) -> DateTimeUtilityConfiguration:
        return self._utility_configuration

    @property
    def time_zone_parser(self) -> DateTimeParser:
        return self._time_zone_parser

    def __init__(self, config: BaseDateParserConfiguration):
        # Same pattern list the minimal time extractor uses.
        self._time_regexes: List[Pattern] = MinimalTimeExtractorConfiguration.get_time_regex_list()

        self._utility_configuration = config.utility_configuration
        self._numbers: Dict[str, int] = config.numbers
        self._time_zone_parser = config.time_zone_parser


# pylint: disable=line-too-long


class MinimalDateTime:
    # Regex and lookup resources for the minimal date/time culture.
    #
    # NOTE(review): several patterns below contain "(?" directly followed by
    # alternatives (e.g. DayRegex, MonthNumRegex, DescRegex, TwoDigitYearRegex,
    # BasicTime, ConnectNumRegex). That is not a valid group prefix, so it looks
    # like "<name>" group markers were lost from this text - confirm against
    # the original resource file before relying on these patterns.
    LangMarker = 'min'
    CheckBothBeforeAfter = False
    DayRegex = f'\\b(?01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|1|20|21|22|23|24|25|26|27|28|29|2|30|31|3|4|5|6|7|8|9)(?:\\.[º°])?(?=\\b|t)'
    MonthNumRegex = f'(?1[0-2]|(0)?[1-9])\\b'
    AmDescRegex = f'({BaseDateTime.BaseAmDescRegex})'
    PmDescRegex = f'({BaseDateTime.BasePmDescRegex})'
    AmPmDescRegex = f'({BaseDateTime.BaseAmPmDescRegex})'
    DescRegex = f'(?({AmDescRegex}|{PmDescRegex}))'
    TwoDigitYearRegex = f'\\b(?([0-9]\\d))(?!(\\s*((\\:\\d)|{AmDescRegex}|{PmDescRegex}|\\.\\d))|\\.?[º°ª])\\b'
    YearRegex = f'({BaseDateTime.FourDigitYearRegex})'
    MonthNumWithYearRegex = f'\\b(({YearRegex}(\\s*?)[/\\-\\.~](\\s*?){MonthNumRegex})|({MonthNumRegex}(\\s*?)[/\\-\\.~](\\s*?){YearRegex}))\\b'
    # NOTE(review): the parentheses in DateYearRegex are unbalanced and its tail
    # looks like an hour pattern; DateExtractor4/5/7S/9S, which the minimal date
    # extractor configuration reads from this class, are not defined here.
    # Presumably a span of definitions was lost at this point - confirm.
    DateYearRegex = f'(?{YearRegex}|(?2[0-4]|[0-1]?\\d)'
    BasicTime = f'(?{BaseDateTime.HourRegex}:{BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?|{BaseDateTime.HourRegex})'
    ConnectNumRegex = f'({BaseDateTime.HourRegex}(?[0-5][0-9])\\s*{DescRegex})'
    TimeRegexWithDotConnector = f'({BaseDateTime.HourRegex}\\.{BaseDateTime.MinuteRegex})'
    TimeRegex1 = f'({BaseDateTime.HourRegex})\\s*({DescRegex})'
    TimeRegex2 = f'(t)?{BaseDateTime.HourRegex}(\\s*)?:(\\s*)?{BaseDateTime.MinuteRegex}((\\s*)?:(\\s*)?{BaseDateTime.SecondRegex})?(\\s*({DescRegex})|\\b)'
    TimeRegex3 = f'\\b({TimeRegexWithDotConnector}(\\s*({DescRegex})))'
    TimeRegex12 = f'{BaseDateTime.HourRegex}(\\s*){BaseDateTime.MinuteRegex}(\\s*{DescRegex})?'

    # Maps month numbers written with or without a leading zero to 1..12.
    MonthOfYear = dict([("1", 1),
                        ("2", 2),
                        ("3", 3),
                        ("4", 4),
                        ("5", 5),
                        ("6", 6),
                        ("7", 7),
                        ("8", 8),
                        ("9", 9),
                        ("10", 10),
                        ("11", 11),
                        ("12", 12),
                        ("01", 1),
                        ("02", 2),
                        ("03", 3),
                        ("04", 4),
                        ("05", 5),
                        ("06", 6),
                        ("07", 7),
                        ("08", 8),
                        ("09", 9)
                        ])

    DefaultLanguageFallback = 'DMY'
# pylint: enable=line-too-long
class MinimalNumberWithUnitExtractorConfiguration(NumberWithUnitExtractorConfiguration):
    """Number-with-unit extractor configuration for the minimal culture."""

    def __init__(self, culture_info: CultureInfo):
        # Fall back to the minimal culture when the caller passes None.
        effective_culture = culture_info
        if effective_culture is None:
            effective_culture = CultureInfo(Culture.Minimal)
        super().__init__(effective_culture)

        self._unit_num_extractor = MinimalNumberExtractor(NumberMode.Unit)
        self._build_prefix = MinimalNumericWithUnit.BuildPrefix
        self._build_suffix = MinimalNumericWithUnit.BuildSuffix
        connector_pattern = MinimalNumericWithUnit.CompoundUnitConnectorRegex
        self._compound_unit_connector_regex = RegExpUtility.get_safe_reg_exp(connector_pattern)
        non_unit_pattern = BaseUnits.PmNonUnitRegex
        self._pm_non_unit_regex = RegExpUtility.get_safe_reg_exp(non_unit_pattern)

    def expand_half_suffix(self, source, result, numbers):
        """Do nothing; the minimal culture has no half-suffix expansion."""
        return None

    @property
    def unit_num_extractor(self) -> Extractor:
        return self._unit_num_extractor

    @property
    def build_prefix(self) -> str:
        return self._build_prefix

    @property
    def build_suffix(self) -> str:
        return self._build_suffix

    @property
    def connector_token(self) -> str:
        return MinimalNumericWithUnit.ConnectorToken

    @property
    def compound_unit_connector_regex(self) -> Pattern:
        return self._compound_unit_connector_regex

    @property
    def non_unit_regex(self) -> Pattern:
        return self._pm_non_unit_regex

    @property
    def ambiguity_filters_dict(self) -> Dict[Pattern, Pattern]:
        # No ambiguity filters are defined.
        return None

    @property
    def ambiguous_unit_number_multiplier_regex(self) -> Pattern:
        # No multiplier pattern is defined.
        return None


class MinimalCurrencyExtractorConfiguration(MinimalNumberWithUnitExtractorConfiguration):
    """Currency extractor configuration built on the minimal number-with-unit base."""

    def __init__(self, culture_info: CultureInfo = None):
        super().__init__(culture_info)
        resources = MinimalNumericWithUnit
        self._suffix_list = resources.CurrencySuffixList
        self._prefix_list = resources.CurrencyPrefixList
        self._ambiguous_unit_list = resources.AmbiguousCurrencyUnitList

    @property
    def extract_type(self) -> str:
        return Constants.SYS_UNIT_CURRENCY

    @property
    def suffix_list(self) -> Dict[str, str]:
        return self._suffix_list

    @property
    def prefix_list(self) -> Dict[str, str]:
        return self._prefix_list

    @property
    def ambiguous_unit_list(self) -> List[str]:
        return self._ambiguous_unit_list
self._internal_number_parser = AgnosticNumberParserFactory.get_parser( + ParserType.NUMBER, MinimalNumberParserConfiguration(culture_info)) + + +class MinimalCurrencyParserConfiguration(MinimalNumberWithUnitParserConfiguration): + def __init__(self, culture_info: CultureInfo = None): + super().__init__(culture_info) + self.add_dict_to_unit_map(MinimalNumericWithUnit.CurrencySuffixList) + self.add_dict_to_unit_map(MinimalNumericWithUnit.CurrencyPrefixList) + self.currency_name_to_iso_code_map = MinimalNumericWithUnit.CurrencyNameToIsoCodeMap + self.currency_fraction_code_list = MinimalNumericWithUnit.FractionalUnitNameToCodeMap \ No newline at end of file diff --git a/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/number_with_unit/number_with_unit_recognizer.py b/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/number_with_unit/number_with_unit_recognizer.py index 96d0606e52..31c5512786 100644 --- a/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/number_with_unit/number_with_unit_recognizer.py +++ b/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/number_with_unit/number_with_unit_recognizer.py @@ -64,6 +64,8 @@ from .catalan.parsers import CatalanCurrencyParserConfiguration from recognizers_number_with_unit.number_with_unit.arabic.extractors import ArabicCurrencyExtractorConfiguration from recognizers_number_with_unit.number_with_unit.arabic.parsers import ArabicCurrencyParserConfiguration +from recognizers_number_with_unit.number_with_unit.minimal.extractors import MinimalCurrencyExtractorConfiguration +from recognizers_number_with_unit.number_with_unit.minimal.parsers import MinimalCurrencyParserConfiguration class NumberWithUnitOptions(IntFlag): @@ -298,6 +300,15 @@ def initialize_configuration(self): ])) # endregion + # region Catalan + self.register_model('CurrencyModel', Culture.Minimal, lambda options: CurrencyModel([ + ExtractorParserModel( + 
BaseMergedUnitExtractor( + MinimalCurrencyExtractorConfiguration()), + BaseMergedUnitParser(MinimalCurrencyParserConfiguration())) + ])) + # endregion + def get_age_model(self, culture: str = None, fallback_to_default_culture: bool = True) -> Model: return self.get_model('AgeModel', culture, fallback_to_default_culture) diff --git a/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/resources/__init__.py b/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/resources/__init__.py index 566ce494c4..4385448ab4 100644 --- a/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/resources/__init__.py +++ b/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/resources/__init__.py @@ -10,3 +10,4 @@ from .japanese_numeric_with_unit import JapaneseNumericWithUnit from .catalan_numeric_with_unit import CatalanNumericWithUnit from .arabic_numeric_with_unit import ArabicNumericWithUnit +from .minimal_numeric_with_unit import MinimalNumericWithUnit diff --git a/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/resources/minimal_numeric_with_unit.py b/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/resources/minimal_numeric_with_unit.py new file mode 100644 index 0000000000..7247a8649e --- /dev/null +++ b/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/resources/minimal_numeric_with_unit.py @@ -0,0 +1,570 @@ +from .base_numbers import BaseNumbers +# pylint: disable=line-too-long + + +class MinimalNumericWithUnit: + CurrencySuffixList = dict([("Abkhazian apsar", "abkhazian apsar|apsars"), + ("Afghan afghani", "afghan afghani|؋|afn|afghanis|afghani"), + ("Pul", "pul"), + ("Euro", "euros|euro|€|eur"), + ("Cent", "cents|cent|-cents|-cent"), + ("Albanian lek", "albanian lek|leks|lek"), + ("Qindarkë", "qindarkë|qindarkës|qindarke|qindarkes"), + ("Angolan kwanza", "angolan kwanza|kz|aoa|kwanza|kwanzas|angolan kwanzas"), 
+ ("Armenian dram", "armenian drams|armenian dram"), + ("Aruban florin", "aruban florin|ƒ|awg|aruban florins"), + ("Bangladeshi taka", "bangladeshi taka|৳|bdt|taka|takas|bangladeshi takas"), + ("Paisa", "poisha|paisa"), + ("Bhutanese ngultrum", "bhutanese ngultrum|nu.|btn"), + ("Chetrum", "chetrums|chetrum"), + ("Bolivian boliviano", "bolivian boliviano|bob|bs.|bolivia boliviano|bolivia bolivianos|bolivian bolivianos"), + ("Bosnia and Herzegovina convertible mark", "bosnia and herzegovina convertible mark|bam"), + ("Fening", "fenings|fenings"), + ("Botswana pula", "botswana pula|bwp|pula|pulas|botswana pulas"), + ("Thebe", "thebe"), + ("Brazilian real", "brazilian real|r$|brl|brazil real|brazil reals|brazilian reals"), + ("Bulgarian lev", "bulgarian lev|bgn|лв|bulgaria lev|bulgaria levs|bulgarian levs"), + ("Stotinka", "stotinki|stotinka"), + ("Cambodian riel", "cambodian riel|khr|៛|cambodia riel|cambodia riels|cambodian riels"), + ("Cape Verdean escudo", "cape verdean escudo|cve"), + ("Costa Rican colón", "costa rican colón|costa rican colóns|crc|₡|costa rica colón|costa rica colóns|costa rican colon|costa rican colons|costa rica colon|costa rica colons"), + ("Salvadoran colón", "svc|salvadoran colón|salvadoran colóns|salvador colón|salvador colóns|salvadoran colon|salvadoran colons|salvador colon|salvador colons"), + ("Céntimo", "céntimo"), + ("Croatian kuna", "croatian kuna|kn|hrk|croatia kuna|croatian kunas|croatian kuna kunas"), + ("Lipa", "lipa"), + ("Czech koruna", "czech koruna|czk|kč|czech korunas"), + ("Haléř", "haléř"), + ("Eritrean nakfa", "eritrean nakfa|nfk|ern|eritrean nakfas"), + ("Ethiopian birr", "ethiopian birr|etb"), + ("Gambian dalasi", "gmd"), + ("Butut", "bututs|butut"), + ("Georgian lari", "georgian lari|lari|gel|₾"), + ("Tetri", "tetri"), + ("Ghanaian cedi", "ghanaian cedi|ghs|₵|gh₵"), + ("Pesewa", "pesewas|pesewa"), + ("Guatemalan quetzal", "guatemalan quetzal|gtq|guatemala quetzal"), + ("Haitian gourde", "haitian gourde|htg"), + 
("Honduran lempira", "honduran lempira|hnl"), + ("Hungarian forint", "hungarian forint|huf|ft|hungary forint|hungary forints|hungarian forints"), + ("Fillér", "fillér"), + ("Iranian rial", "iranian rial|irr|iran rial|iran rials|iranian rials"), + ("Yemeni rial", "yemeni rial|yer|yemeni rials"), + ("Israeli new shekel", "₪|ils|agora"), + ("Lithuanian litas", "ltl|lithuanian litas|lithuan litas|lithuanian lit|lithuan lit"), + ("Japanese yen", "japanese yen|jpy|yen|-yen|¥|yens|japanese yens|japan yen|japan yens"), + ("Kazakhstani tenge", "kazakhstani tenge|kazakh tenge|kazak tenge|kzt"), + ("Kenyan shilling", "kenyan shilling|kes"), + ("North Korean won", "north korean won|kpw|north korean wons"), + ("South Korean won", "south korean won|krw|south korean wons"), + ("Korean won", "korean won|₩|korean wons"), + ("Kyrgyzstani som", "kyrgyzstani som|kgs"), + ("Uzbekitan som", "uzbekitan som|uzs"), + ("Lao kip", "lao kip|lak|₭n|₭"), + ("Att", "att"), + ("Lesotho loti", "lesotho loti|lsl|loti"), + ("Sente", "sente|lisente"), + ("South African rand", "south african rand|zar|south africa rand|south africa rands|south african rands"), + ("Macanese pataca", "macanese pataca|mop$|mop"), + ("Avo", "avos|avo"), + ("Macedonian denar", "macedonian denar|mkd|ден"), + ("Deni", "deni"), + ("Malagasy ariary", "malagasy ariary|mga"), + ("Iraimbilanja", "iraimbilanja"), + ("Malawian kwacha", "malawian kwacha|mk|mwk"), + ("Tambala", "tambala"), + ("Malaysian ringgit", "malaysian ringgit|rm|myr|malaysia ringgit|malaysia ringgits|malaysian ringgits"), + ("Mauritanian ouguiya", "mauritanian ouguiya|um|mro|mauritania ouguiya|mauritania ouguiyas|mauritanian ouguiyas"), + ("Khoums", "khoums"), + ("Mongolian tögrög", "mongolian tögrög|mnt|₮|mongolia tögrög|mongolia tögrögs|mongolian tögrögs|mongolian togrog|mongolian togrogs|mongolia togrog|mongolia togrogs"), + ("Mozambican metical", "mozambican metical|mt|mzn|mozambica metical|mozambica meticals|mozambican meticals"), + ("Burmese kyat", 
"burmese kyat|ks|mmk"), + ("Pya", "pya"), + ("Nicaraguan córdoba", "nicaraguan córdoba|nio"), + ("Nigerian naira", "nigerian naira|naira|ngn|₦|nigeria naira|nigeria nairas|nigerian nairas"), + ("Kobo", "kobo"), + ("Turkish lira", "turkish lira|try|tl|turkey lira|turkey liras|turkish liras"), + ("Kuruş", "kuruş"), + ("Omani rial", "omani rial|omr|ر.ع."), + ("Panamanian balboa", "panamanian balboa|b/.|pab"), + ("Centesimo", "centesimo"), + ("Papua New Guinean kina", "papua new guinean kina|kina|pgk"), + ("Toea", "toea"), + ("Paraguayan guaraní", "paraguayan guaraní|₲|pyg"), + ("Peruvian sol", "peruvian sol|soles|sol|peruvian nuevo sol"), + ("Polish złoty", "złoty|polish złoty|zł|pln|zloty|polish zloty|poland zloty|poland złoty"), + ("Grosz", "groszy|grosz|grosze"), + ("Qatari riyal", "qatari riyal|qar|qatari riyals|qatar riyal|qatar riyals"), + ("Saudi riyal", "saudi riyal|sar|saudi riyals"), + ("Riyal", "riyal|riyals|rial|﷼"), + ("Dirham", "dirham|dirhem|dirhm"), + ("Halala", "halalas|halala"), + ("Samoan tālā", "samoan tālā|tālā|tala|ws$|samoa|wst|samoan tala"), + ("Sene", "sene"), + ("São Tomé and Príncipe dobra", "são tomé and príncipe dobra|dobras|dobra"), + ("Sierra Leonean leone", "sierra leonean leone|sll|leone|le"), + ("Peseta", "pesetas|peseta"), + ("Netherlands guilder", "florin|netherlands antillean guilder|ang|nederlandse gulden|guilders|guilder|gulden|-guilders|-guilder|dutch guilders|dutch guilder|fl"), + ("Swazi lilangeni", "swazi lilangeni|lilangeni|szl|emalangeni"), + ("Tajikistani somoni", "tajikistani somoni|tjs|somoni"), + ("Diram", "dirams|diram"), + ("Thai baht", "thai baht|฿|thb|baht"), + ("Satang", "satang|satangs"), + ("Tongan paʻanga", "tongan paʻanga|paʻanga|tongan pa'anga|pa'anga"), + ("Seniti", "seniti"), + ("Ukrainian hryvnia", "ukrainian hryvnia|hyrvnia|uah|₴|ukrain hryvnia|ukrain hryvnias|ukrainian hryvnias"), + ("Vanuatu vatu", "vanuatu vatu|vatu|vuv"), + ("Venezuelan bolívar", "venezuelan bolívar|venezuelan 
bolívars|bs.f.|vef|bolívar fuerte|venezuelan bolivar|venezuelan bolivars|venezuela bolivar|venezuela bolivarsvenezuelan bolivar|venezuelan bolivars"), + ("Vietnamese dong", "vietnamese dong|vnd|đồng|vietnam dong|vietnamese dongs|vietnam dongs"), + ("Zambian kwacha", "zambian kwacha|zk|zmw|zambia kwacha|kwachas|zambian kwachas"), + ("Moroccan dirham", "moroccan dirham|mad|د.م."), + ("United Arab Emirates dirham", "united arab emirates dirham|د.إ|aed"), + ("Azerbaijani manat", "azerbaijani manat|azn"), + ("Turkmenistan manat", "turkmenistan manat|turkmenistan new manat|tmt"), + ("Manat", "manats|manat"), + ("Qəpik", "qəpik"), + ("Somali shilling", "somali shillings|somali shilling|shilin soomaali|-shilin soomaali|scellino|shilin|sh.so.|sos"), + ("Somaliland shilling", "somaliland shillings|somaliland shilling|soomaaliland shilin"), + ("Tanzanian shilling", "tanzanian shilling|tanzanian shillings|tsh|tzs|tanzania shilling|tanzania shillings"), + ("Ugandan shilling", "ugandan shilling|ugandan shillings|ugx|uganda shilling|uganda shillings"), + ("Romanian leu", "romanian leu|lei|ron|romania leu"), + ("Moldovan leu", "moldovan leu|mdl|moldova leu"), + ("Leu", "leu"), + ("Ban", "bani|-ban|ban"), + ("Nepalese rupee", "nepalese rupees|nepalese rupee|npr"), + ("Pakistani rupee", "pakistani rupees|pakistani rupee|pkr"), + ("Indian rupee", "indian rupees|indian rupee|inr|₹|india rupees|india rupee"), + ("Seychellois rupee", "seychellois rupees|seychellois rupee|scr|sr|sre"), + ("Mauritian rupee", "mauritian rupees|mauritian rupee|mur"), + ("Maldivian rufiyaa", "maldivian rufiyaas|maldivian rufiyaa|mvr|.ރ|maldive rufiyaas|maldive rufiyaa"), + ("Sri Lankan rupee", "sri lankan rupees|sri lankan rupee|lkr|රු|ரூ"), + ("Indonesian rupiah", "indonesian rupiah|rupiah|perak|rp|idr"), + ("Rupee", "rupee|rupees|rs"), + ("Danish krone", "danish krone|dkk|denmark krone|denmark krones|danish krones"), + ("Norwegian krone", "norwegian krone|nok|norway krone|norway krones|norwegian krones"), 
+ ("Faroese króna", "faroese króna|faroese krona"), + ("Icelandic króna", "icelandic króna|isk|icelandic krona|iceland króna|iceland krona"), + ("Swedish krona", "swedish krona|sek|swedan krona"), + ("Krone", "kronor|krona|króna|krone|krones|kr|-kr"), + ("Øre", "Øre|oyra|eyrir"), + ("West African CFA franc", "west african cfa franc|xof|west africa cfa franc|west africa franc|west african franc"), + ("Central African CFA franc", "central african cfa franc|xaf|central africa cfa franc|central african franc|central africa franc"), + ("Comorian franc", "comorian franc|kmf"), + ("Congolese franc", "congolese franc|cdf"), + ("Burundian franc", "burundian franc|bif"), + ("Djiboutian franc", "djiboutian franc|djf"), + ("CFP franc", "cfp franc|xpf"), + ("Guinean franc", "guinean franc|gnf"), + ("Swiss franc", "swiss francs|swiss franc|chf|sfr."), + ("Rwandan franc", "Rwandan franc|rwf|rf|r₣|frw"), + ("Belgian franc", "belgian franc|bi.|b.fr.|bef|belgium franc"), + ("Rappen", "rappen|-rappen"), + ("Franc", "francs|franc|fr.|fs"), + ("Centime", "centimes|centime|santim"), + ("Russian ruble", "russian ruble|₽|rub|russia ruble|russia ₽|russian ₽|russian rubles|russia rubles"), + ("New Belarusian ruble", "new belarusian ruble|byn|new belarus ruble|new belarus rubles|new belarusian rubles"), + ("Old Belarusian ruble", "old belarusian ruble|byr|old belarus ruble|old belarus rubles|old belarusian rubles"), + ("Transnistrian ruble", "transnistrian ruble|prb|р."), + ("Belarusian ruble", "belarusian ruble|belarus ruble|belarus rubles|belarusian rubles"), + ("Kopek", "kopek|kopeks"), + ("Kapyeyka", "kapyeyka"), + ("Ruble", "rubles|ruble|br"), + ("Algerian dinar", "algerian dinar|د.ج|dzd|algerian dinars|algeria dinar|algeria dinars"), + ("Bahraini dinar", "bahraini dinars|bahraini dinar|bhd|.د.ب"), + ("Santeem", "santeem|santeems"), + ("Iraqi dinar", "iraqi dinars|iraqi dinar|iraq dinars|iraq dinar|iqd|ع.د"), + ("Jordanian dinar", "jordanian dinars|jordanian dinar|د.ا|jod|jordan 
dinar|jordan dinars"), + ("Kuwaiti dinar", "kuwaiti dinars|kuwaiti dinar|kwd|د.ك"), + ("Libyan dinar", "libyan dinars|libyan dinar|libya dinars|libya dinar|lyd"), + ("Serbian dinar", "serbian dinars|serbian dinar|din.|rsd|дин.|serbia dinars|serbia dinar"), + ("Tunisian dinar", "tunisian dinars|tunisian dinar|tnd|tunisia dinars|tunisia dinar"), + ("Yugoslav dinar", "yugoslav dinars|yugoslav dinar|yun"), + ("Dinar", "dinars|dinar|denar|-dinars|-dinar"), + ("Fils", "fils|fulūs|-fils|-fil"), + ("Para", "para|napa"), + ("Millime", "millimes|millime"), + ("Argentine peso", "argentine peso|ars|argetina peso|argetina pesos|argentine pesos"), + ("Chilean peso", "chilean pesos|chilean peso|clp|chile peso|chile peso"), + ("Colombian peso", "colombian pesos|colombian peso|cop|colombia peso|colombia pesos"), + ("Cuban convertible peso", "cuban convertible pesos|cuban convertible peso|cuc|cuba convertible pesos|cuba convertible peso"), + ("Cuban peso", "cuban pesos|cuban peso|cup|cuba pesos|cuba peso"), + ("Dominican peso", "dominican pesos|dominican peso|dop|dominica pesos|dominica peso"), + ("Mexican peso", "mexican pesos|mexican peso|mxn|mexico pesos|mexico peso|mxn$|mxn $|mex$"), + ("Philippine peso", "piso|philippine pesos|philippine peso|₱|php"), + ("Uruguayan peso", "uruguayan pesos|uruguayan peso|uyu"), + ("Peso", "pesos|peso"), + ("Centavo", "centavos|centavo"), + ("Alderney pound", "alderney pounds|alderney pound|alderney £"), + ("British pound", "british pounds|british pound|british £|gbp|pound sterling|pound sterlings|sterling|pound scot|pound scots"), + ("Guernsey pound", "guernsey pounds|guernsey £|ggp"), + ("Ascension pound", "ascension pounds|ascension pound|ascension £"), + ("Saint Helena pound", "saint helena pounds|saint helena pound|saint helena £|shp"), + ("Egyptian pound", "egyptian pounds|egyptian pound|egyptian £|egp|ج.م|egypt pounds|egypt pound"), + ("Falkland Islands pound", "falkland islands pounds|falkland islands pound|falkland islands £|fkp|falkland 
island pounds|falkland island pound|falkland island £"), + ("Gibraltar pound", "gibraltar pounds|gibraltar pound|gibraltar £|gip"), + ("Manx pound", "manx pounds|manx pound|manx £|imp"), + ("Jersey pound", "jersey pounds|jersey pound|jersey £|jep"), + ("Lebanese pound", "lebanese pounds|lebanese pound|lebanese £|lebanan pounds|lebanan pound|lebanan £|lbp|ل.ل"), + ("South Georgia and the South Sandwich Islands pound", "south georgia and the south sandwich islands pounds|south georgia and the south sandwich islands pound|south georgia and the south sandwich islands £"), + ("South Sudanese pound", "south sudanese pounds|south sudanese pound|south sudanese £|ssp|south sudan pounds|south sudan pound|south sudan £"), + ("Sudanese pound", "sudanese pounds|sudanese pound|sudanese £|ج.س.|sdg|sudan pounds|sudan pound|sudan £"), + ("Syrian pound", "syrian pounds|syrian pound|syrian £|ل.س|syp|syria pounds|syria pound|syria £"), + ("Tristan da Cunha pound", "tristan da cunha pounds|tristan da cunha pound|tristan da cunha £"), + ("Pound", "pounds|pound|-pounds|-pound|£"), + ("Pence", "pence"), + ("Shilling", "shillings|shilling|shilingi|sh"), + ("Penny", "pennies|penny"), + ("United States dollar", "united states dollars|united states dollar|united states $|u.s. dollars|u.s. 
dollar|u s dollar|u s dollars|usd|american dollars|american dollar|us$|us dollar|us dollars|u.s dollar|u.s dollars|usd$"), + ("East Caribbean dollar", "east caribbean dollars|east caribbean dollar|east Caribbean $|xcd"), + ("Australian dollar", "australian dollars|australian dollar|australian $|australian$|aud|australia dollars|australia dollar|australia $|australia$"), + ("Bahamian dollar", "bahamian dollars|bahamian dollar|bahamian $|bahamian$|bsd|bahamia dollars|bahamia dollar|bahamia $|bahamia$"), + ("Barbadian dollar", "barbadian dollars|barbadian dollar|barbadian $|bbd"), + ("Belize dollar", "belize dollars|belize dollar|belize $|bzd"), + ("Bermudian dollar", "bermudian dollars|bermudian dollar|bermudian $|bmd|bermudia dollars|bermudia dollar|bermudia $"), + ("British Virgin Islands dollar", "british virgin islands dollars|british virgin islands dollar|british virgin islands $|bvi$|virgin islands dollars|virgin islands dolalr|virgin islands $|virgin island dollars|virgin island dollar|virgin island $"), + ("Brunei dollar", "brunei dollar|brunei $|bnd"), + ("Sen", "sen"), + ("Singapore dollar", "singapore dollars|singapore dollar|singapore $|s$|sgd"), + ("Canadian dollar", "canadian dollars|canadian dollar|canadian $|cad|can$|c$|canada dollars|canada dolllar|canada $"), + ("Cayman Islands dollar", "cayman islands dollars|cayman islands dollar|cayman islands $|kyd|ci$|cayman island dollar|cayman island doolars|cayman island $"), + ("New Zealand dollar", "new zealand dollars|new zealand dollar|new zealand $|nz$|nzd|kiwi"), + ("Cook Islands dollar", "cook islands dollars|cook islands dollar|cook islands $|cook island dollars|cook island dollar|cook island $"), + ("Fijian dollar", "fijian dollars|fijian dollar|fijian $|fjd|fiji dollars|fiji dollar|fiji $"), + ("Guyanese dollar", "guyanese dollars|guyanese dollar|gyd|gy$"), + ("Hong Kong dollar", "hong kong dollars|hong kong dollar|hong kong $|hk$|hkd|hk dollars|hk dollar|hk $|hongkong$"), + ("Jamaican dollar", 
"jamaican dollars|jamaican dollar|jamaican $|j$|jamaica dollars|jamaica dollar|jamaica $|jmd"), + ("Kiribati dollar", "kiribati dollars|kiribati dollar|kiribati $"), + ("Liberian dollar", "liberian dollars|liberian dollar|liberian $|liberia dollars|liberia dollar|liberia $|lrd"), + ("Micronesian dollar", "micronesian dollars|micronesian dollar|micronesian $"), + ("Namibian dollar", "namibian dollars|namibian dollar|namibian $|nad|n$|namibia dollars|namibia dollar|namibia $"), + ("Nauruan dollar", "nauruan dollars|nauruan dollar|nauruan $"), + ("Niue dollar", "niue dollars|niue dollar|niue $"), + ("Palauan dollar", "palauan dollars|palauan dollar|palauan $"), + ("Pitcairn Islands dollar", "pitcairn islands dollars|pitcairn islands dollar|pitcairn islands $|pitcairn island dollars|pitcairn island dollar|pitcairn island $"), + ("Solomon Islands dollar", "solomon islands dollars|solomon islands dollar|solomon islands $|si$|sbd|solomon island dollars|solomon island dollar|solomon island $"), + ("Surinamese dollar", "surinamese dollars|surinamese dollar|surinamese $|srd"), + ("New Taiwan dollar", "new taiwan dollars|new taiwan dollar|nt$|twd|ntd"), + ("Trinidad and Tobago dollar", "trinidad and tobago dollars|trinidad and tobago dollar|trinidad and tobago $|trinidad $|trinidad dollar|trinidad dollars|trinidadian dollar|trinidadian dollars|trinidadian $|ttd"), + ("Tuvaluan dollar", "tuvaluan dollars|tuvaluan dollar|tuvaluan $"), + ("Dollar", "dollars|dollar|$"), + ("Chinese yuan", "yuan|kuai|chinese yuan|renminbi|cny|rmb|¥|元"), + ("Fen", "fen"), + ("Jiao", "jiao|mao"), + ("Finnish markka", "suomen markka|finnish markka|finsk mark|fim|markkaa|markka"), + ("Penni", "penniä|penni"), + ("Bitcoin", "bitcoin|bitcoins|btc|xbt|₿"), + ("Millibitcoin", "millibitcoin|millibitcoins|milibitcoin|milibitcoins"), + ("Satoshi", "satoshi|satoshis")]) + CurrencyNameToIsoCodeMap = dict([("Afghan afghani", "AFN"), + ("Euro", "EUR"), + ("Albanian lek", "ALL"), + ("Angolan kwanza", "AOA"), + 
("Armenian dram", "AMD"), + ("Aruban florin", "AWG"), + ("Bangladeshi taka", "BDT"), + ("Bhutanese ngultrum", "BTN"), + ("Bolivian boliviano", "BOB"), + ("Bosnia and Herzegovina convertible mark", "BAM"), + ("Botswana pula", "BWP"), + ("Brazilian real", "BRL"), + ("Bulgarian lev", "BGN"), + ("Cambodian riel", "KHR"), + ("Cape Verdean escudo", "CVE"), + ("Costa Rican colón", "CRC"), + ("Croatian kuna", "HRK"), + ("Czech koruna", "CZK"), + ("Eritrean nakfa", "ERN"), + ("Ethiopian birr", "ETB"), + ("Gambian dalasi", "GMD"), + ("Georgian lari", "GEL"), + ("Ghanaian cedi", "GHS"), + ("Guatemalan quetzal", "GTQ"), + ("Haitian gourde", "HTG"), + ("Honduran lempira", "HNL"), + ("Hungarian forint", "HUF"), + ("Iranian rial", "IRR"), + ("Yemeni rial", "YER"), + ("Israeli new shekel", "ILS"), + ("Japanese yen", "JPY"), + ("Kazakhstani tenge", "KZT"), + ("Kenyan shilling", "KES"), + ("North Korean won", "KPW"), + ("South Korean won", "KRW"), + ("Kyrgyzstani som", "KGS"), + ("Lao kip", "LAK"), + ("Lesotho loti", "LSL"), + ("South African rand", "ZAR"), + ("Macanese pataca", "MOP"), + ("Macedonian denar", "MKD"), + ("Malagasy ariary", "MGA"), + ("Malawian kwacha", "MWK"), + ("Malaysian ringgit", "MYR"), + ("Mauritanian ouguiya", "MRO"), + ("Mongolian tögrög", "MNT"), + ("Mozambican metical", "MZN"), + ("Burmese kyat", "MMK"), + ("Nicaraguan córdoba", "NIO"), + ("Nigerian naira", "NGN"), + ("Turkish lira", "TRY"), + ("Omani rial", "OMR"), + ("Panamanian balboa", "PAB"), + ("Papua New Guinean kina", "PGK"), + ("Paraguayan guaraní", "PYG"), + ("Peruvian sol", "PEN"), + ("Polish złoty", "PLN"), + ("Qatari riyal", "QAR"), + ("Saudi riyal", "SAR"), + ("Samoan tālā", "WST"), + ("São Tomé and Príncipe dobra", "STN"), + ("Sierra Leonean leone", "SLL"), + ("Swazi lilangeni", "SZL"), + ("Tajikistani somoni", "TJS"), + ("Thai baht", "THB"), + ("Ukrainian hryvnia", "UAH"), + ("Vanuatu vatu", "VUV"), + ("Venezuelan bolívar", "VEF"), + ("Zambian kwacha", "ZMW"), + ("Moroccan dirham", "MAD"), + 
("United Arab Emirates dirham", "AED"), + ("Azerbaijani manat", "AZN"), + ("Turkmenistan manat", "TMT"), + ("Somali shilling", "SOS"), + ("Tanzanian shilling", "TZS"), + ("Ugandan shilling", "UGX"), + ("Romanian leu", "RON"), + ("Moldovan leu", "MDL"), + ("Nepalese rupee", "NPR"), + ("Pakistani rupee", "PKR"), + ("Indian rupee", "INR"), + ("Seychellois rupee", "SCR"), + ("Mauritian rupee", "MUR"), + ("Maldivian rufiyaa", "MVR"), + ("Sri Lankan rupee", "LKR"), + ("Indonesian rupiah", "IDR"), + ("Danish krone", "DKK"), + ("Norwegian krone", "NOK"), + ("Icelandic króna", "ISK"), + ("Swedish krona", "SEK"), + ("West African CFA franc", "XOF"), + ("Central African CFA franc", "XAF"), + ("Comorian franc", "KMF"), + ("Congolese franc", "CDF"), + ("Burundian franc", "BIF"), + ("Djiboutian franc", "DJF"), + ("CFP franc", "XPF"), + ("Guinean franc", "GNF"), + ("Swiss franc", "CHF"), + ("Rwandan franc", "RWF"), + ("Russian ruble", "RUB"), + ("Transnistrian ruble", "PRB"), + ("New Belarusian ruble", "BYN"), + ("Algerian dinar", "DZD"), + ("Bahraini dinar", "BHD"), + ("Iraqi dinar", "IQD"), + ("Jordanian dinar", "JOD"), + ("Kuwaiti dinar", "KWD"), + ("Libyan dinar", "LYD"), + ("Serbian dinar", "RSD"), + ("Tunisian dinar", "TND"), + ("Argentine peso", "ARS"), + ("Chilean peso", "CLP"), + ("Colombian peso", "COP"), + ("Cuban convertible peso", "CUC"), + ("Cuban peso", "CUP"), + ("Dominican peso", "DOP"), + ("Mexican peso", "MXN"), + ("Uruguayan peso", "UYU"), + ("British pound", "GBP"), + ("Saint Helena pound", "SHP"), + ("Egyptian pound", "EGP"), + ("Falkland Islands pound", "FKP"), + ("Gibraltar pound", "GIP"), + ("Manx pound", "IMP"), + ("Jersey pound", "JEP"), + ("Lebanese pound", "LBP"), + ("South Sudanese pound", "SSP"), + ("Sudanese pound", "SDG"), + ("Syrian pound", "SYP"), + ("United States dollar", "USD"), + ("Australian dollar", "AUD"), + ("Bahamian dollar", "BSD"), + ("Barbadian dollar", "BBD"), + ("Belize dollar", "BZD"), + ("Bermudian dollar", "BMD"), + ("Brunei 
dollar", "BND"), + ("Singapore dollar", "SGD"), + ("Canadian dollar", "CAD"), + ("Cayman Islands dollar", "KYD"), + ("New Zealand dollar", "NZD"), + ("Fijian dollar", "FJD"), + ("Guyanese dollar", "GYD"), + ("Hong Kong dollar", "HKD"), + ("Jamaican dollar", "JMD"), + ("Liberian dollar", "LRD"), + ("Namibian dollar", "NAD"), + ("Solomon Islands dollar", "SBD"), + ("Surinamese dollar", "SRD"), + ("New Taiwan dollar", "TWD"), + ("Trinidad and Tobago dollar", "TTD"), + ("Tuvaluan dollar", "TVD"), + ("Chinese yuan", "CNY"), + ("Rial", "__RI"), + ("Shiling", "__S"), + ("Som", "__SO"), + ("Dirham", "__DR"), + ("Dinar", "_DN"), + ("Dollar", "__D"), + ("Manat", "__MA"), + ("Rupee", "__R"), + ("Krone", "__K"), + ("Krona", "__K"), + ("Crown", "__K"), + ("Frank", "__F"), + ("Mark", "__M"), + ("Ruble", "__RB"), + ("Peso", "__PE"), + ("Pound", "__P"), + ("Tristan da Cunha pound", "_TP"), + ("South Georgia and the South Sandwich Islands pound", "_SP"), + ("Somaliland shilling", "_SS"), + ("Pitcairn Islands dollar", "_PND"), + ("Palauan dollar", "_PD"), + ("Niue dollar", "_NID"), + ("Nauruan dollar", "_ND"), + ("Micronesian dollar", "_MD"), + ("Kiribati dollar", "_KID"), + ("Guernsey pound", "_GGP"), + ("Faroese króna", "_FOK"), + ("Cook Islands dollar", "_CKD"), + ("British Virgin Islands dollar", "_BD"), + ("Ascension pound", "_AP"), + ("Alderney pound", "_ALP"), + ("Abkhazian apsar", "_AA"), + ("Bitcoin", "_XBT")]) + FractionalUnitNameToCodeMap = dict([("Jiao", "JIAO"), + ("Kopek", "KOPEK"), + ("Pul", "PUL"), + ("Cent", "CENT"), + ("Qindarkë", "QINDARKE"), + ("Penny", "PENNY"), + ("Santeem", "SANTEEM"), + ("Cêntimo", "CENTIMO"), + ("Centavo", "CENTAVO"), + ("Luma", "LUMA"), + ("Qəpik", "QƏPIK"), + ("Fils", "FILS"), + ("Poisha", "POISHA"), + ("Kapyeyka", "KAPYEYKA"), + ("Centime", "CENTIME"), + ("Chetrum", "CHETRUM"), + ("Paisa", "PAISA"), + ("Fening", "FENING"), + ("Thebe", "THEBE"), + ("Sen", "SEN"), + ("Stotinka", "STOTINKA"), + ("Fen", "FEN"), + ("Céntimo", "CENTIMO"), + 
("Lipa", "LIPA"), + ("Haléř", "HALER"), + ("Øre", "ØRE"), + ("Piastre", "PIASTRE"), + ("Santim", "SANTIM"), + ("Oyra", "OYRA"), + ("Butut", "BUTUT"), + ("Tetri", "TETRI"), + ("Pesewa", "PESEWA"), + ("Fillér", "FILLER"), + ("Eyrir", "EYRIR"), + ("Dinar", "DINAR"), + ("Agora", "AGORA"), + ("Tïın", "TIIN"), + ("Chon", "CHON"), + ("Jeon", "JEON"), + ("Tyiyn", "TYIYN"), + ("Att", "ATT"), + ("Sente", "SENTE"), + ("Dirham", "DIRHAM"), + ("Rappen", "RAPPEN"), + ("Avo", "AVO"), + ("Deni", "DENI"), + ("Iraimbilanja", "IRAIMBILANJA"), + ("Tambala", "TAMBALA"), + ("Laari", "LAARI"), + ("Khoums", "KHOUMS"), + ("Ban", "BAN"), + ("Möngö", "MONGO"), + ("Pya", "PYA"), + ("Kobo", "KOBO"), + ("Kuruş", "KURUS"), + ("Baisa", "BAISA"), + ("Centésimo", "CENTESIMO"), + ("Toea", "TOEA"), + ("Sentimo", "SENTIMO"), + ("Grosz", "GROSZ"), + ("Sene", "SENE"), + ("Halala", "HALALA"), + ("Para", "PARA"), + ("Öre", "ORE"), + ("Diram", "DIRAM"), + ("Satang", "SATANG"), + ("Seniti", "SENITI"), + ("Millime", "MILLIME"), + ("Tennesi", "TENNESI"), + ("Kopiyka", "KOPIYKA"), + ("Tiyin", "TIYIN"), + ("Hào", "HAO"), + ("Ngwee", "NGWEE"), + ("Millibitcoin", "MILLIBITCOIN"), + ("Satoshi", "SATOSHI")]) + CompoundUnitConnectorRegex = f'(?and)' + MultiplierRegex = f'\\s*\\b(thousand|million|billion|trillion)s?\\b' + CurrencyPrefixList = dict([("Dobra", "db|std"), + ("Dollar", "$"), + ("Brazilian Real", "R$"), + ("United States dollar", "united states $|us$|us $|u.s. 
$|u.s $|usd$|usd"), + ("East Caribbean dollar", "east caribbean $|xcd"), + ("Mexican peso", "mxn$|mxn $|mex$|mxn"), + ("Australian dollar", "australian $|australia $|aud|aud$"), + ("Bahamian dollar", "bahamian $|bahamia $|bsd"), + ("Barbadian dollar", "barbadian $|barbadin $|bbd"), + ("Belize dollar", "belize $|bzd"), + ("Bermudian dollar", "bermudian $|bmd"), + ("British Virgin Islands dollar", "british virgin islands $|bvi$|virgin islands $|virgin island $|british virgin island $"), + ("Brunei dollar", "brunei $|b$|bnd"), + ("Sen", "sen"), + ("Singapore dollar", "singapore $|s$|sgd"), + ("Canadian dollar", "canadian $|can$|c$|c $|canada $|cad|cad$"), + ("Cayman Islands dollar", "cayman islands $|ci$|cayman island $|kyd"), + ("New Zealand dollar", "new zealand $|nz$|nz $|nzd|nzd$"), + ("Cook Islands dollar", "cook islands $|cook island $"), + ("Fijian dollar", "fijian $|fiji $|fjd"), + ("Guyanese dollar", "gy$|gy $|g$|g $|gyd"), + ("Hong Kong dollar", "hong kong $|hk$|hkd|hk $|hkd"), + ("Indian rupee", "₹|inr"), + ("Jamaican dollar", "jamaican $|j$|jamaica $|jmd"), + ("Kiribati dollar", "kiribati $"), + ("Liberian dollar", "liberian $|liberia $|lrd"), + ("Micronesian dollar", "micronesian $"), + ("Namibian dollar", "namibian $|nad|n$|namibia $|nad"), + ("Nauruan dollar", "nauruan $"), + ("Niue dollar", "niue $"), + ("Palauan dollar", "palauan $"), + ("Pitcairn Islands dollar", "pitcairn islands $|pitcairn island $"), + ("Solomon Islands dollar", "solomon islands $|si$|si $|solomon island $|sbd"), + ("Surinamese dollar", "surinamese $|surinam $|srd"), + ("New Taiwan dollar", "nt$|nt $|ntd|twd"), + ("Trinidad and Tobago dollar", "trinidad and tobago $|trinidad $|trinidadian $|ttd"), + ("Tuvaluan dollar", "tuvaluan $"), + ("Samoan tālā", "ws$|wst"), + ("Chinese yuan", "¥|cny|rmb"), + ("Japanese yen", "¥|jpy"), + ("Euro", "€|eur"), + ("Pound", "£|gbp"), + ("Costa Rican colón", "₡|crc"), + ("Turkish lira", "₺|try"), + ("Bitcoin", "₿|btc|xbt")]) + 
AmbiguousCurrencyUnitList = [r'din.', r'kiwi', r'kina', r'kobo', r'lari', r'lipa', r'napa', r'para', r'sfr.', r'taka', r'tala', r'toea', r'vatu', r'yuan', r'all', r'ang', r'ban', r'bob', r'btn', r'byr', r'cad', r'cop', r'cup', r'dop', r'gip', r'jod', r'kgs', r'lak', r'lei', r'mga', r'mop', r'nad', r'omr', r'pul', r'sar', r'sbd', r'scr', r'sdg', r'sek', r'sen', r'sol', r'sos', r'std', r'try', r'yer', r'yen', r'db', r'pen', r'ron', r'mad', r'zar', r'gel', r'satoshi', r'satoshis'] + BuildPrefix = f'(?<=(\\s|^))' + BuildSuffix = f'(?=(\\s|\\W|$))' + ConnectorToken = "and" + +# pylint: enable=line-too-long diff --git a/Python/libraries/recognizers-number/recognizers_number/culture.py b/Python/libraries/recognizers-number/recognizers_number/culture.py index db303bda39..06efca5866 100644 --- a/Python/libraries/recognizers-number/recognizers_number/culture.py +++ b/Python/libraries/recognizers-number/recognizers_number/culture.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. - from recognizers_text.culture import BaseCultureInfo, Culture from .number import LongFormatMode, LongFormatType @@ -16,6 +13,7 @@ Culture.Dutch: LongFormatMode.DOUBLE_DOT_COMMA, Culture.Japanese: LongFormatMode.DOUBLE_COMMA_DOT, Culture.Italian: LongFormatMode.DOUBLE_DOT_COMMA, + Culture.Minimal: LongFormatMode.DOUBLE_COMMA_DOT, } diff --git a/Python/libraries/recognizers-number/recognizers_number/number/__init__.py b/Python/libraries/recognizers-number/recognizers_number/number/__init__.py index 9315f4bb97..f407f58ab6 100644 --- a/Python/libraries/recognizers-number/recognizers_number/number/__init__.py +++ b/Python/libraries/recognizers-number/recognizers_number/number/__init__.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. 
- from .models import * from .extractors import * from .parsers import * @@ -18,3 +15,4 @@ from .number_recognizer import * from .parser_factory import * from .utilities import * +from .minimal import * diff --git a/Python/libraries/recognizers-number/recognizers_number/number/extractors.py b/Python/libraries/recognizers-number/recognizers_number/number/extractors.py index caea762ec3..fcd2705c7f 100644 --- a/Python/libraries/recognizers-number/recognizers_number/number/extractors.py +++ b/Python/libraries/recognizers-number/recognizers_number/number/extractors.py @@ -1,5 +1,3 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. import copy from abc import abstractmethod from typing import List, Pattern, Dict, Match @@ -9,7 +7,6 @@ from recognizers_text.utilities import RegExpUtility from recognizers_text.extractor import Extractor, ExtractResult from recognizers_number.resources.base_numbers import BaseNumbers -from recognizers_number.resources.english_numeric import EnglishNumeric from recognizers_number.number.models import LongFormatType from recognizers_number.number.constants import Constants diff --git a/Python/libraries/recognizers-number/recognizers_number/number/minimal/__init__.py b/Python/libraries/recognizers-number/recognizers_number/number/minimal/__init__.py new file mode 100644 index 0000000000..d79a5447e6 --- /dev/null +++ b/Python/libraries/recognizers-number/recognizers_number/number/minimal/__init__.py @@ -0,0 +1,2 @@ +from .extractors import * +from .parsers import * diff --git a/Python/libraries/recognizers-number/recognizers_number/number/minimal/extractors.py b/Python/libraries/recognizers-number/recognizers_number/number/minimal/extractors.py new file mode 100644 index 0000000000..31da9926a9 --- /dev/null +++ b/Python/libraries/recognizers-number/recognizers_number/number/minimal/extractors.py @@ -0,0 +1,206 @@ +from typing import Pattern, List, NamedTuple +from collections import namedtuple 
+import regex
+
+from recognizers_text.extractor import ExtractResult
+from recognizers_number.number.models import NumberMode, LongFormatMode
+from recognizers_number.resources.minimal_numeric import MinimalNumeric
+from recognizers_number.number.extractors import ReVal, ReRe, BaseNumberExtractor
+from recognizers_number.number.constants import Constants
+
+# ReVal and ReRe are the base-module tuples imported above; not redefined here.
+MatchesVal = namedtuple('MatchesVal', ['matches', 'val'])
+
+
+class MinimalNumberExtractor(BaseNumberExtractor):
+    """Number extractor driven by the MinimalNumeric regex resources."""
+    @property
+    def regexes(self) -> List[ReVal]:
+        return self.__regexes
+
+    @property
+    def ambiguity_filters_dict(self) -> List[ReRe]:
+        return self.__ambiguity_filters_dict
+
+    @property
+    def _extract_type(self) -> str:
+        return Constants.SYS_NUM
+
+    def __init__(self, mode: NumberMode = NumberMode.DEFAULT):
+        """Collect the regexes for ``mode`` (currency regex first in CURRENCY mode)."""
+        self.__regexes: List[ReVal] = []
+
+        if mode is NumberMode.PURE_NUMBER:
+            cardinal_ex = MinimalCardinalExtractor(
+                MinimalNumeric.PlaceHolderPureNumber)
+        else:
+            if mode is NumberMode.CURRENCY:
+                self.__regexes.append(
+                    ReVal(re=MinimalNumeric.CurrencyRegex, val='IntegerNum'))
+            cardinal_ex = MinimalCardinalExtractor()
+
+        self.__regexes.extend(cardinal_ex.regexes)
+
+        # The minimal culture defines no ambiguity filters.
+        self.__ambiguity_filters_dict: List[ReRe] = []
+
+    def extract(self, source: str) -> List[ExtractResult]:
+        """Return one result per maximal run of regex-covered characters.
+
+        A run is reported only when a single regex match spans it exactly; the
+        result's ``data`` is that regex's tag ('IntegerNum', 'DoubleNum', ...).
+        """
+        if source is None or not source.strip():
+            return []
+
+        result: List[ExtractResult] = []
+        matched: List[bool] = [False] * len(source)
+        # (start, end) of every match -> tag of the first regex that produced it.
+        span_tags = {}
+
+        for re_val in self.regexes:
+            for m in regex.finditer(re_val.re, source):
+                matched[m.start():m.end()] = [True] * (m.end() - m.start())
+                # Keep source data for extra information
+                span_tags.setdefault((m.start(), m.end()), re_val.val)
+
+        last = -1
+        for i in range(len(source)):
+            if not matched[i]:
+                last = i
+                continue
+            if i + 1 < len(source) and matched[i + 1]:
+                continue  # still inside the current run
+
+            start = last + 1
+            length = i - last
+            run_span = (start, start + length)
+            substr = source[start:start + length].strip()
+
+            # extract negative numbers
+            if self._negative_number_terms is not None:
+                match = regex.search(self._negative_number_terms,
+                                     source[0:start])
+                if match is not None:
+                    start = match.start()
+                    length = length + match.end() - match.start()
+                    substr = source[start:start + length].strip()
+
+            if run_span in span_tags:
+                value = ExtractResult()
+                value.start = start
+                value.length = length
+                value.text = substr
+                value.type = self._extract_type
+                value.data = span_tags[run_span]
+                result.append(value)
+
+        return self._filter_ambiguity(result, source)
+
+
+class MinimalCardinalExtractor(BaseNumberExtractor):
+    """Cardinal extractor: integer regexes followed by double regexes."""
+    @property
+    def regexes(self) -> List[ReVal]:
+        return self.__regexes
+
+    @property
+    def _extract_type(self) -> str:
+        return Constants.SYS_NUM_CARDINAL
+
+    def __init__(self, placeholder: str = MinimalNumeric.PlaceHolderDefault):
+        self.__regexes: List[ReVal] = []
+
+        # Add integer regexes
+        integer_ex = MinimalIntegerExtractor(placeholder)
+        self.__regexes.extend(integer_ex.regexes)
+
+        # Add double regexes
+        double_ex = MinimalDoubleExtractor(placeholder)
+        self.__regexes.extend(double_ex.regexes)
+
+
+class MinimalIntegerExtractor(BaseNumberExtractor):
+    """Integer extractor; every regex it exposes is tagged 'IntegerNum'."""
+    @property
+    def regexes(self) -> List[ReVal]:
+        return self.__regexes
+
+    @property
+    def _extract_type(self) -> str:
+        return Constants.SYS_NUM_INTEGER
+
+    def __init__(self, placeholder: str = MinimalNumeric.PlaceHolderDefault):
+        self.__regexes = [
+            ReVal(
+                re=MinimalNumeric.NumbersWithPlaceHolder(placeholder),
+                val='IntegerNum'),
+            ReVal(
+                re=MinimalNumeric.NumbersWithSuffix,
+                val='IntegerNum'),
+            ReVal(
+                re=self._generate_format_regex(LongFormatMode.INTEGER_DOT,
+                                               placeholder),
+                val='IntegerNum'),
+            ReVal(
+                re=self._generate_format_regex(LongFormatMode.INTEGER_BLANK,
+                                               placeholder),
+                val='IntegerNum'),
+            ReVal(
+                re=self._generate_format_regex(
+                    LongFormatMode.INTEGER_NO_BREAK_SPACE, placeholder),
+                val='IntegerNum')
+        ]
+
+
+class MinimalDoubleExtractor(BaseNumberExtractor):
+    """Double extractor; regexes are tagged 'DoubleNum' or 'DoublePow'."""
+    @property
+    def regexes(self) -> List[ReVal]:
+        return self.__regexes
+
+    @property
+    def _extract_type(self) -> str:
+        return Constants.SYS_NUM_DOUBLE
+
+    def __init__(self, placeholder: str = MinimalNumeric.PlaceHolderDefault):
+        self.__regexes = [
+            ReVal(
+                re=MinimalNumeric.DoubleDecimalPointRegex(placeholder),
+                val='DoubleNum'),
+            ReVal(
+                re=MinimalNumeric.DoubleWithoutIntegralRegex(placeholder),
+                val='DoubleNum'),
+            ReVal(
+                re=MinimalNumeric.DoubleWithMultiplierRegex,
+                val='DoubleNum'),
+            ReVal(
+                re=MinimalNumeric.DoubleExponentialNotationRegex,
+                val='DoublePow'),
+            ReVal(
+                re=MinimalNumeric.DoubleCaretExponentialNotationRegex,
+                val='DoublePow'),
+            ReVal(
+                re=self._generate_format_regex(LongFormatMode.DOUBLE_DOT_COMMA,
+                                               placeholder),
+                val='DoubleNum'),
+            ReVal(
+                re=self._generate_format_regex(
+                    LongFormatMode.DOUBLE_NO_BREAK_SPACE_COMMA,
+                    placeholder),
+                val='DoubleNum')
+        ]
+
+
+class MinimalOrdinalExtractor(BaseNumberExtractor):
+    """Ordinal extractor; the minimal culture defines no ordinal regexes."""
+    @property
+    def regexes(self) -> List[ReVal]:
+        return self.__regexes
+
+    @property
+    def _extract_type(self) -> str:
+        return Constants.SYS_NUM_ORDINAL
+
+    def __init__(self):
+        self.__regexes = []
diff --git a/Python/libraries/recognizers-number/recognizers_number/number/minimal/parsers.py b/Python/libraries/recognizers-number/recognizers_number/number/minimal/parsers.py new file mode 100644 index 0000000000..8d5f4442f7 --- /dev/null +++ 
b/Python/libraries/recognizers-number/recognizers_number/number/minimal/parsers.py
@@ -0,0 +1,256 @@
+from typing import Dict, Pattern, Optional, List
+import regex
+
+from recognizers_text.culture import Culture
+from recognizers_text.utilities import RegExpUtility
+from recognizers_text.extractor import ExtractResult
+from recognizers_text.parser import ParseResult
+from recognizers_text.meta_data import MetaData
+from recognizers_number.culture import CultureInfo
+from recognizers_number.number.parsers import BaseNumberParserConfiguration
+from recognizers_number.resources.minimal_numeric import MinimalNumeric
+from recognizers_number.number.parsers import BaseNumberParser, NumberParserConfiguration
+
+
+class MinimalNumberParserConfiguration(BaseNumberParserConfiguration):
+    """Parser configuration for the minimal culture.
+
+    Only the separator characters, the negative-sign regex, the language marker
+    and the culture info carry values; every other member returns None.
+    """
+
+    @property
+    def cardinal_number_map(self) -> Optional[Dict[str, int]]:
+        return None
+
+    @property
+    def ordinal_number_map(self) -> Optional[Dict[str, int]]:
+        return None
+
+    @property
+    def round_number_map(self) -> Optional[Dict[str, int]]:
+        return None
+
+    @property
+    def digital_number_regex(self) -> Optional[Pattern]:
+        return None
+
+    @property
+    def fraction_marker_token(self) -> Optional[str]:
+        return None
+
+    @property
+    def negative_number_sign_regex(self) -> Pattern:
+        return self._negative_number_sign_regex
+
+    @property
+    def half_a_dozen_regex(self) -> Optional[Pattern]:
+        return None
+
+    @property
+    def half_a_dozen_text(self) -> Optional[str]:
+        return None
+
+    @property
+    def word_separator_token(self) -> Optional[str]:
+        return None
+
+    @property
+    def written_decimal_separator_texts(self) -> Optional[List[str]]:
+        return None
+
+    @property
+    def written_group_separator_texts(self) -> Optional[List[str]]:
+        return None
+
+    @property
+    def written_integer_separator_texts(self) -> Optional[List[str]]:
+        return None
+
+    @property
+    def written_fraction_separator_texts(self) -> Optional[List[str]]:
+        return None
+
+    @property
+    def non_standard_separator_variants(self) -> Optional[List[str]]:
+        return None
+
+    @property
+    def is_multi_decimal_separator_culture(self) -> Optional[bool]:
+        return None
+
+    @property
+    def round_multiplier_regex(self) -> Optional[Pattern]:
+        return None
+
+    @property
+    def culture_info(self):
+        return self._culture_info
+
+    @property
+    def lang_marker(self) -> str:
+        return self._lang_marker
+
+    @property
+    def non_decimal_separator_char(self) -> str:
+        return self._non_decimal_separator_char
+
+    @property
+    def decimal_separator_char(self) -> str:
+        return self._decimal_separator_char
+
+    def __init__(self, culture_info=None, decimal_point_separator: bool = True):
+        """Set up culture info, separator characters and the negative-sign regex."""
+        if culture_info is None:
+            culture_info = CultureInfo(Culture.Minimal)
+
+        self._culture_info = culture_info
+        self._lang_marker = MinimalNumeric.LangMarker
+
+        # Allows user to choose whether to use a ',' or '.' as a decimal separator
+        # decimal_point_separator=True uses a decimal point as decimal separator and comma as thousands separator
+        # decimal_point_separator=False uses a comma as decimal separator and decimal point as thousands separator
+        if decimal_point_separator:
+            self._decimal_separator_char = MinimalNumeric.DecimalSeparatorChar
+            self._non_decimal_separator_char = MinimalNumeric.NonDecimalSeparatorChar
+        else:
+            self._decimal_separator_char = MinimalNumeric.NonDecimalSeparatorChar
+            self._non_decimal_separator_char = MinimalNumeric.DecimalSeparatorChar
+
+        self._negative_number_sign_regex = RegExpUtility.get_safe_reg_exp(
+            MinimalNumeric.NegativeNumberSignRegex)
+
+
+class MinimalNumberParser(BaseNumberParser):
+    """Number parser for the minimal culture; handles 'Num' and 'Pow' results."""
+
+    def __init__(self, config: BaseNumberParserConfiguration):
+        # NOTE(review): BaseNumberParser.__init__ is not called; the attributes
+        # below are set directly instead - confirm nothing else is required.
+        self.config: NumberParserConfiguration = config
+        self.supported_types: List[str] = list()
+
+        self.arabic_number_regex: Pattern = RegExpUtility.get_safe_reg_exp(
+            r'\d+', flags=regex.I | regex.S)
+        self.round_number_set: List[str] = []
+        self.is_non_standard_separator_variant = False
+
+    def parse(self, source: ExtractResult) -> Optional[ParseResult]:
+        """Parse one extracted number.
+
+        Returns None when ``source.type`` is excluded by ``supported_types`` or
+        when the result is neither a digit ('Num') nor a power ('Pow') form.
+        """
+        # Check if the parser is configured to support specific types
+        if self.supported_types and source.type not in self.supported_types:
+            return None
+
+        ret: Optional[ParseResult] = None
+        if isinstance(source.data, list):
+            ret = self._parse_merged(source)
+        else:
+            extra = source.data if isinstance(source.data, str) else None
+            if not extra:
+                if self.arabic_number_regex.search(source.text):
+                    extra = 'Num'
+                else:
+                    extra = self.config.lang_marker
+
+            if 'Num' in extra:
+                ret = self._digit_number_parse(source)
+            elif 'Pow' in extra:
+                ret = self._power_number_parse(source)
+
+        # Neither branch produced a result (e.g. text without digits): report
+        # "not parsed" instead of dereferencing None below.
+        if ret is None:
+            return None
+
+        if isinstance(ret.data, list):
+            # NOTE(review): every iteration overwrites ret.resolution_str, so only
+            # the last sub-result's string survives - confirm this is intended.
+            for parsed_result in ret.data:
+                ret.resolution_str = self._get_resolution_string(parsed_result.value)
+        elif ret.value is not None:
+            ret.resolution_str = self._get_resolution_string(ret.value)
+            ret.text = ret.text.lower()
+
+        return ret
+
+    def _parse_merged(self, source: ExtractResult) -> ParseResult:
+        """Parse a result whose ``data`` is a list of extract results.
+
+        Consecutive mergeable sub-results are summed into one ParseResult each;
+        the merged list becomes ``data`` of the returned result.
+        """
+        ers = source.data
+        inner_prs = [self.parse(rs) for rs in ers]
+        merged_prs = []
+
+        val = 0
+        count = 0
+        # Value of the last non-zero group; stays 0 when there is none.
+        final_val = 0
+
+        for idx, inner in enumerate(inner_prs):
+            val += inner.value
+            is_last = idx + 1 >= len(inner_prs)
+            # NOTE(review): the mergeability check is assumed to be the private
+            # BaseNumberParser.__is_mergeable; name mangling makes a plain
+            # self.__is_mergeable lookup fail inside this subclass - confirm.
+            if is_last or not self._BaseNumberParser__is_mergeable(
+                    float(str(inner.value)),
+                    float(str(inner_prs[idx + 1].value))):
+                start = ers[idx - count].start
+                length = ers[idx].start + ers[idx].length - start
+                # Offset of this group inside source.text.
+                offset = start - source.start
+
+                parsed_result = ParseResult()
+                parsed_result.start = start
+                parsed_result.length = length
+                parsed_result.value = val
+                parsed_result.text = source.text[offset:offset + length]
+                parsed_result.type = source.type
+                parsed_result.data = None
+
+                merged_prs.append(parsed_result)
+                if val != 0:
+                    final_val = val
+                val = 0
+                count = 0
+            else:
+                count += 1
+
+        ret = ParseResult()
+        ret.start = source.start
+        ret.length = source.length
+        ret.text = source.text
+        ret.type = source.type
+        ret.value = val + final_val
+        ret.data = merged_prs
+        return ret
+
+    def _digit_number_parse(self, ext_result: ExtractResult) -> ParseResult:
+        """Parse a digit-form number via ``self._get_digital_value``."""
+        result = ParseResult()
+        result.start = ext_result.start
+        result.length = ext_result.length
+        result.text = ext_result.text
+        result.type = ext_result.type
+        result.meta_data = MetaData() if not result.meta_data else result.meta_data
+
+        # [1] 24
+        # [2] 12 32/33
+        # [3] 1,000,000
+        # [4] 234.567
+        # [5] 44/55
+
+        power = 1
+        handle = ext_result.text.lower()
+
+        # Scale used in the calculate of double
+        result.value = self._get_digital_value(handle, power)
+
+        return result
+
diff --git a/Python/libraries/recognizers-number/recognizers_number/number/number_recognizer.py b/Python/libraries/recognizers-number/recognizers_number/number/number_recognizer.py index 204a5ac92a..cbd8ab03f5 100644 --- a/Python/libraries/recognizers-number/recognizers_number/number/number_recognizer.py +++ b/Python/libraries/recognizers-number/recognizers_number/number/number_recognizer.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. - from enum import IntFlag from typing import List @@ -32,6 +29,8 @@ from recognizers_number.number.italian.parsers import ItalianNumberParserConfiguration from recognizers_number.number.catalan.extractors import CatalanNumberExtractor, CatalanOrdinalExtractor from recognizers_number.number.catalan.parsers import CatalanNumberParserConfiguration +from recognizers_number.number.minimal.extractors import MinimalNumberExtractor +from recognizers_number.number.minimal.parsers import MinimalNumberParserConfiguration class NumberOptions(IntFlag): @@ -39,7 +38,9 @@ class NumberOptions(IntFlag): class NumberRecognizer(Recognizer[NumberOptions]): - def __init__(self, target_culture: str = None, options: NumberOptions = NumberOptions.NONE, lazy_initialization: bool = True): + def __init__(self, target_culture: str = None, options: NumberOptions = NumberOptions.NONE, + lazy_initialization: bool = True, decimal_point_separator: bool = True): + self.decimal_point_separator = decimal_point_separator if options < NumberOptions.NONE or options > NumberOptions.NONE: raise ValueError() super().__init__(target_culture, 
options, lazy_initialization) @@ -256,6 +257,14 @@ def initialize_configuration(self): )) # endregion + # region Minimal + self.register_model('NumberModel', Culture.Minimal, lambda options: NumberModel( + AgnosticNumberParserFactory.get_parser( + ParserType.NUMBER, MinimalNumberParserConfiguration(decimal_point_separator= + self.decimal_point_separator)), + MinimalNumberExtractor(NumberMode.PURE_NUMBER) + )) + # endregion def get_number_model(self, culture: str = None, fallback_to_default_culture: bool = True) -> Model: return self.get_model('NumberModel', culture, fallback_to_default_culture) @@ -267,8 +276,10 @@ def get_percentage_model(self, culture: str = None, fallback_to_default_culture: return self.get_model('PercentModel', culture, fallback_to_default_culture) -def recognize_number(query: str, culture: str, options: NumberOptions = NumberOptions.NONE, fallback_to_default_culture: bool = True) -> List[ModelResult]: - recognizer = NumberRecognizer(culture, options) +def recognize_number(query: str, culture: str, options: NumberOptions = NumberOptions.NONE, + fallback_to_default_culture: bool = True, + decimal_point_separator: bool = True) -> List[ModelResult]: + recognizer = NumberRecognizer(culture, options, decimal_point_separator=decimal_point_separator) model = recognizer.get_number_model(culture, fallback_to_default_culture) return model.parse(query) diff --git a/Python/libraries/recognizers-number/recognizers_number/number/parser_factory.py b/Python/libraries/recognizers-number/recognizers_number/number/parser_factory.py index ca0e077557..f029441111 100644 --- a/Python/libraries/recognizers-number/recognizers_number/number/parser_factory.py +++ b/Python/libraries/recognizers-number/recognizers_number/number/parser_factory.py @@ -1,6 +1,3 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. 
- from enum import Enum from recognizers_number.number.parsers import NumberParserConfiguration, BaseNumberParser, BasePercentageParser @@ -8,6 +5,7 @@ from recognizers_number.number.cjk_parsers import CJKNumberParser from recognizers_number.number.chinese.parsers import ChineseNumberParserConfiguration from recognizers_number.number.japanese.parsers import JapaneseNumberParserConfiguration +from recognizers_number.number.minimal.parsers import MinimalNumberParserConfiguration, MinimalNumberParser class ParserType(Enum): @@ -23,16 +21,20 @@ class ParserType(Enum): class AgnosticNumberParserFactory: @staticmethod def get_parser(parser_type: ParserType, language_config: NumberParserConfiguration) -> BaseNumberParser: - parser = BaseNumberParser(language_config) chinese = isinstance(language_config, ChineseNumberParserConfiguration) japanese = isinstance( language_config, JapaneseNumberParserConfiguration) + minimal = isinstance(language_config, MinimalNumberParserConfiguration) if chinese: parser = CJKNumberParser(language_config) elif japanese: parser = CJKNumberParser(language_config) + elif minimal: + parser = MinimalNumberParser(language_config) + else: + parser = BaseNumberParser(language_config) if parser_type is ParserType.CARDINAL: parser.supported_types = [ diff --git a/Python/libraries/recognizers-number/recognizers_number/resources/__init__.py b/Python/libraries/recognizers-number/recognizers_number/resources/__init__.py index 57de0ab4d8..017d158fed 100644 --- a/Python/libraries/recognizers-number/recognizers_number/resources/__init__.py +++ b/Python/libraries/recognizers-number/recognizers_number/resources/__init__.py @@ -11,4 +11,5 @@ from .portuguese_numeric import PortugueseNumeric from .spanish_numeric import SpanishNumeric from .japanese_numeric import JapaneseNumeric -from .catalan_numeric import CatalanNumeric \ No newline at end of file +from .catalan_numeric import CatalanNumeric +from .minimal_numeric import MinimalNumeric \ No newline at end of 
file diff --git a/Python/libraries/recognizers-number/recognizers_number/resources/minimal_numeric.py b/Python/libraries/recognizers-number/recognizers_number/resources/minimal_numeric.py new file mode 100644 index 0000000000..769465d053 --- /dev/null +++ b/Python/libraries/recognizers-number/recognizers_number/resources/minimal_numeric.py @@ -0,0 +1,42 @@ +# pylint: disable=line-too-long + + +class MinimalNumeric: + LangMarker = 'min' + NumberReplaceToken = '@builtin.num' + FractionNumberReplaceToken = '@builtin.num.fraction' + + def IntegerRegexDefinition(placeholder, thousandsmark): + return f'(((? Date: Wed, 21 Feb 2024 17:35:24 +0000 Subject: [PATCH 2/8] Add some tests --- .../date_time/date_time_recognizer.py | 5 +- .../date_time/minimal/base_configs.py | 32 +-- .../date_time/minimal/base_minimal_date.py | 7 +- .../date_time/minimal/base_minimal_merged.py | 29 +- .../date_time/minimal/base_minimal_time.py | 7 +- .../date_time/minimal/common_configs.py | 30 +- .../minimal/date_extractor_config.py | 113 +------- .../date_time/minimal/date_parser_config.py | 133 +-------- .../minimal/merged_extractor_config.py | 27 +- .../date_time/minimal/merged_parser_config.py | 34 +-- .../minimal/time_extractor_config.py | 27 +- .../date_time/minimal/time_parser_config.py | 10 +- Python/tests/runner.py | 31 ++- Specs/DateTime/Minimal/DateTimeModel.json | 263 ++++++++++++++++++ Specs/Number/Minimal/NumberModel.json | 122 ++++++++ .../NumberWithUnit/Minimal/CurrencyModel.json | 77 +++++ 16 files changed, 534 insertions(+), 413 deletions(-) create mode 100644 Specs/DateTime/Minimal/DateTimeModel.json create mode 100644 Specs/Number/Minimal/NumberModel.json create mode 100644 Specs/NumberWithUnit/Minimal/CurrencyModel.json diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/date_time_recognizer.py b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/date_time_recognizer.py index 96ac42c205..1b1bce1a3b 100644 --- 
a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/date_time_recognizer.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/date_time_recognizer.py @@ -40,7 +40,6 @@ from .catalan.common_configs import CatalanCommonDateTimeParserConfiguration from .catalan.merged_extractor_config import CatalanMergedExtractorConfiguration from .catalan.merged_parser_config import CatalanMergedParserConfiguration -from .minimal.common_configs import MinimalCommonDateTimeParserConfiguration from .minimal.merged_extractor_config import BaseMinimalMergedExtractorConfiguration from .minimal.merged_parser_config import BaseMinimalMergedParserConfiguration @@ -134,9 +133,7 @@ def initialize_configuration(self): )) self.register_model('DateTimeModel', Culture.Minimal, lambda options: DateTimeModel( - BaseMinimalMergedParser(BaseMinimalMergedParserConfiguration( - MinimalCommonDateTimeParserConfiguration(dmyDateFormat=self.dmyDateFormat), - dmyDateFormat=self.dmyDateFormat), options), + BaseMinimalMergedParser(BaseMinimalMergedParserConfiguration(dmyDateFormat=self.dmyDateFormat), options), BaseMinimalMergedExtractor(BaseMinimalMergedExtractorConfiguration(), options) )) diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/base_configs.py b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/base_configs.py index 3f4171593c..aaf01c69cc 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/base_configs.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/base_configs.py @@ -1,39 +1,9 @@ from typing import Pattern from recognizers_text.utilities import RegExpUtility -from recognizers_date_time.date_time.base_date import DateTimeUtilityConfiguration from recognizers_date_time.resources.minimal_date_time import MinimalDateTime -class MinimalDateTimeUtilityConfiguration(DateTimeUtilityConfiguration): - - 
@property - def date_unit_regex(self) -> Pattern: - return None - - @property - def ago_regex(self) -> Pattern: - return None - - @property - def later_regex(self) -> Pattern: - return None - - @property - def in_connector_regex(self) -> Pattern: - return None - - @property - def range_unit_regex(self) -> Pattern: - return None - - @property - def check_both_before_after(self) -> Pattern: - return None - - @property - def range_prefix_regex(self) -> Pattern: - return None - +class MinimalDateTimeUtilityConfiguration: @property def am_desc_regex(self) -> Pattern: return self._am_desc_regex diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/base_minimal_date.py b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/base_minimal_date.py index 42b11ac830..0c43a7f3ea 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/base_minimal_date.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/base_minimal_date.py @@ -4,12 +4,11 @@ from recognizers_text.extractor import ExtractResult from ..constants import TimeTypeConstants from ..parsers import DateTimeParseResult -from recognizers_date_time.date_time.base_date import DateExtractorConfiguration, BaseDateExtractor, \ - DateParserConfiguration, BaseDateParser +from recognizers_date_time.date_time.base_date import BaseDateExtractor, BaseDateParser class BaseMinimalDateExtractor(BaseDateExtractor): - def __init__(self, config: DateExtractorConfiguration): + def __init__(self, config): super().__init__(config) def extract(self, source: str, reference: datetime = None) -> List[ExtractResult]: @@ -24,7 +23,7 @@ def extract(self, source: str, reference: datetime = None) -> List[ExtractResult class BaseMinimalDateParser(BaseDateParser): - def __init__(self, config: DateParserConfiguration): + def __init__(self, config): super().__init__(config) def parse(self, source: ExtractResult, reference: 
datetime = None) -> Optional[DateTimeParseResult]: diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/base_minimal_merged.py b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/base_minimal_merged.py index 838978f897..4354657089 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/base_minimal_merged.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/base_minimal_merged.py @@ -1,12 +1,11 @@ -from typing import List +from typing import List, Optional from datetime import datetime from recognizers_text.extractor import ExtractResult from recognizers_date_time.date_time.constants import Constants from recognizers_date_time.date_time.utilities import DateTimeOptions -from recognizers_date_time.date_time.base_minimal_merged import MinimalMergedExtractorConfiguration, \ - MinimalMergedParserConfiguration from recognizers_date_time.date_time.base_merged import BaseMergedExtractor, BaseMergedParser +from recognizers_date_time.date_time.parsers import DateTimeParseResult class BaseMinimalMergedExtractor(BaseMergedExtractor): @@ -14,7 +13,7 @@ class BaseMinimalMergedExtractor(BaseMergedExtractor): def extractor_type_name(self) -> str: return Constants.SYS_DATETIME_MERGED - def __init__(self, config: MinimalMergedExtractorConfiguration, options: DateTimeOptions): + def __init__(self, config, options: DateTimeOptions): super().__init__(config, options) def extract(self, source: str, reference: datetime = None) -> List[ExtractResult]: @@ -29,10 +28,6 @@ def extract(self, source: str, reference: datetime = None) -> List[ExtractResult result = self.add_to( result, self.config.time_extractor.extract(source, reference), source) - # this should be at the end since if need the extractor to determine the previous text contains time or not - result = self.add_to( - result, self.number_ending_regex_match(source, result), source) - result = 
sorted(result, key=lambda x: x.start) return result @@ -43,7 +38,7 @@ class BaseMinimalMergedParser(BaseMergedParser): def parser_type_name(self) -> str: return Constants.SYS_DATETIME_MERGED - def __init__(self, config: MinimalMergedParserConfiguration, options: DateTimeOptions): + def __init__(self, config, options: DateTimeOptions): super().__init__(config, options) def parse_result(self, source: ExtractResult, reference: datetime): @@ -55,3 +50,19 @@ def parse_result(self, source: ExtractResult, reference: datetime): return None return result + + def parse(self, source: ExtractResult, reference: datetime = None) -> Optional[DateTimeParseResult]: + if not reference: + reference = datetime.now() + + result = self.parse_result(source, reference) + if not result: + return None + + if self.options & DateTimeOptions.SPLIT_DATE_AND_TIME and result.value and result.value.sub_date_time_entities: + result.value = self._date_time_resolution_for_split(result) + else: + result = self.set_parse_result( + result, has_before=False, has_after=False, has_since=False) + + return result diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/base_minimal_time.py b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/base_minimal_time.py index 4a4ae33f84..feae1f795a 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/base_minimal_time.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/base_minimal_time.py @@ -2,8 +2,7 @@ from datetime import datetime from recognizers_text.extractor import ExtractResult -from recognizers_date_time.date_time.base_time import BaseTimeExtractor, BaseTimeParser, TimeExtractorConfiguration, \ - TimeParserConfiguration +from recognizers_date_time.date_time.base_time import BaseTimeExtractor, BaseTimeParser from recognizers_date_time.date_time.constants import Constants from recognizers_date_time.date_time.utilities 
import DateTimeOptions, merge_all_tokens, TimeZoneUtility @@ -13,7 +12,7 @@ class BaseMinimalTimeExtractor(BaseTimeExtractor): def extractor_type_name(self) -> str: return Constants.SYS_DATETIME_TIME - def __init__(self, config: TimeExtractorConfiguration): + def __init__(self, config): super().__init__(config) def extract(self, source: str, reference: datetime = None) -> List[ExtractResult]: @@ -37,6 +36,6 @@ def extract(self, source: str, reference: datetime = None) -> List[ExtractResult class BaseMinimalTimeParser(BaseTimeParser): - def __init__(self, config: TimeParserConfiguration): + def __init__(self, config): super().__init__(config) self.config = config diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/common_configs.py b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/common_configs.py index 346830e2c9..8fa87f7bd5 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/common_configs.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/common_configs.py @@ -18,7 +18,7 @@ from recognizers_date_time.date_time.minimal.base_minimal_time import BaseMinimalTimeExtractor, BaseMinimalTimeParser -class MinimalCommonDateTimeParserConfiguration(BaseMinimalDateParserConfiguration): +class MinimalCommonDateTimeParserConfiguration: @property def month_of_year(self) -> Dict[str, int]: return self._month_of_year @@ -39,10 +39,6 @@ def cardinal_extractor(self) -> MinimalNumberExtractor: def integer_extractor(self) -> MinimalNumberExtractor: return self._integer_extractor - @property - def ordinal_extractor(self) -> MinimalNumberExtractor: - return None - @property def number_parser(self) -> MinimalNumberParser: return self._number_parser @@ -55,10 +51,6 @@ def date_extractor(self) -> DateTimeExtractor: def time_extractor(self) -> DateTimeExtractor: return self._time_extractor - @property - def date_time_extractor(self) -> 
DateTimeExtractor: - return None - @property def date_parser(self) -> DateTimeParser: return self._date_parser @@ -67,30 +59,10 @@ def date_parser(self) -> DateTimeParser: def time_parser(self) -> DateTimeParser: return self._time_parser - @property - def date_time_parser(self) -> DateTimeParser: - return None - - @property - def numbers(self) -> Dict[str, int]: - return {} - @property def day_of_month(self) -> Dict[str, int]: return self._day_of_month - @property - def day_of_week(self) -> Dict[str, int]: - return None - - @property - def unit_map(self): - return {} - - @property - def cardinal_map(self): - return {} - @property def utility_configuration(self) -> DateTimeUtilityConfiguration: return self._utility_configuration diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/date_extractor_config.py b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/date_extractor_config.py index d1e63a19b2..6ebbd050bf 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/date_extractor_config.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/date_extractor_config.py @@ -1,104 +1,16 @@ from typing import Pattern, List, Dict from recognizers_number import (BaseNumberExtractor, MinimalNumberParser, MinimalIntegerExtractor, - MinimalNumberParserConfiguration, MinimalOrdinalExtractor) + MinimalNumberParserConfiguration) from recognizers_text.utilities import RegExpUtility -from ...resources.minimal_date_time import MinimalDateTime -from ..extractors import DateTimeExtractor -from ..base_date import DateExtractorConfiguration -from .base_configs import MinimalDateTimeUtilityConfiguration -from ..constants import Constants -from ...resources.base_date_time import BaseDateTime +from recognizers_date_time.resources.minimal_date_time import MinimalDateTime +from recognizers_date_time.resources.base_date_time import BaseDateTime -class 
MinimalDateExtractorConfiguration(DateExtractorConfiguration): - - @property - def month_end(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - @property - def week_day_end(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - @property - def week_day_start(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - @property - def of_month(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - @property - def date_unit_regex(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - @property - def for_the_regex(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - @property - def week_day_and_day_of_month_regex(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - @property - def week_day_and_day_regex(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - @property - def relative_month_regex(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - @property - def week_day_regex(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - @property - def prefix_article_regex(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - @property - def day_of_week(self) -> Dict[str, int]: - None +class MinimalDateExtractorConfiguration: @property def month_of_year(self) -> Dict[str, int]: - self._month_of_year - - @property - def ordinal_extractor(self): - return self._ordinal_extractor - - @property - def utility_configuration(self): - None - - @property - def strict_relative_regex(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - @property - def year_suffix(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - @property - def more_than_regex(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - @property - def less_than_regex(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - @property - def in_connector_regex(self) -> Pattern: - 
return RegExpUtility.get_safe_reg_exp(f'^[.]') - - @property - def range_unit_regex(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - @property - def since_year_suffix_regex(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') + return self._month_of_year @property def check_both_before_after(self) -> bool: @@ -108,10 +20,6 @@ def check_both_before_after(self) -> bool: def date_regex_list(self) -> List[Pattern]: return self._date_regex_list - @property - def implicit_date_list(self) -> List[Pattern]: - return self._implicit_date_list - @property def integer_extractor(self) -> BaseNumberExtractor: return self._integer_extractor @@ -120,14 +28,14 @@ def integer_extractor(self) -> BaseNumberExtractor: def number_parser(self) -> MinimalNumberParser: return self._number_parser - @property - def duration_extractor(self) -> DateTimeExtractor: - return None - @property def range_connector_symbol_regex(self) -> Pattern: return self._range_connector_symbol_regex + @property + def strict_relative_regex(self): + return "" + def __init__(self, dmyDateFormat: bool = True): self._check_both_before_after = False if dmyDateFormat: @@ -148,14 +56,11 @@ def __init__(self, dmyDateFormat: bool = True): RegExpUtility.get_safe_reg_exp(date_extractor_11), ] - self._implicit_date_list = [] self._integer_extractor = MinimalIntegerExtractor() self._number_parser = MinimalNumberParser( MinimalNumberParserConfiguration()) - self._utility_configuration = MinimalDateTimeUtilityConfiguration() self._range_connector_symbol_regex = RegExpUtility.get_safe_reg_exp( BaseDateTime.RangeConnectorSymbolRegex ) self._check_both_before_after = MinimalDateTime.CheckBothBeforeAfter - self._ordinal_extractor = MinimalOrdinalExtractor() self._month_of_year = MinimalDateTime.MonthOfYear diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/date_parser_config.py 
b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/date_parser_config.py index 7f72515187..7ccc2a52a3 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/date_parser_config.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/date_parser_config.py @@ -1,111 +1,21 @@ from typing import Pattern, List, Dict -from recognizers_text.utilities import RegExpUtility from recognizers_number import BaseNumberExtractor, BaseNumberParser -from ...resources.minimal_date_time import MinimalDateTime -from ..extractors import DateTimeExtractor -from ..parsers import DateTimeParser -from ..utilities import DateTimeUtilityConfiguration -from ..base_date import DateParserConfiguration -from ..base_configs import BaseDateParserConfiguration +from recognizers_date_time.resources.minimal_date_time import MinimalDateTime +from recognizers_date_time.date_time.extractors import DateTimeExtractor +from recognizers_date_time.date_time.utilities import DateTimeUtilityConfiguration -class MinimalDateParserConfiguration(DateParserConfiguration): - @property - def on_regex(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - - @property - def special_day_regex(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - - @property - def special_day_with_num_regex(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - - @property - def next_regex(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - - @property - def unit_regex(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - - @property - def month_regex(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - - @property - def week_day_regex(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - - @property - def last_regex(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - - @property - def this_regex(self) -> 
Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - - @property - def week_day_of_month_regex(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - - @property - def for_the_regex(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - - @property - def week_day_and_day_of_month_regex(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - - @property - def week_day_and_day_regex(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - - @property - def relative_month_regex(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - - @property - def relative_week_day_regex(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') +class MinimalDateParserConfiguration: @property def date_token_prefix(self) -> str: return "" - def get_swift_day(self, source: str) -> int: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - def get_swift_month(self, source: str) -> int: - return None - - def is_cardinal_last(self, source: str) -> bool: - return None - @property def check_both_before_after(self) -> bool: return self._check_both_before_after - @property - def ordinal_extractor(self) -> BaseNumberExtractor: - return self._ordinal_extractor - @property def integer_extractor(self) -> BaseNumberExtractor: return self._integer_extractor @@ -118,16 +28,6 @@ def cardinal_extractor(self) -> BaseNumberExtractor: def date_extractor(self) -> DateTimeExtractor: return self._date_extractor - @property - def duration_extractor(self) -> DateTimeExtractor: - return None - - - @property - def duration_parser(self) -> DateTimeParser: - return None - - @property def number_parser(self) -> BaseNumberParser: return self._number_parser @@ -140,18 +40,6 @@ def month_of_year(self) -> Dict[str, int]: def day_of_month(self) -> Dict[str, int]: return self._day_of_month - @property - def day_of_week(self) -> Dict[str, int]: - return self._day_of_week - - @property - def unit_map(self) -> Dict[str, str]: - 
return self._unit_map - - @property - def cardinal_map(self) -> Dict[str, int]: - return self._cardinal_map - @property def date_regex(self) -> List[Pattern]: return self._date_regex @@ -160,24 +48,13 @@ def date_regex(self) -> List[Pattern]: def utility_configuration(self) -> DateTimeUtilityConfiguration: return self._utility_configuration - _relative_day_regex = None - - _next_prefix_regex = None - - _past_prefix_regex = None - - - def __init__(self, config: BaseDateParserConfiguration): - self._ordinal_extractor = config.ordinal_extractor + def __init__(self, config): self._integer_extractor = config.integer_extractor self._cardinal_extractor = config.cardinal_extractor self._date_extractor = config.date_extractor self._number_parser = config.number_parser self._month_of_year = config.month_of_year self._day_of_month = config.day_of_month - self._day_of_week = config.day_of_week - self._unit_map = config.unit_map - self._cardinal_map = config.cardinal_map self._date_regex = config.date_extractor.config.date_regex_list self._utility_configuration = config.utility_configuration self._check_both_before_after = MinimalDateTime.CheckBothBeforeAfter diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/merged_extractor_config.py b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/merged_extractor_config.py index a80fed58c3..043e1a5da6 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/merged_extractor_config.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/merged_extractor_config.py @@ -1,10 +1,9 @@ from typing import List, Pattern -from recognizers_text import RegExpUtility, Extractor +from recognizers_text import Extractor from recognizers_number import MinimalIntegerExtractor from recognizers_date_time.resources.minimal_date_time import MinimalDateTime from recognizers_date_time.date_time.extractors import 
DateTimeExtractor -from recognizers_date_time.date_time.base_minimal_merged import MinimalMergedExtractorConfiguration from recognizers_date_time.date_time.minimal.base_minimal_date import BaseMinimalDateExtractor from recognizers_date_time.date_time.minimal.base_minimal_time import BaseMinimalTimeExtractor from recognizers_date_time.date_time.minimal.date_extractor_config import MinimalDateExtractorConfiguration @@ -12,7 +11,7 @@ from recognizers_date_time.resources.base_date_time import BaseDateTime -class BaseMinimalMergedExtractorConfiguration(MinimalMergedExtractorConfiguration): +class BaseMinimalMergedExtractorConfiguration: @property def check_both_before_after(self): return self._check_both_before_after @@ -25,10 +24,6 @@ def date_extractor(self) -> DateTimeExtractor: def time_extractor(self) -> DateTimeExtractor: return self._time_extractor - @property - def date_time_extractor(self) -> DateTimeExtractor: - return None - @property def integer_extractor(self) -> Extractor: return self._integer_extractor @@ -37,27 +32,13 @@ def integer_extractor(self) -> Extractor: def equal_regex(self) -> Pattern: return self._equal_regex - @property - def ambiguous_range_modifier_prefix(self) -> None: - return None - - @property - def potential_ambiguous_range_regex(self) -> None: - return None - - @property - def number_ending_pattern(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - @property def filter_word_regex_list(self) -> List[Pattern]: - return self._filter_word_regex_list + return [] def __init__(self): - self._date_extractor = BaseMinimalDateExtractor( - MinimalDateExtractorConfiguration()) + self._date_extractor = BaseMinimalDateExtractor(MinimalDateExtractorConfiguration()) self._time_extractor = BaseMinimalTimeExtractor(MinimalTimeExtractorConfiguration()) self._integer_extractor = MinimalIntegerExtractor() - self._filter_word_regex_list = [] self._equal_regex = BaseDateTime.EqualRegex self._check_both_before_after = 
MinimalDateTime.CheckBothBeforeAfter diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/merged_parser_config.py b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/merged_parser_config.py index 61831e21af..e54afd2da5 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/merged_parser_config.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/merged_parser_config.py @@ -4,15 +4,10 @@ from recognizers_date_time.date_time.minimal.common_configs import MinimalCommonDateTimeParserConfiguration from recognizers_date_time.date_time.base_date import BaseDateParser from recognizers_date_time.date_time.base_time import BaseTimeParser -from recognizers_date_time.date_time.base_minimal_merged import MinimalMergedParserConfiguration from recognizers_date_time.resources.minimal_date_time import MinimalDateTime, BaseDateTime -from recognizers_date_time.date_time.parsers import DateTimeParser -class BaseMinimalMergedParserConfiguration(MinimalCommonDateTimeParserConfiguration, MinimalMergedParserConfiguration): - @property - def around_regex(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') +class BaseMinimalMergedParserConfiguration(MinimalCommonDateTimeParserConfiguration): @property def equal_regex(self) -> Pattern: @@ -22,22 +17,6 @@ def equal_regex(self) -> Pattern: def year_regex(self) -> Pattern: return self._year_regex - @property - def suffix_after(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - @property - def before_regex(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - @property - def after_regex(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - - @property - def since_regex(self) -> Pattern: - return RegExpUtility.get_safe_reg_exp(f'^[.]') - @property def date_parser(self) -> BaseDateParser: return self._date_parser @@ -46,16 +25,7 @@ def 
date_parser(self) -> BaseDateParser: def time_parser(self) -> BaseTimeParser: return self._time_parser - @property - def date_time_parser(self) -> BaseTimeParser: - return None - - @property - def time_zone_parser(self) -> DateTimeParser: - return self._time_zone_parser - - def __init__(self, config, dmyDateFormat = True): + def __init__(self, dmyDateFormat = True): MinimalCommonDateTimeParserConfiguration.__init__(self, dmyDateFormat) - self._time_zone_parser = config.time_zone_parser self._equal_regex = RegExpUtility.get_safe_reg_exp(BaseDateTime.EqualRegex) self._year_regex = RegExpUtility.get_safe_reg_exp(MinimalDateTime.YearRegex) diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/time_extractor_config.py b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/time_extractor_config.py index 5f2a628e09..ec1e0da93a 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/time_extractor_config.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/time_extractor_config.py @@ -1,38 +1,17 @@ from typing import List, Pattern from recognizers_text.utilities import RegExpUtility -from ...resources.minimal_date_time import MinimalDateTime -from ..base_time import TimeExtractorConfiguration +from ...resources.minimal_date_time import MinimalDateTime -class MinimalTimeExtractorConfiguration(TimeExtractorConfiguration): - @property - def time_zone_extractor(self): - return None - +class MinimalTimeExtractorConfiguration: @property def options(self): - return self._options - - @property - def dmy_date_format(self) -> bool: - return self._dmy_date_format + return 0 @property def time_regex_list(self) -> List[Pattern]: return self._time_regex_list - @property - def at_regex(self) -> Pattern: - return None - - @property - def ish_regex(self) -> Pattern: - return None - - @property - def time_before_after_regex(self) -> Pattern: - return None - 
def __init__(self): super().__init__() self._time_regex_list: List[Pattern] = MinimalTimeExtractorConfiguration.get_time_regex_list() diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/time_parser_config.py b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/time_parser_config.py index c78ec4684c..a7b4e333b9 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/time_parser_config.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/minimal/time_parser_config.py @@ -1,13 +1,12 @@ from typing import List, Pattern, Dict from recognizers_text import RegExpUtility -from ..base_time import TimeParserConfiguration -from ..base_configs import BaseDateParserConfiguration, DateTimeUtilityConfiguration +from ..base_configs import DateTimeUtilityConfiguration from .time_extractor_config import MinimalTimeExtractorConfiguration from ..parsers import DateTimeParser -class MinimalTimeParserConfiguration(TimeParserConfiguration): +class MinimalTimeParserConfiguration: @property def time_token_prefix(self) -> str: return "" @@ -22,7 +21,7 @@ def time_regexes(self) -> List[Pattern]: @property def numbers(self) -> Dict[str, int]: - return self._numbers + return {} @property def utility_configuration(self) -> DateTimeUtilityConfiguration: @@ -32,9 +31,8 @@ def utility_configuration(self) -> DateTimeUtilityConfiguration: def time_zone_parser(self) -> DateTimeParser: return self._time_zone_parser - def __init__(self, config: BaseDateParserConfiguration): + def __init__(self, config): self._time_regexes: List[Pattern] = MinimalTimeExtractorConfiguration.get_time_regex_list() self._utility_configuration = config.utility_configuration - self._numbers: Dict[str, int] = config.numbers self._time_zone_parser = config.time_zone_parser diff --git a/Python/tests/runner.py b/Python/tests/runner.py index 2f15ed6933..3de37503ea 100644 --- a/Python/tests/runner.py 
+++ b/Python/tests/runner.py @@ -72,21 +72,22 @@ def get_specs(recognizer, entity): ENTITY_PATTERN = re.compile('(.*)(Model|Parser|Extractor|Resolver)(.*)') CULTURES = { - 'Arabic': Culture.Arabic, - 'Chinese': Culture.Chinese, - 'Catalan': Culture.Catalan, - 'Dutch': Culture.Dutch, - 'English': Culture.English, - 'EnglishOthers': Culture.EnglishOthers, - 'French': Culture.French, - 'Italian': Culture.Italian, - 'Japanese': Culture.Japanese, - 'Korean': Culture.Korean, - 'Portuguese': Culture.Portuguese, - 'Spanish': Culture.Spanish, - 'SpanishMexican': Culture.SpanishMexican, - 'Turkish': Culture.Turkish, - 'German': Culture.German, + # 'Arabic': Culture.Arabic, + # 'Chinese': Culture.Chinese, + # 'Catalan': Culture.Catalan, + # 'Dutch': Culture.Dutch, + # 'English': Culture.English, + # 'EnglishOthers': Culture.EnglishOthers, + # 'French': Culture.French, + # 'Italian': Culture.Italian, + # 'Japanese': Culture.Japanese, + # 'Korean': Culture.Korean, + # 'Portuguese': Culture.Portuguese, + # 'Spanish': Culture.Spanish, + # 'SpanishMexican': Culture.SpanishMexican, + # 'Turkish': Culture.Turkish, + # 'German': Culture.German, + 'Minimal': Culture.Minimal } SPECS = get_all_specs() diff --git a/Specs/DateTime/Minimal/DateTimeModel.json b/Specs/DateTime/Minimal/DateTimeModel.json new file mode 100644 index 0000000000..dcd3403063 --- /dev/null +++ b/Specs/DateTime/Minimal/DateTimeModel.json @@ -0,0 +1,263 @@ +[ + { + "Input": "4:15", + "Context": { + "ReferenceDateTime": "2023-12-11T01:00:00" + }, + "Results": [ + { + "Text": "4:15", + "Start": 0, + "End": 3, + "TypeName": "datetimeV2.time", + "Resolution": { + "values": [ + { + "timex": "T04:15", + "type": "time", + "value": "04:15:00" + }, + { + "timex": "T16:15", + "type": "time", + "value": "16:15:00" + } + ] + } + } + ] + }, + { + "Input": "9 am", + "Context": { + "ReferenceDateTime": "2023-12-11T01:00:00" + }, + "Results": [ + { + "Text": "9 am", + "Start": 0, + "End": 3, + "TypeName": "datetimeV2.time", + 
"Resolution": { + "values": [ + { + "timex": "T09", + "type": "time", + "value": "09:00:00" + } + ] + } + } + ] + }, + { + "Input": "7:30 pm", + "Context": { + "ReferenceDateTime": "2023-12-11T01:00:00" + }, + "Results": [ + { + "Text": "7:30 pm", + "Start": 0, + "End": 6, + "TypeName": "datetimeV2.time", + "Resolution": { + "values": [ + { + "timex": "T19:30", + "type": "time", + "value": "19:30:00" + } + ] + } + } + ] + }, + { + "Input": "21:40", + "Context": { + "ReferenceDateTime": "2023-12-11T01:00:00" + }, + "Results": [ + { + "Text": "21:40", + "Start": 0, + "End": 4, + "TypeName": "datetimeV2.time", + "Resolution": { + "values": [ + { + "timex": "T21:40", + "type": "time", + "value": "21:40:00" + } + ] + } + } + ] + }, + { + "Input": "15/03/21", + "Context": { + "ReferenceDateTime": "2023-12-11T01:00:00" + }, + "Results": [ + { + "Text": "15/03/21", + "Start": 0, + "End": 7, + "TypeName": "datetimeV2.date", + "Resolution": { + "values": [ + { + "timex": "2021-03-15", + "type": "date", + "value": "2021-03-15" + } + ] + } + } + ] + }, + { + "Input": "16/12", + "Context": { + "ReferenceDateTime": "2023-12-11T01:00:00" + }, + "Results": [ + { + "Text": "16/12", + "Start": 0, + "End": 4, + "TypeName": "datetimeV2.date", + "Resolution": { + "values": [ + { + "timex": "XXXX-12-15", + "type": "date", + "value": "2022-12-16" + }, + { + "timex": "XXXX-12-16", + "type": "date", + "value": "2023-12-16" + } + ] + } + } + ] + }, + { + "Input": "1/2", + "Context": { + "ReferenceDateTime": "2023-12-11T01:00:00" + }, + "Results": [ + { + "Text": "1/2", + "Start": 0, + "End": 2, + "TypeName": "datetimeV2.date", + "Resolution": { + "values": [ + { + "timex": "XXXX-02-01", + "type": "date", + "value": "2024-02-01" + }, + { + "timex": "XXXX-02-01", + "type": "date", + "value": "2023-02-01" + } + ] + } + } + ] + }, + { + "Input": "25-8-91", + "Context": { + "ReferenceDateTime": "2023-12-11T01:00:00" + }, + "Results": [ + { + "Text": "25-8-91", + "Start": 0, + "End": 6, + 
"TypeName": "datetimeV2.date", + "Resolution": { + "values": [ + { + "timex": "1991-08-25", + "type": "date", + "value": "1991-08-25" + } + ] + } + } + ] + }, + { + "Input": "30.10.18", + "Context": { + "ReferenceDateTime": "2023-12-11T01:00:00" + }, + "Results": [ + { + "Text": "30.10.18", + "Start": 0, + "End": 7, + "TypeName": "datetimeV2.date", + "Resolution": { + "values": [ + { + "timex": "2018-10-30", + "type": "date", + "value": "2018-10-30" + } + ] + } + } + ] + } + , + { + "Input": "Book me an appointment on 19/11/24 at 9:21 am", + "Context": { + "ReferenceDateTime": "2023-12-11T01:00:00" + }, + "Results": [ + { + "Text": "19/11/24", + "Start": 26, + "End": 33, + "TypeName": "datetimeV2.date", + "Resolution": { + "values": [ + { + "timex": "2024-11-19", + "type": "date", + "value": "2024-11-19" + } + ] + } + }, + { + "Text": "9:21 am", + "Start": 38, + "End": 44, + "TypeName": "datetimeV2.time", + "Resolution": { + "values": [ + { + "timex": "T09:21", + "type": "time", + "value": "09:21:00" + } + ] + } + } + ] + } +] \ No newline at end of file diff --git a/Specs/Number/Minimal/NumberModel.json b/Specs/Number/Minimal/NumberModel.json new file mode 100644 index 0000000000..650e94b3e7 --- /dev/null +++ b/Specs/Number/Minimal/NumberModel.json @@ -0,0 +1,122 @@ +[ + { + "Input": "192.", + "Results": [ + { + "Text": "192", + "TypeName": "number", + "Resolution": { + "subtype": "integer", + "value": "192" + }, + "Start": 0, + "End": 2 + } + ] + }, + { + "Input": "59", + "Results": [ + { + "Text": "59", + "TypeName": "number", + "Resolution": { + "subtype": "integer", + "value": "59" + }, + "Start": 0, + "End": 1 + } + ] + }, + { + "Input": "1.54", + "Results": [ + { + "Text": "1.54", + "TypeName": "number", + "Resolution": { + "subtype": "integer", + "value": "1.54" + }, + "Start": 0, + "End": 3 + } + ] + }, + { + "Input": "53,859", + "Results": [ + { + "Text": "53,859", + "TypeName": "number", + "Resolution": { + "subtype": "integer", + "value": "53859" + }, + 
"Start": 0, + "End": 5 + } + ] + }, + { + "Input": "1,000,000", + "Results": [ + { + "Text": "1,000,000", + "TypeName": "number", + "Resolution": { + "subtype": "integer", + "value": "1000000" + }, + "Start": 0, + "End": 8 + } + ] + }, + { + "Input": "أريد 43", + "Results": [ + { + "Text": "43", + "TypeName": "number", + "Resolution": { + "subtype": "integer", + "value": "43" + }, + "Start": 5, + "End": 6 + } + ] + }, + { + "Input": "Puc seure a la fila 489?", + "Results": [ + { + "Text": "489", + "TypeName": "number", + "Resolution": { + "subtype": "integer", + "value": "489" + }, + "Start": 20, + "End": 22 + } + ] + }, + { + "Input": "Ik wil 5123 spijkerbroeken", + "Results": [ + { + "Text": "5123", + "TypeName": "number", + "Resolution": { + "subtype": "integer", + "value": "5123" + }, + "Start": 7, + "End": 10 + } + ] + } +] \ No newline at end of file diff --git a/Specs/NumberWithUnit/Minimal/CurrencyModel.json b/Specs/NumberWithUnit/Minimal/CurrencyModel.json new file mode 100644 index 0000000000..1d41a7ceb8 --- /dev/null +++ b/Specs/NumberWithUnit/Minimal/CurrencyModel.json @@ -0,0 +1,77 @@ +[ + { + "Input": "$9,843.50", + "Results": [ + { + "Text": "$9,843.50", + "TypeName": "currency", + "Resolution": { + "value": "9843.5", + "unit": "Dollar" + }, + "Start": 0, + "End": 8 + } + ] + }, + { + "Input": "buy £23 worth of stock NOW", + "Results": [ + { + "Text": "£23", + "TypeName": "currency", + "Resolution": { + "value": "23", + "unit": "Pound" + }, + "Start": 4, + "End": 6 + } + ] + }, + { + "Input": "I only have 50 cent", + "Results": [ + { + "Text": "50 cent", + "TypeName": "currency", + "Resolution": { + "value": "50", + "unit": "Cent" + }, + "Start": 12, + "End": 18 + } + ] + }, + { + "Input": "285 dollars", + "Results": [ + { + "Text": "285 dollars", + "TypeName": "currency", + "Resolution": { + "value": "285", + "unit": "Dollar" + }, + "Start": 0, + "End": 10 + } + ] + }, + { + "Input": "1,000,000 euro", + "Results": [ + { + "Text": "1,000,000 euro", + 
"TypeName": "currency", + "Resolution": { + "value": "1000000", + "unit": "Euro" + }, + "Start": 0, + "End": 13 + } + ] + } +] \ No newline at end of file From 7c8f517a3f319acd10a270a08d5d7cc7ae11ad31 Mon Sep 17 00:00:00 2001 From: Sam Hickey Date: Wed, 21 Feb 2024 17:37:28 +0000 Subject: [PATCH 3/8] Increment package version --- .../datatypes-timex-expression/setup.py | 2 +- Python/libraries/recognizers-choice/setup.py | 2 +- Python/libraries/recognizers-date-time/setup.py | 2 +- .../recognizers-number-with-unit/setup.py | 2 +- Python/libraries/recognizers-number/setup.py | 2 +- Python/libraries/recognizers-sequence/setup.py | 2 +- Python/libraries/recognizers-suite/setup.py | 16 ++++++++-------- Python/libraries/recognizers-text/setup.py | 2 +- 8 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Python/libraries/datatypes-timex-expression/setup.py b/Python/libraries/datatypes-timex-expression/setup.py index 2e39a36a32..52bfcecbbb 100644 --- a/Python/libraries/datatypes-timex-expression/setup.py +++ b/Python/libraries/datatypes-timex-expression/setup.py @@ -11,7 +11,7 @@ def read(fname): NAME = 'datatypes_timex_expression_genesys' -VERSION = '1.1.9' +VERSION = '1.1.13a0' REQUIRES = [] setup( diff --git a/Python/libraries/recognizers-choice/setup.py b/Python/libraries/recognizers-choice/setup.py index b2cd09a8b4..0afaeed0e9 100644 --- a/Python/libraries/recognizers-choice/setup.py +++ b/Python/libraries/recognizers-choice/setup.py @@ -11,7 +11,7 @@ def read(fname): NAME = 'recognizers-text-choice-genesys' -VERSION = '1.1.9' +VERSION = '1.1.13a0' REQUIRES = ['recognizers-text-genesys', 'regex', 'grapheme'] setup( diff --git a/Python/libraries/recognizers-date-time/setup.py b/Python/libraries/recognizers-date-time/setup.py index f9851bd0fa..2c63da5adb 100644 --- a/Python/libraries/recognizers-date-time/setup.py +++ b/Python/libraries/recognizers-date-time/setup.py @@ -10,7 +10,7 @@ def read(fname): NAME = 'recognizers-text-date-time-genesys' -VERSION = 
'1.1.9' +VERSION = '1.1.13a0' REQUIRES = ['recognizers-text-genesys', 'recognizers-text-number-genesys', 'recognizers-text-number-with-unit-genesys', 'regex', 'datedelta', 'python-dateutil'] diff --git a/Python/libraries/recognizers-number-with-unit/setup.py b/Python/libraries/recognizers-number-with-unit/setup.py index 7d29dfd700..ae55702e06 100644 --- a/Python/libraries/recognizers-number-with-unit/setup.py +++ b/Python/libraries/recognizers-number-with-unit/setup.py @@ -10,7 +10,7 @@ def read(fname): NAME = "recognizers-text-number-with-unit-genesys" -VERSION = "1.1.9" +VERSION = "1.1.13a0" REQUIRES = ['recognizers-text-genesys', 'recognizers-text-number-genesys', 'regex'] setup( diff --git a/Python/libraries/recognizers-number/setup.py b/Python/libraries/recognizers-number/setup.py index 5ba1ab81d9..4f7ae98d0d 100644 --- a/Python/libraries/recognizers-number/setup.py +++ b/Python/libraries/recognizers-number/setup.py @@ -10,7 +10,7 @@ def read(fname): NAME = "recognizers-text-number-genesys" -VERSION = "1.1.9" +VERSION = "1.1.13a0" REQUIRES = ['recognizers-text-genesys', 'regex'] setup( diff --git a/Python/libraries/recognizers-sequence/setup.py b/Python/libraries/recognizers-sequence/setup.py index 2c02f3e598..c252f1b5db 100644 --- a/Python/libraries/recognizers-sequence/setup.py +++ b/Python/libraries/recognizers-sequence/setup.py @@ -10,7 +10,7 @@ def read(fname): NAME = "recognizers-text-sequence-genesys" -VERSION = "1.1.9" +VERSION = "1.1.13a0" REQUIRES = ['recognizers-text-genesys', 'recognizers-text-number-genesys', 'regex'] setup( diff --git a/Python/libraries/recognizers-suite/setup.py b/Python/libraries/recognizers-suite/setup.py index 49f226be1e..e469991481 100644 --- a/Python/libraries/recognizers-suite/setup.py +++ b/Python/libraries/recognizers-suite/setup.py @@ -10,15 +10,15 @@ def read(fname): NAME = 'recognizers-text-suite-genesys' -VERSION = '1.1.9' +VERSION = '1.1.13a0' REQUIRES = [ - 'recognizers-text-genesys==1.1.9', - 
'recognizers-text-number-genesys==1.1.9', - 'recognizers-text-number-with-unit-genesys==1.1.9', - 'recognizers-text-date-time-genesys==1.1.9', - 'recognizers-text-sequence-genesys==1.1.9', - 'recognizers-text-choice-genesys==1.1.9', - 'datatypes_timex_expression_genesys==1.1.9' + 'recognizers-text-genesys==1.1.13a0', + 'recognizers-text-number-genesys==1.1.13a0', + 'recognizers-text-number-with-unit-genesys==1.1.13a0', + 'recognizers-text-date-time-genesys==1.1.13a0', + 'recognizers-text-sequence-genesys==1.1.13a0', + 'recognizers-text-choice-genesys==1.1.13a0', + 'datatypes_timex_expression_genesys==1.1.13a0' ] setup( diff --git a/Python/libraries/recognizers-text/setup.py b/Python/libraries/recognizers-text/setup.py index 8c9a7c5e97..c8107be820 100644 --- a/Python/libraries/recognizers-text/setup.py +++ b/Python/libraries/recognizers-text/setup.py @@ -4,7 +4,7 @@ from setuptools import setup, find_packages NAME = "recognizers-text-genesys" -VERSION = "1.1.9" +VERSION = "1.1.13a0" REQUIRES = ['emoji==1.1.0', 'multipledispatch'] setup( From 6e0f072db082ed532fe8cf9974387b891f067122 Mon Sep 17 00:00:00 2001 From: Sam Hickey Date: Wed, 21 Feb 2024 17:38:29 +0000 Subject: [PATCH 4/8] Unskip language tests --- Python/tests/runner.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/Python/tests/runner.py b/Python/tests/runner.py index 3de37503ea..2a59935b6e 100644 --- a/Python/tests/runner.py +++ b/Python/tests/runner.py @@ -72,21 +72,21 @@ def get_specs(recognizer, entity): ENTITY_PATTERN = re.compile('(.*)(Model|Parser|Extractor|Resolver)(.*)') CULTURES = { - # 'Arabic': Culture.Arabic, - # 'Chinese': Culture.Chinese, - # 'Catalan': Culture.Catalan, - # 'Dutch': Culture.Dutch, - # 'English': Culture.English, - # 'EnglishOthers': Culture.EnglishOthers, - # 'French': Culture.French, - # 'Italian': Culture.Italian, - # 'Japanese': Culture.Japanese, - # 'Korean': Culture.Korean, - # 'Portuguese': Culture.Portuguese, - # 
'Spanish': Culture.Spanish, - # 'SpanishMexican': Culture.SpanishMexican, - # 'Turkish': Culture.Turkish, - # 'German': Culture.German, + 'Arabic': Culture.Arabic, + 'Chinese': Culture.Chinese, + 'Catalan': Culture.Catalan, + 'Dutch': Culture.Dutch, + 'English': Culture.English, + 'EnglishOthers': Culture.EnglishOthers, + 'French': Culture.French, + 'Italian': Culture.Italian, + 'Japanese': Culture.Japanese, + 'Korean': Culture.Korean, + 'Portuguese': Culture.Portuguese, + 'Spanish': Culture.Spanish, + 'SpanishMexican': Culture.SpanishMexican, + 'Turkish': Culture.Turkish, + 'German': Culture.German, 'Minimal': Culture.Minimal } From 47b79d2e2fa086ddbf34a101bfa2bdd101441225 Mon Sep 17 00:00:00 2001 From: Sam Hickey Date: Wed, 21 Feb 2024 17:46:50 +0000 Subject: [PATCH 5/8] Update package version --- .../datatypes-timex-expression/setup.py | 2 +- Python/libraries/recognizers-choice/setup.py | 2 +- Python/libraries/recognizers-date-time/setup.py | 2 +- .../recognizers-number-with-unit/setup.py | 2 +- .../number/minimal/extractors.py | 14 -------------- Python/libraries/recognizers-number/setup.py | 2 +- Python/libraries/recognizers-sequence/setup.py | 2 +- Python/libraries/recognizers-suite/setup.py | 16 ++++++++-------- Python/libraries/recognizers-text/setup.py | 2 +- 9 files changed, 15 insertions(+), 29 deletions(-) diff --git a/Python/libraries/datatypes-timex-expression/setup.py b/Python/libraries/datatypes-timex-expression/setup.py index 52bfcecbbb..059d4fa61b 100644 --- a/Python/libraries/datatypes-timex-expression/setup.py +++ b/Python/libraries/datatypes-timex-expression/setup.py @@ -11,7 +11,7 @@ def read(fname): NAME = 'datatypes_timex_expression_genesys' -VERSION = '1.1.13a0' +VERSION = '1.1.13a1' REQUIRES = [] setup( diff --git a/Python/libraries/recognizers-choice/setup.py b/Python/libraries/recognizers-choice/setup.py index 0afaeed0e9..881476f6c2 100644 --- a/Python/libraries/recognizers-choice/setup.py +++ 
b/Python/libraries/recognizers-choice/setup.py @@ -11,7 +11,7 @@ def read(fname): NAME = 'recognizers-text-choice-genesys' -VERSION = '1.1.13a0' +VERSION = '1.1.13a1' REQUIRES = ['recognizers-text-genesys', 'regex', 'grapheme'] setup( diff --git a/Python/libraries/recognizers-date-time/setup.py b/Python/libraries/recognizers-date-time/setup.py index 2c63da5adb..f7d6502e5e 100644 --- a/Python/libraries/recognizers-date-time/setup.py +++ b/Python/libraries/recognizers-date-time/setup.py @@ -10,7 +10,7 @@ def read(fname): NAME = 'recognizers-text-date-time-genesys' -VERSION = '1.1.13a0' +VERSION = '1.1.13a1' REQUIRES = ['recognizers-text-genesys', 'recognizers-text-number-genesys', 'recognizers-text-number-with-unit-genesys', 'regex', 'datedelta', 'python-dateutil'] diff --git a/Python/libraries/recognizers-number-with-unit/setup.py b/Python/libraries/recognizers-number-with-unit/setup.py index ae55702e06..f2e708e178 100644 --- a/Python/libraries/recognizers-number-with-unit/setup.py +++ b/Python/libraries/recognizers-number-with-unit/setup.py @@ -10,7 +10,7 @@ def read(fname): NAME = "recognizers-text-number-with-unit-genesys" -VERSION = "1.1.13a0" +VERSION = "1.1.13a1" REQUIRES = ['recognizers-text-genesys', 'recognizers-text-number-genesys', 'regex'] setup( diff --git a/Python/libraries/recognizers-number/recognizers_number/number/minimal/extractors.py b/Python/libraries/recognizers-number/recognizers_number/number/minimal/extractors.py index 31da9926a9..84c647284a 100644 --- a/Python/libraries/recognizers-number/recognizers_number/number/minimal/extractors.py +++ b/Python/libraries/recognizers-number/recognizers_number/number/minimal/extractors.py @@ -190,17 +190,3 @@ def __init__(self, placeholder: str = MinimalNumeric.PlaceHolderDefault): placeholder), val='DoubleNum') ] - - -class MinimalOrdinalExtractor(BaseNumberExtractor): - @property - def regexes(self) -> List[ - NamedTuple('re_val', [('re', Pattern), ('val', str)])]: - return self.__regexes - - @property 
- def _extract_type(self) -> str: - return Constants.SYS_NUM_ORDINAL - - def __init__(self): - self.__regexes = [] diff --git a/Python/libraries/recognizers-number/setup.py b/Python/libraries/recognizers-number/setup.py index 4f7ae98d0d..2ff07606e3 100644 --- a/Python/libraries/recognizers-number/setup.py +++ b/Python/libraries/recognizers-number/setup.py @@ -10,7 +10,7 @@ def read(fname): NAME = "recognizers-text-number-genesys" -VERSION = "1.1.13a0" +VERSION = "1.1.13a1" REQUIRES = ['recognizers-text-genesys', 'regex'] setup( diff --git a/Python/libraries/recognizers-sequence/setup.py b/Python/libraries/recognizers-sequence/setup.py index c252f1b5db..596d36d53b 100644 --- a/Python/libraries/recognizers-sequence/setup.py +++ b/Python/libraries/recognizers-sequence/setup.py @@ -10,7 +10,7 @@ def read(fname): NAME = "recognizers-text-sequence-genesys" -VERSION = "1.1.13a0" +VERSION = "1.1.13a1" REQUIRES = ['recognizers-text-genesys', 'recognizers-text-number-genesys', 'regex'] setup( diff --git a/Python/libraries/recognizers-suite/setup.py b/Python/libraries/recognizers-suite/setup.py index e469991481..1ae0dbe20f 100644 --- a/Python/libraries/recognizers-suite/setup.py +++ b/Python/libraries/recognizers-suite/setup.py @@ -10,15 +10,15 @@ def read(fname): NAME = 'recognizers-text-suite-genesys' -VERSION = '1.1.13a0' +VERSION = '1.1.13a1' REQUIRES = [ - 'recognizers-text-genesys==1.1.13a0', - 'recognizers-text-number-genesys==1.1.13a0', - 'recognizers-text-number-with-unit-genesys==1.1.13a0', - 'recognizers-text-date-time-genesys==1.1.13a0', - 'recognizers-text-sequence-genesys==1.1.13a0', - 'recognizers-text-choice-genesys==1.1.13a0', - 'datatypes_timex_expression_genesys==1.1.13a0' + 'recognizers-text-genesys==1.1.13a1', + 'recognizers-text-number-genesys==1.1.13a1', + 'recognizers-text-number-with-unit-genesys==1.1.13a1', + 'recognizers-text-date-time-genesys==1.1.13a1', + 'recognizers-text-sequence-genesys==1.1.13a1', + 'recognizers-text-choice-genesys==1.1.13a1', 
+ 'datatypes_timex_expression_genesys==1.1.13a1' ] setup( diff --git a/Python/libraries/recognizers-text/setup.py b/Python/libraries/recognizers-text/setup.py index c8107be820..d53fe5afa6 100644 --- a/Python/libraries/recognizers-text/setup.py +++ b/Python/libraries/recognizers-text/setup.py @@ -4,7 +4,7 @@ from setuptools import setup, find_packages NAME = "recognizers-text-genesys" -VERSION = "1.1.13a0" +VERSION = "1.1.13a1" REQUIRES = ['emoji==1.1.0', 'multipledispatch'] setup( From 1dd1478180f6f7013afbc93a5de5bba1dc38847d Mon Sep 17 00:00:00 2001 From: Sam Hickey Date: Thu, 22 Feb 2024 14:27:21 +0000 Subject: [PATCH 6/8] Add new recogniser for minimal-other --- .../datatypes-timex-expression/setup.py | 2 +- Python/libraries/recognizers-choice/setup.py | 2 +- .../date_time/date_time_recognizer.py | 14 +++-- .../libraries/recognizers-date-time/setup.py | 2 +- .../number_with_unit/minimal/parsers.py | 11 ++-- .../number_with_unit_recognizer.py | 8 ++- .../recognizers-number-with-unit/setup.py | 2 +- .../number/minimal/parsers.py | 62 +------------------ .../number/number_recognizer.py | 16 ++--- Python/libraries/recognizers-number/setup.py | 2 +- .../libraries/recognizers-sequence/setup.py | 2 +- Python/libraries/recognizers-suite/setup.py | 16 ++--- .../recognizers_text/culture.py | 1 + Python/libraries/recognizers-text/setup.py | 2 +- Python/tests/runner.py | 30 ++++----- 15 files changed, 64 insertions(+), 108 deletions(-) diff --git a/Python/libraries/datatypes-timex-expression/setup.py b/Python/libraries/datatypes-timex-expression/setup.py index 059d4fa61b..911747ecfa 100644 --- a/Python/libraries/datatypes-timex-expression/setup.py +++ b/Python/libraries/datatypes-timex-expression/setup.py @@ -11,7 +11,7 @@ def read(fname): NAME = 'datatypes_timex_expression_genesys' -VERSION = '1.1.13a1' +VERSION = '1.1.13a2' REQUIRES = [] setup( diff --git a/Python/libraries/recognizers-choice/setup.py b/Python/libraries/recognizers-choice/setup.py index 
881476f6c2..b1bd8d8efc 100644 --- a/Python/libraries/recognizers-choice/setup.py +++ b/Python/libraries/recognizers-choice/setup.py @@ -11,7 +11,7 @@ def read(fname): NAME = 'recognizers-text-choice-genesys' -VERSION = '1.1.13a1' +VERSION = '1.1.13a2' REQUIRES = ['recognizers-text-genesys', 'regex', 'grapheme'] setup( diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/date_time_recognizer.py b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/date_time_recognizer.py index 1b1bce1a3b..0fdf58ddde 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/date_time_recognizer.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/date_time_recognizer.py @@ -47,8 +47,7 @@ class DateTimeRecognizer(Recognizer[DateTimeOptions]): def __init__(self, target_culture: str = None, options: DateTimeOptions = DateTimeOptions.NONE, - lazy_initialization: bool = True, dmyDateFormat: bool = True): - self.dmyDateFormat = dmyDateFormat + lazy_initialization: bool = True): if options < DateTimeOptions.NONE or options > DateTimeOptions.CALENDAR: raise ValueError() super().__init__(target_culture, options, lazy_initialization) @@ -133,7 +132,11 @@ def initialize_configuration(self): )) self.register_model('DateTimeModel', Culture.Minimal, lambda options: DateTimeModel( - BaseMinimalMergedParser(BaseMinimalMergedParserConfiguration(dmyDateFormat=self.dmyDateFormat), options), + BaseMinimalMergedParser(BaseMinimalMergedParserConfiguration(), options), + BaseMinimalMergedExtractor(BaseMinimalMergedExtractorConfiguration(), options) + )) + self.register_model('DateTimeModel', Culture.Minimal2, lambda options: DateTimeModel( + BaseMinimalMergedParser(BaseMinimalMergedParserConfiguration(dmyDateFormat=False), options), BaseMinimalMergedExtractor(BaseMinimalMergedExtractorConfiguration(), options) )) @@ -142,8 +145,7 @@ def get_datetime_model(self, culture: str = None, 
fallback_to_default_culture: b def recognize_datetime(query: str, culture: str, options: DateTimeOptions = DateTimeOptions.NONE, - reference: datetime = None, fallback_to_default_culture: bool = True, - dmyDateFormat: bool = True) -> List[ModelResult]: - recognizer = DateTimeRecognizer(culture, options, dmyDateFormat=dmyDateFormat) + reference: datetime = None, fallback_to_default_culture: bool = True) -> List[ModelResult]: + recognizer = DateTimeRecognizer(culture, options) model = recognizer.get_datetime_model(culture, fallback_to_default_culture) return model.parse(query, reference) diff --git a/Python/libraries/recognizers-date-time/setup.py b/Python/libraries/recognizers-date-time/setup.py index f7d6502e5e..348b0c7f1e 100644 --- a/Python/libraries/recognizers-date-time/setup.py +++ b/Python/libraries/recognizers-date-time/setup.py @@ -10,7 +10,7 @@ def read(fname): NAME = 'recognizers-text-date-time-genesys' -VERSION = '1.1.13a1' +VERSION = '1.1.13a2' REQUIRES = ['recognizers-text-genesys', 'recognizers-text-number-genesys', 'recognizers-text-number-with-unit-genesys', 'regex', 'datedelta', 'python-dateutil'] diff --git a/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/number_with_unit/minimal/parsers.py b/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/number_with_unit/minimal/parsers.py index a16096fa89..551c014749 100644 --- a/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/number_with_unit/minimal/parsers.py +++ b/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/number_with_unit/minimal/parsers.py @@ -22,20 +22,21 @@ def internal_number_extractor(self) -> Extractor: def connector_token(self) -> str: return MinimalNumericWithUnit.ConnectorToken - def __init__(self, culture_info: CultureInfo): + def __init__(self, culture_info: CultureInfo, decimal_point_separator=True): if culture_info is None: culture_info = CultureInfo(Culture.Minimal) 
super().__init__(culture_info) self._internal_number_extractor = MinimalNumberExtractor( NumberMode.DEFAULT) self._internal_number_parser = AgnosticNumberParserFactory.get_parser( - ParserType.NUMBER, MinimalNumberParserConfiguration(culture_info)) + ParserType.NUMBER, + MinimalNumberParserConfiguration(culture_info, decimal_point_separator=decimal_point_separator)) class MinimalCurrencyParserConfiguration(MinimalNumberWithUnitParserConfiguration): - def __init__(self, culture_info: CultureInfo = None): - super().__init__(culture_info) + def __init__(self, culture_info: CultureInfo = None, decimal_point_separator=True): + super().__init__(culture_info, decimal_point_separator) self.add_dict_to_unit_map(MinimalNumericWithUnit.CurrencySuffixList) self.add_dict_to_unit_map(MinimalNumericWithUnit.CurrencyPrefixList) self.currency_name_to_iso_code_map = MinimalNumericWithUnit.CurrencyNameToIsoCodeMap - self.currency_fraction_code_list = MinimalNumericWithUnit.FractionalUnitNameToCodeMap \ No newline at end of file + self.currency_fraction_code_list = MinimalNumericWithUnit.FractionalUnitNameToCodeMap diff --git a/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/number_with_unit/number_with_unit_recognizer.py b/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/number_with_unit/number_with_unit_recognizer.py index 31c5512786..711f470dad 100644 --- a/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/number_with_unit/number_with_unit_recognizer.py +++ b/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/number_with_unit/number_with_unit_recognizer.py @@ -300,13 +300,19 @@ def initialize_configuration(self): ])) # endregion - # region Catalan + # region Minimal self.register_model('CurrencyModel', Culture.Minimal, lambda options: CurrencyModel([ ExtractorParserModel( BaseMergedUnitExtractor( MinimalCurrencyExtractorConfiguration()), 
BaseMergedUnitParser(MinimalCurrencyParserConfiguration())) ])) + self.register_model('CurrencyModel', Culture.Minimal2, lambda options: CurrencyModel([ + ExtractorParserModel( + BaseMergedUnitExtractor( + MinimalCurrencyExtractorConfiguration()), + BaseMergedUnitParser(MinimalCurrencyParserConfiguration(decimal_point_separator=False))) + ])) # endregion def get_age_model(self, culture: str = None, fallback_to_default_culture: bool = True) -> Model: diff --git a/Python/libraries/recognizers-number-with-unit/setup.py b/Python/libraries/recognizers-number-with-unit/setup.py index f2e708e178..2e379aeb39 100644 --- a/Python/libraries/recognizers-number-with-unit/setup.py +++ b/Python/libraries/recognizers-number-with-unit/setup.py @@ -10,7 +10,7 @@ def read(fname): NAME = "recognizers-text-number-with-unit-genesys" -VERSION = "1.1.13a1" +VERSION = "1.1.13a2" REQUIRES = ['recognizers-text-genesys', 'recognizers-text-number-genesys', 'regex'] setup( diff --git a/Python/libraries/recognizers-number/recognizers_number/number/minimal/parsers.py b/Python/libraries/recognizers-number/recognizers_number/number/minimal/parsers.py index 8d5f4442f7..5ea437a37c 100644 --- a/Python/libraries/recognizers-number/recognizers_number/number/minimal/parsers.py +++ b/Python/libraries/recognizers-number/recognizers_number/number/minimal/parsers.py @@ -1,4 +1,4 @@ -from typing import Dict, Pattern, Optional, List +from typing import Pattern, Optional, List import regex from recognizers_text.culture import Culture @@ -12,71 +12,15 @@ from recognizers_number.number.parsers import BaseNumberParser, NumberParserConfiguration -class MinimalNumberParserConfiguration(BaseNumberParserConfiguration): - - @property - def cardinal_number_map(self) -> Dict[str, int]: - return None - - @property - def ordinal_number_map(self) -> Dict[str, int]: - return None - - @property - def round_number_map(self) -> Dict[str, int]: - return None - - @property - def digital_number_regex(self) -> Pattern: - return 
None - - @property - def fraction_marker_token(self) -> str: - return None +class MinimalNumberParserConfiguration: @property def negative_number_sign_regex(self) -> Pattern: return self._negative_number_sign_regex - @property - def half_a_dozen_regex(self) -> Pattern: - return None - - @property - def half_a_dozen_text(self) -> str: - return None - - @property - def word_separator_token(self) -> str: - return None - - @property - def written_decimal_separator_texts(self) -> List[str]: - return None - - @property - def written_group_separator_texts(self) -> List[str]: - return None - - @property - def written_integer_separator_texts(self) -> List[str]: - return None - - @property - def written_fraction_separator_texts(self) -> List[str]: - return None - - @property - def non_standard_separator_variants(self) -> List[str]: - return None - @property def is_multi_decimal_separator_culture(self) -> bool: - return None - - @property - def round_multiplier_regex(self) -> Pattern: - return None + return False @property def culture_info(self): diff --git a/Python/libraries/recognizers-number/recognizers_number/number/number_recognizer.py b/Python/libraries/recognizers-number/recognizers_number/number/number_recognizer.py index cbd8ab03f5..e7fb5a79ef 100644 --- a/Python/libraries/recognizers-number/recognizers_number/number/number_recognizer.py +++ b/Python/libraries/recognizers-number/recognizers_number/number/number_recognizer.py @@ -39,8 +39,7 @@ class NumberOptions(IntFlag): class NumberRecognizer(Recognizer[NumberOptions]): def __init__(self, target_culture: str = None, options: NumberOptions = NumberOptions.NONE, - lazy_initialization: bool = True, decimal_point_separator: bool = True): - self.decimal_point_separator = decimal_point_separator + lazy_initialization: bool = True): if options < NumberOptions.NONE or options > NumberOptions.NONE: raise ValueError() super().__init__(target_culture, options, lazy_initialization) @@ -260,8 +259,12 @@ def 
initialize_configuration(self): # region Minimal self.register_model('NumberModel', Culture.Minimal, lambda options: NumberModel( AgnosticNumberParserFactory.get_parser( - ParserType.NUMBER, MinimalNumberParserConfiguration(decimal_point_separator= - self.decimal_point_separator)), + ParserType.NUMBER, MinimalNumberParserConfiguration()), + MinimalNumberExtractor(NumberMode.PURE_NUMBER) + )) + self.register_model('NumberModel', Culture.Minimal2, lambda options: NumberModel( + AgnosticNumberParserFactory.get_parser( + ParserType.NUMBER, MinimalNumberParserConfiguration(decimal_point_separator=False)), MinimalNumberExtractor(NumberMode.PURE_NUMBER) )) # endregion @@ -277,9 +280,8 @@ def get_percentage_model(self, culture: str = None, fallback_to_default_culture: def recognize_number(query: str, culture: str, options: NumberOptions = NumberOptions.NONE, - fallback_to_default_culture: bool = True, - decimal_point_separator: bool = True) -> List[ModelResult]: - recognizer = NumberRecognizer(culture, options, decimal_point_separator=decimal_point_separator) + fallback_to_default_culture: bool = True) -> List[ModelResult]: + recognizer = NumberRecognizer(culture, options) model = recognizer.get_number_model(culture, fallback_to_default_culture) return model.parse(query) diff --git a/Python/libraries/recognizers-number/setup.py b/Python/libraries/recognizers-number/setup.py index 2ff07606e3..94cb63cb13 100644 --- a/Python/libraries/recognizers-number/setup.py +++ b/Python/libraries/recognizers-number/setup.py @@ -10,7 +10,7 @@ def read(fname): NAME = "recognizers-text-number-genesys" -VERSION = "1.1.13a1" +VERSION = "1.1.13a2" REQUIRES = ['recognizers-text-genesys', 'regex'] setup( diff --git a/Python/libraries/recognizers-sequence/setup.py b/Python/libraries/recognizers-sequence/setup.py index 596d36d53b..3ce22db7ad 100644 --- a/Python/libraries/recognizers-sequence/setup.py +++ b/Python/libraries/recognizers-sequence/setup.py @@ -10,7 +10,7 @@ def read(fname): NAME = 
"recognizers-text-sequence-genesys" -VERSION = "1.1.13a1" +VERSION = "1.1.13a2" REQUIRES = ['recognizers-text-genesys', 'recognizers-text-number-genesys', 'regex'] setup( diff --git a/Python/libraries/recognizers-suite/setup.py b/Python/libraries/recognizers-suite/setup.py index 1ae0dbe20f..1eeb9c0d40 100644 --- a/Python/libraries/recognizers-suite/setup.py +++ b/Python/libraries/recognizers-suite/setup.py @@ -10,15 +10,15 @@ def read(fname): NAME = 'recognizers-text-suite-genesys' -VERSION = '1.1.13a1' +VERSION = '1.1.13a2' REQUIRES = [ - 'recognizers-text-genesys==1.1.13a1', - 'recognizers-text-number-genesys==1.1.13a1', - 'recognizers-text-number-with-unit-genesys==1.1.13a1', - 'recognizers-text-date-time-genesys==1.1.13a1', - 'recognizers-text-sequence-genesys==1.1.13a1', - 'recognizers-text-choice-genesys==1.1.13a1', - 'datatypes_timex_expression_genesys==1.1.13a1' + 'recognizers-text-genesys==1.1.13a2', + 'recognizers-text-number-genesys==1.1.13a2', + 'recognizers-text-number-with-unit-genesys==1.1.13a2', + 'recognizers-text-date-time-genesys==1.1.13a2', + 'recognizers-text-sequence-genesys==1.1.13a2', + 'recognizers-text-choice-genesys==1.1.13a2', + 'datatypes_timex_expression_genesys==1.1.13a2' ] setup( diff --git a/Python/libraries/recognizers-text/recognizers_text/culture.py b/Python/libraries/recognizers-text/recognizers_text/culture.py index 832d1732ef..04d9cb2fd0 100644 --- a/Python/libraries/recognizers-text/recognizers_text/culture.py +++ b/Python/libraries/recognizers-text/recognizers_text/culture.py @@ -15,6 +15,7 @@ class Culture: German: str = 'de-de' Catalan: str = 'ca-es' Minimal: str = 'min' + MinimalOther: str = 'min-oth' @staticmethod def _get_supported_culture_codes(): diff --git a/Python/libraries/recognizers-text/setup.py b/Python/libraries/recognizers-text/setup.py index d53fe5afa6..edbf75453b 100644 --- a/Python/libraries/recognizers-text/setup.py +++ b/Python/libraries/recognizers-text/setup.py @@ -4,7 +4,7 @@ from setuptools import 
setup, find_packages NAME = "recognizers-text-genesys" -VERSION = "1.1.13a1" +VERSION = "1.1.13a2" REQUIRES = ['emoji==1.1.0', 'multipledispatch'] setup( diff --git a/Python/tests/runner.py b/Python/tests/runner.py index 2a59935b6e..3de37503ea 100644 --- a/Python/tests/runner.py +++ b/Python/tests/runner.py @@ -72,21 +72,21 @@ def get_specs(recognizer, entity): ENTITY_PATTERN = re.compile('(.*)(Model|Parser|Extractor|Resolver)(.*)') CULTURES = { - 'Arabic': Culture.Arabic, - 'Chinese': Culture.Chinese, - 'Catalan': Culture.Catalan, - 'Dutch': Culture.Dutch, - 'English': Culture.English, - 'EnglishOthers': Culture.EnglishOthers, - 'French': Culture.French, - 'Italian': Culture.Italian, - 'Japanese': Culture.Japanese, - 'Korean': Culture.Korean, - 'Portuguese': Culture.Portuguese, - 'Spanish': Culture.Spanish, - 'SpanishMexican': Culture.SpanishMexican, - 'Turkish': Culture.Turkish, - 'German': Culture.German, + # 'Arabic': Culture.Arabic, + # 'Chinese': Culture.Chinese, + # 'Catalan': Culture.Catalan, + # 'Dutch': Culture.Dutch, + # 'English': Culture.English, + # 'EnglishOthers': Culture.EnglishOthers, + # 'French': Culture.French, + # 'Italian': Culture.Italian, + # 'Japanese': Culture.Japanese, + # 'Korean': Culture.Korean, + # 'Portuguese': Culture.Portuguese, + # 'Spanish': Culture.Spanish, + # 'SpanishMexican': Culture.SpanishMexican, + # 'Turkish': Culture.Turkish, + # 'German': Culture.German, 'Minimal': Culture.Minimal } From 7d2bb9a10d0e81f9c46bb5fc694d13f413b454ec Mon Sep 17 00:00:00 2001 From: Sam Hickey Date: Thu, 22 Feb 2024 14:35:57 +0000 Subject: [PATCH 7/8] Fix tests --- .../recognizers_date_time/date_time/date_time_recognizer.py | 2 +- .../number_with_unit/number_with_unit_recognizer.py | 2 +- .../recognizers_number/number/number_recognizer.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/date_time_recognizer.py 
b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/date_time_recognizer.py index 0fdf58ddde..280cfb16a1 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/date_time_recognizer.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/date_time_recognizer.py @@ -135,7 +135,7 @@ def initialize_configuration(self): BaseMinimalMergedParser(BaseMinimalMergedParserConfiguration(), options), BaseMinimalMergedExtractor(BaseMinimalMergedExtractorConfiguration(), options) )) - self.register_model('DateTimeModel', Culture.Minimal2, lambda options: DateTimeModel( + self.register_model('DateTimeModel', Culture.MinimalOther, lambda options: DateTimeModel( BaseMinimalMergedParser(BaseMinimalMergedParserConfiguration(dmyDateFormat=False), options), BaseMinimalMergedExtractor(BaseMinimalMergedExtractorConfiguration(), options) )) diff --git a/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/number_with_unit/number_with_unit_recognizer.py b/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/number_with_unit/number_with_unit_recognizer.py index 711f470dad..f84c97493a 100644 --- a/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/number_with_unit/number_with_unit_recognizer.py +++ b/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/number_with_unit/number_with_unit_recognizer.py @@ -307,7 +307,7 @@ def initialize_configuration(self): MinimalCurrencyExtractorConfiguration()), BaseMergedUnitParser(MinimalCurrencyParserConfiguration())) ])) - self.register_model('CurrencyModel', Culture.Minimal2, lambda options: CurrencyModel([ + self.register_model('CurrencyModel', Culture.MinimalOther, lambda options: CurrencyModel([ ExtractorParserModel( BaseMergedUnitExtractor( MinimalCurrencyExtractorConfiguration()), diff --git a/Python/libraries/recognizers-number/recognizers_number/number/number_recognizer.py 
b/Python/libraries/recognizers-number/recognizers_number/number/number_recognizer.py index e7fb5a79ef..38839f93ba 100644 --- a/Python/libraries/recognizers-number/recognizers_number/number/number_recognizer.py +++ b/Python/libraries/recognizers-number/recognizers_number/number/number_recognizer.py @@ -262,7 +262,7 @@ def initialize_configuration(self): ParserType.NUMBER, MinimalNumberParserConfiguration()), MinimalNumberExtractor(NumberMode.PURE_NUMBER) )) - self.register_model('NumberModel', Culture.Minimal2, lambda options: NumberModel( + self.register_model('NumberModel', Culture.MinimalOther, lambda options: NumberModel( AgnosticNumberParserFactory.get_parser( ParserType.NUMBER, MinimalNumberParserConfiguration(decimal_point_separator=False)), MinimalNumberExtractor(NumberMode.PURE_NUMBER) From 3722ab05cb9379289ecd318ac9b60e5f8d614db8 Mon Sep 17 00:00:00 2001 From: Sam Hickey Date: Thu, 22 Feb 2024 14:40:50 +0000 Subject: [PATCH 8/8] Unskip tests --- Python/tests/runner.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/Python/tests/runner.py b/Python/tests/runner.py index 3de37503ea..2a59935b6e 100644 --- a/Python/tests/runner.py +++ b/Python/tests/runner.py @@ -72,21 +72,21 @@ def get_specs(recognizer, entity): ENTITY_PATTERN = re.compile('(.*)(Model|Parser|Extractor|Resolver)(.*)') CULTURES = { - # 'Arabic': Culture.Arabic, - # 'Chinese': Culture.Chinese, - # 'Catalan': Culture.Catalan, - # 'Dutch': Culture.Dutch, - # 'English': Culture.English, - # 'EnglishOthers': Culture.EnglishOthers, - # 'French': Culture.French, - # 'Italian': Culture.Italian, - # 'Japanese': Culture.Japanese, - # 'Korean': Culture.Korean, - # 'Portuguese': Culture.Portuguese, - # 'Spanish': Culture.Spanish, - # 'SpanishMexican': Culture.SpanishMexican, - # 'Turkish': Culture.Turkish, - # 'German': Culture.German, + 'Arabic': Culture.Arabic, + 'Chinese': Culture.Chinese, + 'Catalan': Culture.Catalan, + 'Dutch': Culture.Dutch, + 
'English': Culture.English, + 'EnglishOthers': Culture.EnglishOthers, + 'French': Culture.French, + 'Italian': Culture.Italian, + 'Japanese': Culture.Japanese, + 'Korean': Culture.Korean, + 'Portuguese': Culture.Portuguese, + 'Spanish': Culture.Spanish, + 'SpanishMexican': Culture.SpanishMexican, + 'Turkish': Culture.Turkish, + 'German': Culture.German, 'Minimal': Culture.Minimal }