# Source code for talkgooder

# SPDX-License-Identifier: MIT

import re
from pprint import pprint  # noqa E401
import sys  # noqa E401

"""Utilities to smooth out language rules.

``talkgooder`` attempts to smooth out grammar, punctuation, and number-related corner cases when
formatting text for human consumption. It is intended for applications where you know there's a
noun and are trying to generate text, but you don't know much about it.
"""


[docs] def plural( text: str, number: int | float, language="en-US", addl_same=[], addl_special_s=[], addl_irregular={}, caps_mode=0, ) -> str: """Determine the plural of a noun depending upon quantity. Given a quantity of nouns, return the most likely plural form. Language is complicated and pluralization rules are not always consistent, so this function supports user-supplied rules to accommodate exceptions specific to the situation. **Supported locales:** * ``en-US``: American English Args: text (str): The noun to convert. number (int or float): The quantity of nouns. language (str): Which language rules to apply, specified by locale (default: ``en-US``). addl_same (list): Additional words where the singular and plural are the same. addl_special_s (list): Additional words that always end in s for odd reasons (e.g., ``["piano","hello",...]``). addl_irregular (dict): Additional pairs of irregular plural nouns (e.g., ``{"mouse": "mice", "person": "people", ...}``). caps_mode (int): * ``0``: Attempt to infer whether suffix is lower or upper case (default). * ``1``: Force suffix to be upper case. * ``2``: Force suffix to be lower case. Returns: String: The plural of the provided noun. Raises: TypeError: Text must be a string. ValueError: Language must be a supported locale. 
""" # Thanks to Grammarly for publishing a guideline that helped inspire these rules: # https://www.grammarly.com/blog/irregular-plural-nouns/ # Make sure something reasonable was supplied if not isinstance(number, (int, float)): raise TypeError("Number must be an int or a float") if language.lower() == "en-us": # Same singular as plural, can be extended via addl_same parameter en_us_same = [ "aircraft", "buffalo", "deer", "fish", "goose", "hovercraft", "moose", "salmon", "sheep", "shrimp", "spacecraft", "trout", "watercraft", ] + addl_same # Doesn't follow other rules, plural is always s, can be extended via addl_special_s en_us_special_s = [ "cello", "hello", "photo", "piano", "proof", "roof", "spoof", "zero", ] + addl_special_s # Irregular plurals where there's no rule, it just is, can be extended via addl_irregular en_us_irregular = dict( list( { "child": "children", "criterion": "criteria", "die": "dice", "louse": "lice", "man": "men", "mouse": "mice", "ox": "oxen", "person": "people", "phenomenon": "phenomena", "tooth": "teeth", "woman": "women", }.items() ) + list(addl_irregular.items()) ) # Consonent before y pattern en_us_ies_pattern = re.compile( r"[b-df-hj-np-tv-z]+y$", re.IGNORECASE, ) # If the entire word is upper case or caps_mode is 1, capitalize it if caps_mode == 2: casing = "lower" elif text.isupper() or caps_mode == 1: casing = "upper" else: casing = "lower" if casing == "upper": i = "I" a = "A" ices = "ICES" es = "ES" ies = "IES" ves = "VES" s = "S" else: i = "i" a = "a" ices = "ices" es = "es" ies = "ies" ves = "ves" s = "s" # If the number is an integer that is exactly 1, nothing to do if isinstance(number, int) and number == 1: return text # If the word is the same whether singular or plural, nothing to do if text.lower() in en_us_same: return text # Some words follow no rules whatsoever for item in en_us_irregular.keys(): if text.lower().endswith(item.lower()): if text.isupper(): return en_us_irregular[item].upper() else: return 
en_us_irregular[item] if text.lower() in en_us_special_s: # Certain words always end with s for Reasons return "%s%s" % (text, s) if text.lower().endswith("us"): # Words that end in "us" change to "i" when plural return "%s%s" % (text[:-2], i) if text.lower().endswith("um"): # Words that end in "um" change to "a" when plural return "%s%s" % (text[:-2], a) if text.lower().endswith(("ix", "ex")): # Words that end in "ix" or "ex" change to "ices" when plural return "%s%s" % (text[:-2], ices) if text.lower().endswith(("o", "s", "x", "z", "ch", "sh", "is")): # Words that end in "o", "s", "x", "z", "ch", "sh", and "is" change to "es" when plural return "%s%s" % (text, es) if text.lower().endswith(("f", "fe")): # Words that end in "f" or "fe" end in "ves" when plural return "%s%s" % (text[:-1], ves) if en_us_ies_pattern.findall(text): # Words that end in a consonant then "y" end in "ies" when plural return "%s%s" % (text[:-1], ies) # Remaining words end in "s" when plural return "%s%s" % (text, s) else: raise ValueError("Language must be a supported locale.")
def possessive(
    text: str,
    language="en-US",
    caps_mode=0,
) -> str:
    """Convert a noun to its possessive, because apostrophes can be hard.

    **Supported locales:**

    * ``en-US``: American English

    Args:
        text (str):
            A noun to be made possessive.
        language (str):
            Which language rules to apply (default ``en-US``).
        caps_mode (int):
            * ``0``: Attempt to infer whether suffix is lower or upper case (default).
            * ``1``: Force suffix to be upper case.
            * ``2``: Force suffix to be lower case.

    Returns:
        String:
            The possessive of the provided string.

    Raises:
        TypeError: Text must be a string.
        ValueError: Language must be a supported locale.
    """

    if not isinstance(text, str):
        raise TypeError("Text must be a string")
    if language.lower() != "en-us":
        raise ValueError("Language must be a supported locale.")

    # When a noun ends in "s", just add an apostrophe. The check covers both cases so that an
    # all-caps noun such as "BOSS" becomes "BOSS'" rather than "BOSS'S".
    if text.endswith(("s", "S")):
        return f"{text}'"

    # caps_mode 2 always forces lower case; otherwise use upper case when forced (caps_mode 1)
    # or when the whole noun is already upper case
    if caps_mode != 2 and (caps_mode == 1 or text.isupper()):
        return f"{text}'S"

    # Default is lower
    return f"{text}'s"
def num2word(
    number: int,
    language="en-US",
) -> str:
    """Determine if an integer should be expanded to a word (per the APA style manual).

    The APA style manual specifies integers between 1 and 9 should be written out as a word.
    Everything else should be represented as digits.

    **Supported locales:**

    * ``en-US``: American English

    Args:
        number (int):
            An integer.
        language (str):
            Which language rules to apply (default ``en-US``).

    Returns:
        String:
            The word or string-formatted number, as appropriate.

    Raises:
        TypeError: Number must be an int.
        ValueError: Language must be a supported locale.
    """

    # Make sure something reasonable was supplied
    if not isinstance(number, int):
        raise TypeError("Number must be an int.")

    # Validate the locale before any early return so an unsupported locale is always reported,
    # not only when the number happens to fall in the 1-9 range
    if language.lower() != "en-us":
        raise ValueError("Language must be a supported locale.")

    words = (
        "one",
        "two",
        "three",
        "four",
        "five",
        "six",
        "seven",
        "eight",
        "nine",
    )

    # Per APA style guide, only 1-9 should be expanded
    if 1 <= number <= 9:
        return words[number - 1]
    return str(number)
[docs] def isAre( number: int | float, language="en-US", ) -> str: """Given a quanity, determine if article should be ``is`` or ``are``. Given a quantity of nouns or noun-equivalents, determine whether the article should be ``is`` or ``are``. For example, "there is one cat," and "there are two cats." **Supported locales:** * ``en-US``: American English Args: number (int | float): Quantity of items. language (str): Which language rules to apply, specified by locale (default ``en-US``). Returns: String: ``is`` or ``are``, as appropriate. Raises: TypeError: number must be an int or float. ValueError: language must be a supported locale. """ if not isinstance(number, (int, float)): raise TypeError("Number must be an int or a float.") if language.lower() == "en-us": # Anything other than integer 1 (even 1.0) uses "are" if number == 1 and isinstance(number, int): return "is" else: return "are" else: raise ValueError("Language must be a supported locale.")
[docs] def wasWere( number: int | float, language="en-US", ) -> str: """Given a quanity, determine if article should be ``ws`` or ``were``. Given a quantity of nouns or noun-equivalents, determine whether the article should be ``was`` or ``were``. For example, "there was one cat," and "there were two cats." **Supported locales:** * ``en-US``: American English Args: number (int | float): Quantity of items. language (str): Which language rules to apply, specified by locale (default ``en-US``). Returns: String: ``was`` or ``were``, as appropriate. Raises: TypeError: number must be an int or float. ValueError: language must be a supported locale. """ if not isinstance(number, (int, float)): raise TypeError("Number must be an int or a float.") if language.lower() == "en-us": # Anything other than integer 1 (even 1.0) uses "were" if number == 1 and isinstance(number, int): return "was" else: return "were" else: raise ValueError("Language must be a supported locale.")
[docs] def aAn( noun: str | int | float, language="en-US", ) -> str: """Given a noun or noun-equivalent, determine whether the article is ``a`` or ``an``. Nouns and noun-equivalents with a soft vowel beginning generally use ``an``, and everything else uses ``a``. **Supported locales:** * ``en-US``: American English Args: noun (str | int | float): A noun or noun-equivalent, as a word or a number. language (str): Which language rules to apply, specified by locale (default ``en-US``). Returns: String: ``a`` or ``an``, as appropriate. Raises: TypeError: Noun must be a string, int, or float. ValueError: Language must be a supported locale. """ if not isinstance(noun, (str, int, float)): raise TypeError("Noun must be a string, int, or float.") if language.lower() == "en-us": # Vowels, numbers that start with 8, and 18 use the "an" article if ( str(noun).lower().startswith(("a", "e", "i", "o", "u", "8", "18.")) or str(noun) == "18" ): return "an" else: return "a" else: raise ValueError("Language must be a supported locale.")