# Source code for languagechange.search
"""Helper utilities for searching corpora for target terms."""
from typing import List, Set
[docs]
def expand_dictionary(words: List[str]):
    """Expand a word list into additional search terms (not implemented yet).

    This is a stub reserved for future dictionary expansion utilities; it
    performs no work and always raises.

    Args:
        words (List[str]): Words to expand into additional search terms.

    Raises:
        NotImplementedError: Always, regardless of ``words``.
    """
    raise NotImplementedError()
[docs]
class SearchTerm:
"""Describes a search target and the features to scan within a corpus line."""
VALID_WORD_FEATURES = ['lemma', 'token', 'pos']
def __init__(self, term: str, regex: bool = False, word_feature: str | Set = 'token'):
"""Initialise a search term for corpus queries.
Args:
term (str): The string pattern to look for.
regex (bool, optional): Whether to treat the term as a regular expression. Defaults to False.
word_feature (str|Set, optional): Features to consider ('token', 'lemma', 'pos'). Defaults to 'token'.
"""
self.term = term
self.regex = regex
self.word_feature = word_feature if isinstance(word_feature, Set) else {word_feature}
if not self.word_feature.issubset(self.VALID_WORD_FEATURES):
raise ValueError("'word_feature' must be set to one of the following values:", self.VALID_WORD_FEATURES)