Source code for languagechange.usages

import enum
import pickle
import logging
import os
import re

import jsonlines

from pathlib import Path

from languagechange.utils import Time



[docs]
class POS(enum.Enum):
    """Part-of-speech tags, each mapped to a fixed integer value."""

    NOUN = 1
    VERB = 2
    ADJECTIVE = 3
    ADVERB = 4




[docs]
class Target:
    """A target word identified by its surface string.

    Equality and hashing are both based on the ``target`` string, so two
    ``Target`` objects built from the same string are interchangeable as
    dictionary keys and set members.

    The ``lemma`` and ``pos`` attributes only exist after ``set_lemma`` /
    ``set_pos`` have been called.
    """

    def __init__(self, target: str):
        """Store the target string."""
        self.target = target

    def set_lemma(self, lemma: str):
        """Attach a lemma to this target."""
        self.lemma = lemma

    def set_pos(self, pos: "POS"):
        """Attach a part-of-speech tag to this target."""
        self.pos = pos

    def __str__(self):
        return self.target

    def __eq__(self, other):
        # Must agree with __hash__: without it, equal-hash targets would only
        # ever compare by identity and never deduplicate in sets or dicts.
        if not isinstance(other, Target):
            return NotImplemented
        return self.target == other.target

    def __hash__(self):
        return hash(self.target)




[docs]
class TargetUsage:
    """A single usage of a target word: a text plus the offsets of the target in it.

    The instance stores ``text_``, ``offsets`` and ``time`` as plain
    attributes; any extra keyword arguments are copied onto the instance too.
    """

    def __init__(self, text: str, offsets: str, time: "Time" = None, **kwargs):
        """Store the usage text, the target offsets, the time and any extras.

        Extra keyword arguments are written straight into the instance
        ``__dict__`` and therefore override the attributes above on a name
        clash.
        """
        self.text_ = text
        # NOTE(review): annotated as ``str`` but start()/end() read
        # offsets[0] and offsets[1] -- presumably a (start, end) pair; confirm.
        self.offsets = offsets
        self.time = time
        self.__dict__.update(kwargs)

    def text(self):
        """Return the usage text."""
        return self.text_

    def start(self):
        """Return the first element of ``offsets``."""
        return self.offsets[0]

    def end(self):
        """Return the second element of ``offsets``."""
        return self.offsets[1]

    def time(self):
        """Return the ``time`` attribute.

        On instances built through ``__init__`` the ``time`` attribute
        assigned there shadows this method, so ``usage.time`` yields the
        stored value directly. The method is kept so the class-level
        interface is unchanged.
        """
        return self.time

    def to_dict(self):
        """Return the instance attributes as a new dict with ``time`` stringified.

        A shallow copy is used so that converting to a dict does not replace
        the instance's own ``time`` attribute with its string form.
        """
        d = dict(self.__dict__)
        d['time'] = str(d['time'])
        return d

    def __getitem__(self, item):
        # Indexing and slicing are delegated to the usage text.
        return self.text_[item]

    def __str__(self):
        return self.text_




[docs]
class DWUGUsage(TargetUsage):
    """Target usage extended with target, date, grouping, identifier and description."""

    def __init__(self, target, date, grouping, identifier, description,  **args):
        """Initialise the base usage from ``args``, then attach the extra fields."""
        super(DWUGUsage, self).__init__(**args)
        # Assigned after the base initialiser, in this fixed order, so the
        # attribute order on the instance stays the same.
        for attr_name, attr_value in (
            ("target", target),
            ("date", date),
            ("grouping", grouping),
            ("identifier", identifier),
            ("description", description),
        ):
            setattr(self, attr_name, attr_value)




[docs]
class TargetUsageList(list):
    """A list of usages that can be pickled to disk and read back."""

    def save(self, path, target):
        """Pickle this list to the file ``target`` inside directory ``path``.

        The directory is created (including parents) when it does not exist.
        """
        Path(path).mkdir(parents=True, exist_ok=True)
        with open(os.path.join(path, target), 'wb+') as f:
            pickle.dump(self, f)

    @staticmethod
    def load(path, target):
        """Unpickle and return the object stored in ``target`` inside ``path``.

        Declared static so it works both as ``TargetUsageList.load(...)`` and
        on an instance; without the decorator an instance call would pass the
        instance as ``path`` and fail.

        SECURITY: ``pickle.load`` can execute arbitrary code. Only load files
        that come from a trusted source.
        """
        with open(os.path.join(path, target), 'rb') as f:
            return pickle.load(f)

    def time_axis(self):
        """Return the ``time`` attribute of every usage, in list order."""
        return [usage.time for usage in self]

    def to_dict(self):
        """Return the result of ``to_dict()`` for every usage, in list order."""
        return [tu.to_dict() for tu in self]





[docs]
class UsageDictionary(dict):
    """Dictionary from a word to its usage list, stored as one JSON Lines file per word.

    Each word ``w`` is written to / read from ``<path>/<w>_usages.jsonl``.
    """

    def save(self, path, words=None):
        """Write the usages of ``words`` to ``path``, one ``.jsonl`` file per word.

        Args:
            path: Output directory; created (including parents) if missing.
            words: Iterable of keys to write. ``None`` or any empty
                collection means "all keys", matching ``load``.

        Words that are requested but not present are reported through
        ``logging.info`` and otherwise ignored.
        """
        Path(path).mkdir(parents=True, exist_ok=True)

        present = set(self.keys())
        words = set(words) if words else present
        words_not_present = words - present
        if words_not_present:
            logging.info(f'Words {words_not_present} are not in the usage dictionary')

        for k in present & words:
            output_fn = f"{path}/{k}_usages.jsonl"
            with jsonlines.open(output_fn, 'w') as writer:
                records = []
                for tu in self[k].to_dict():
                    # Expose the text under 'text' (first key) instead of the
                    # internal 'text_' attribute name.
                    record = {'text': tu['text_']} | tu
                    record.pop('text_')
                    records.append(record)
                writer.write_all(records)
                logging.info(f"Usages written to {output_fn}")

    def load(self, path, words=None):
        """Replace the dictionary content with usages read from ``path``.

        Args:
            path: Directory containing ``<word>_usages.jsonl`` files.
            words: Iterable of words to load. ``None`` or any empty
                collection means "every matching file in the directory".

        Returns:
            ``None``. When ``path`` does not exist an error is logged and the
            dictionary is left untouched.

        The word is everything in the file name before the ``_usages.jsonl``
        suffix, so names written by ``save`` round-trip even when they contain
        non-ASCII letters, digits, hyphens or underscores.

        Each record is passed to ``TargetUsage(**record)`` unchanged, so
        ``time`` comes back as whatever JSON value was stored in the file.
        """
        if not os.path.exists(path):
            logging.error(f'Path {path} does not exist.')
            return None
        self.clear()
        words = set(words) if words else set()
        suffix = '_usages.jsonl'
        for fn in os.listdir(path):
            if not fn.endswith(suffix):
                continue
            lemma = fn[:-len(suffix)]
            if words and lemma not in words:
                continue
            with jsonlines.open(os.path.join(path, fn), 'r') as reader:
                self[lemma] = TargetUsageList(TargetUsage(**tu) for tu in reader)
                logging.info(f"Loaded usages from {os.path.join(path, fn)}")
        not_loaded_words = words.difference(set(self.keys()))
        if not_loaded_words:
            logging.info(f"Could not find usages for words {not_loaded_words}")