# Source code for languagechange.usages
import enum
import pickle
import logging
import os
import re
import jsonlines
from pathlib import Path
from languagechange.utils import Time
# [docs]
class POS(enum.Enum):
    """Closed set of part-of-speech tags that can be attached to a target."""

    # Integer values are explicit so they stay stable if members are reordered.
    NOUN = 1
    VERB = 2
    ADJECTIVE = 3
    ADVERB = 4
# [docs]
class Target:
    """A target word, optionally enriched with a lemma and a part of speech.

    Instances hash and compare by their ``target`` string only, so two
    ``Target`` objects built from the same string are interchangeable as
    dictionary keys and set members. ``lemma`` and ``pos`` do not take part
    in equality and only exist after the corresponding setter was called.
    """

    def __init__(self, target: str):
        """Store the surface form of the target word.

        Args:
            target: the string that identifies this target.
        """
        self.target = target

    def set_lemma(self, lemma: str):
        """Attach a lemma to this target (stored as ``self.lemma``)."""
        self.lemma = lemma

    def set_pos(self, pos: "POS"):
        """Attach a part of speech to this target (stored as ``self.pos``)."""
        self.pos = pos

    def __str__(self):
        return self.target

    def __eq__(self, other):
        # Defined together with __hash__: without it, equal-hash targets fall
        # back to identity comparison and dict/set lookups by an equivalent
        # Target never succeed.
        if not isinstance(other, Target):
            return NotImplemented
        return self.target == other.target

    def __hash__(self):
        return hash(self.target)
# [docs]
class TargetUsage:
    """A single usage of a target word: a text plus the target's offsets in it.

    Attributes set by the constructor:
        text_: the usage text. The trailing underscore keeps the attribute
            from shadowing the ``text()`` accessor.
        offsets: a two-item indexable; ``offsets[0]`` is returned by
            ``start()`` and ``offsets[1]`` by ``end()``.
            NOTE(review): presumably integer character positions of the
            target inside the text -- confirm against callers.
        time: stored as given; ``to_dict()`` serializes it with ``str()``.

    Every additional keyword argument becomes an instance attribute of the
    same name.
    """

    def __init__(self, text: str, offsets: "Sequence[int]", time: "Time" = None, **kwargs):
        self.text_ = text
        self.offsets = offsets
        self.time = time
        # Extra metadata is attached verbatim; a key equal to one of the
        # attribute names above overrides that attribute.
        self.__dict__.update(kwargs)

    def text(self):
        """Return the usage text."""
        return self.text_

    def start(self):
        """Return the first item of ``offsets``."""
        return self.offsets[0]

    def end(self):
        """Return the second item of ``offsets``."""
        return self.offsets[1]

    def time(self):
        """Return the time of this usage.

        NOTE: ``__init__`` always assigns an instance attribute called
        ``time``, which shadows this method on every instance, so
        ``usage.time`` yields the stored value and ``usage.time()`` is not
        callable. Read the ``time`` attribute directly. The method is kept
        only so that ``TargetUsage.time(usage)`` keeps working.
        """
        return self.time

    def to_dict(self):
        """Return a new dict of the instance attributes with ``time`` as a string.

        The result is a shallow copy: the instance keeps its original
        ``time`` value, and callers may add or remove keys freely.
        """
        # Copy first. Writing into self.__dict__ would permanently replace
        # self.time with its string form as a side effect of serializing.
        d = dict(self.__dict__)
        d['time'] = str(d['time'])
        return d

    def __getitem__(self, item):
        # Index or slice the usage text directly, e.g. usage[start:end].
        return self.text_[item]

    def __str__(self):
        return self.text_
# [docs]
class DWUGUsage(TargetUsage):
    """A ``TargetUsage`` that also carries five extra metadata fields.

    ``target``, ``date``, ``grouping``, ``identifier`` and ``description``
    are stored as attributes of the same name; all remaining keyword
    arguments are handed to ``TargetUsage.__init__`` unchanged.
    """

    def __init__(self, target, date, grouping, identifier, description, **args):
        # Let the base class set up its own attributes before the extras.
        super().__init__(**args)
        self.target, self.date = target, date
        self.grouping, self.identifier = grouping, identifier
        self.description = description
# [docs]
class TargetUsageList(list):
    """A list of usages with pickle persistence and bulk accessors."""

    def save(self, path, target):
        """Pickle this list to the file ``target`` inside directory ``path``.

        The directory (including missing parents) is created when needed.
        """
        Path(path).mkdir(parents=True, exist_ok=True)
        with open(os.path.join(path, target), 'wb') as f:
            pickle.dump(self, f)

    @staticmethod
    def load(path, target):
        """Unpickle and return the object stored in ``path``/``target``.

        Declared as a static method so it can be called both as
        ``TargetUsageList.load(path, target)`` and on an existing instance.
        """
        # SECURITY: pickle.load can execute arbitrary code while decoding.
        # Only load files that this code base wrote itself.
        with open(os.path.join(path, target), 'rb') as f:
            return pickle.load(f)

    def time_axis(self):
        """Return the ``time`` attribute of every usage, in list order."""
        return [usage.time for usage in self]

    def to_dict(self):
        """Return the ``to_dict()`` result of every usage, in list order."""
        return [tu.to_dict() for tu in self]
# [docs]
class UsageDictionary(dict):
    """Mapping from a word to its usages, persisted as one JSON Lines file per word.

    Files are named ``<word>_usages.jsonl``. ``save`` writes them and ``load``
    reads them back, so both methods must agree on that naming scheme.
    """

    def save(self, path, words=None):
        """Write the usages of the selected words to ``path``.

        Args:
            path: output directory; created (with parents) when missing.
            words: iterable of keys to write. ``None`` or any empty
                collection means "every key in the dictionary", matching
                how ``load`` interprets an empty selection.
        """
        Path(path).mkdir(parents=True, exist_ok=True)
        requested = set(words) if words else set(self)

        words_not_present = requested.difference(self)
        if words_not_present:
            logging.info(f'Words {words_not_present} are not in the usage dictionary')

        for k in requested.intersection(self):
            output_fn = f"{path}/{k}_usages.jsonl"
            records = []
            for tu in self[k].to_dict():
                # Replace the 'text_' key with a 'text' key, keeping 'text'
                # first; 'text' is the keyword TargetUsage() expects on load.
                record = {'text': tu['text_']} | tu
                record.pop('text_')
                records.append(record)
            with jsonlines.open(output_fn, 'w') as writer:
                writer.write_all(records)
            logging.info(f"Usages written to {output_fn}")

    def load(self, path, words=None):
        """Replace the dictionary content with the usages stored in ``path``.

        Args:
            path: directory containing ``<word>_usages.jsonl`` files.
            words: iterable of words to load. ``None`` or any empty
                collection loads every usage file found.

        Returns:
            None. When ``path`` does not exist an error is logged and the
            dictionary is left untouched.
        """
        if not os.path.exists(path):
            logging.error(f'Path {path} does not exist.')
            return None

        self.clear()
        words = set(words) if words else set()
        for fn in os.listdir(path):
            # Match the whole file name and accept any characters in the
            # word. An unanchored ASCII-only pattern truncates words such as
            # 'größe' or 'plane_nn' and also picks up '*_usages.jsonl.bak'.
            match = re.fullmatch(r'(.+)_usages\.jsonl', fn)
            if match is None:
                continue
            lemma = match.group(1)
            if words and lemma not in words:
                continue
            usage_file = os.path.join(path, fn)
            with jsonlines.open(usage_file, 'r') as reader:
                self[lemma] = TargetUsageList(TargetUsage(**tu) for tu in reader)
            logging.info(f"Loaded usages from {usage_file}")

        not_loaded_words = words.difference(self)
        if not_loaded_words:
            logging.info(f"Could not find usages for words {not_loaded_words}")