Source code for languagechange.models.change.metrics

from scipy.spatial.distance import cdist, cosine
from languagechange.models.meaning.clustering import Clustering
import numpy as np
from collections import Counter
from scipy.spatial.distance import jensenshannon
from typing import List, Union

[docs] class ChangeModel(): def __init__(self): pass
[docs] class BinaryChange(ChangeModel): def __init__(self): pass
[docs] def predict(self): pass
[docs] class GradedChange(ChangeModel): def __init__(self): pass
[docs] def compute_scores(vectors_list): pass
[docs] class Threshold(BinaryChange): def __init__(self): pass
[docs] def set_threshold(self, threshold): self.threshold = threshold
[docs] class AutomaticThrehold(Threshold): def __init__(self): pass
[docs] def compute_threshold(self, scores, func = lambda x: np.mean(x)): self.threshold = func(scores)
[docs] class OptimalThrehold(Threshold): def __init__(self): pass
[docs] def compute_threshold(self, scores, vrange=np.arange(0.,1.), evaluator=None): best_score = None best_threshold = None for v in vrange: labels = np.array(scores < v, dtype=int) score = evaluator(labels) if score > best_score or best_score == None: best_score = score best_threshold = v self.threshold = best_threshold
[docs] class APD(GradedChange): def __init__(self): pass
[docs] def compute_scores(self, embeddings1, embeddings2, metric='cosine'): return np.mean(cdist(embeddings1, embeddings2, metric=metric))
[docs] class PRT(GradedChange): def __init__(self): pass
[docs] def compute_scores(self, embeddings1, embeddings2, metric='cosine'): return cosine(embeddings1.mean(axis=0), embeddings2.mean(axis=0))
[docs] class PJSD(GradedChange): def __init__(self): pass
[docs] def compute_scores(self, embeddings1, embeddings2, clustering_algorithm, metric='cosine'): clustering = Clustering(clustering_algorithm) clustering.get_cluster_results(np.concatenate((embeddings1,embeddings2),axis=0)) labels1 = clustering.labels[:len(embeddings1)] labels2 = clustering.labels[len(embeddings1):] labels = set(clustering.labels) count1 = Counter(labels1) count2 = Counter(labels2) p,q = [], [] for l in labels: if l in count1: p.append(count1[l]/len(embeddings1)) else: p.append(0.) if l in count2: q.append(count2[l]/len(embeddings2)) else: q.append(0.) return jensenshannon(p, q)