Module conversationkg.conversations.entities

Expand source code
# -*- coding: utf-8 -*-

from .ledger import Universe

from urllib.parse import urlparse

import numpy as np


#%%

#class Universe(type):
#    def __call__(cls, *args, **kwargs):
#        obj = type.__call__(cls, *args, **kwargs)
#        return obj


class EntityUniverse(type, metaclass=Universe):
    entities = dict()
    n_duplicates = 0
    duplicates = set()
    
    def __call__(cls, *args, **kwargs):
        obj = type.__call__(cls, *args, **kwargs)  

        if hash(obj) in EntityUniverse.entities:
            EntityUniverse.n_duplicates += 1
            EntityUniverse.duplicates.add(obj)
            return EntityUniverse.entities[hash(obj)]
        else:
            EntityUniverse.entities[hash(obj)] = obj
            return obj
    
    @classmethod
    def reset(cls):
        cls.entities = dict()
        cls.n_duplicates = 0
        cls.duplicates = set()
        
        
        

class Entity(metaclass=EntityUniverse):
    def __init__(self, name):
        self.name = name
#        for name, val in other_properties:
#            setattr(self, name, val)

    def __eq__(self, other):
        if not type(self) == type(other):
            return False
        return self.name == other.name
    
    # hash __class__ to ensure different entity types are discernible by hash
    def __hash__(self):
        return hash((self.__class__, self.name)) 
  
    def __repr__(self):
        return f"{self.__class__.__name__}({self.name})"
    
    def __str__(self):
        return f"{self.name}"
    
    def to_json(self):
        d = dict(name=self.name)
        d["class"] = self.__class__.__name__
#        d["class"] = "Entity"
        return d
    
    @classmethod
    def from_json(cls, json_dict):
        if not cls.__name__ == json_dict["class"]:
            raise ValueError(f"Passed JSON dict for class {json_dict['class']} "
                             f"to {cls.__name__}")
        
        return cls(json_dict["name"])
        
        
    
class Person(Entity):
    def __init__(self, name, address):            
        self.address = Address(address)               
        self.organisation = self.address.org_from_address
        
        if not name: name = ""        
        name = name.strip("'").strip('"').replace("\n", " ")
        if name == self.address.name.lower():
            name = ""        
        
        super().__init__(name)
    
    def __eq__(self, other):
        return super().__eq__(other) and self.address == other.address
    
    def __hash__(self):
        return hash((super().__hash__(), self.address))
    
    def __repr__(self):
        return f"Person({self.name}, {self.address.name})"
    
    def __str__(self):
        return f"{self.name} <{self.address.name}>"
    
    def to_json(self):
        d = super().to_json()
        d["address"] = self.address.name
        return d
    
    @classmethod
    def from_json(cls, json_dict):
        return cls(json_dict["name"],
                   json_dict["address"])
        
        
class Organisation(Entity):
    def __init__(self, name, domain):
        self.domain = domain
        super().__init__(name)
        
    def __eq__(self, other):
        return super().__eq__(other) and self.domain == other.domain
    
    def __hash__(self):
        return hash((super().__hash__(), self.domain))
        
    
    def __repr__(self):
        return f"Organisation({self.name}, {self.domain})"
    
    def __str__(self):
        return f"{self.name} <{self.domain}>"

    def to_json(self):
        d = super().to_json()
        d["domain"] = self.domain
        return d
    
    @classmethod
    def from_json(cls, json_dict):
        return cls(json_dict["name"],
                   json_dict["domain"])



# could also be called SimpleEntity, 
# since it really only consists of a name (and is of type str)
class StringEntity(Entity, str):
    def __new__(cls, string):
        return str.__new__(cls, string)
    
    def __init__(self, string):
        Entity.__init__(self, string)
          
    
class Address(StringEntity):
    def __new__(cls, address):
        if not address: address = ""
        domain = address[address.rfind("@")+1:]
        if not domain: address = ""
        return super().__new__(cls, address)

    def __init__(self, address):
        domain = address[address.rfind("@")+1:]
        self.domain = domain
        self.local_part = address[:address.rfind("@")]
        
        domain_prefix = domain[:domain.find(".")]
        self.org_from_address = Organisation(domain_prefix, domain)
        super().__init__(address)
        
        

class Link(StringEntity):
    def __init__(self, url):
        try:
            parsed = urlparse(url)
            self.domain = parsed.netloc
            self.path = parsed.path
        except ValueError:
            self.domain = None
            self.path = None
        super().__init__(url)

        
class KeyWord(StringEntity, str):
    pass



class Topic(Entity):
    def __init__(self, index, word_dist, words=None):
        self.index = index
        self.word_dist = np.asarray(word_dist)
        self.word_dist.flags.writeable = False
        self.words = words if words else range(self.word_dist.size)
        self.sorted_words = tuple(self.words[i] for i in self.word_dist.argsort())
        
        super().__init__(str(index))
        
        
    def top_words(self, n, include_probs=False):
        if include_probs:
            return [(self.word_dist[i], KeyWord(self.words[i])) 
                    for i in self.word_dist.argsort()[:n]]
        return [KeyWord(w) for w in self.sorted_words[:n]]
    
    def __str__(self):
        return f"Topic {self.index}: " + f"{list(map(str, self.top_words(5)))}"[1:-1]
    
    def __repr__(self):
        probs, words = list(zip(*self.top_words(5, include_probs=True)))
        return f"Topic({self.index}, {probs}, {words})"
    
    def __hash__(self):
        return hash((self.index, self.word_dist.tostring()))
    
    def __eq__(self, other):
        if not isinstance(other, Topic):
            return False
        return self.index == other.index and np.array_equal(self.word_dist, other.word_dist)
    
    
    def to_json(self):
        return {"class": self.__class__.__name__,
                "index": self.index,
                "words": self.words,
                "word_dist": self.word_dist.tolist()}
    
    @classmethod
    def from_json(cls, json_dict):
        params = [json_dict[k] for k in ["index", "word_dist", "words"]]
        return cls(*params)


class TopicInstance:  
    def __init__(self, topic, confidence):
        self.topic = topic
        self.index = topic.index
        self.score = confidence
        
    def __str__(self):
        return f"Prob[{self.topic}] = {self.score:0.3f}"
    
    def __repr__(self):
        return f"TopicInstance({repr(self.topic)}, {self.score})"
    
    def __eq__(self, other):
        if isinstance(other, Topic):
            return self.topic == other
        elif isinstance(other, TopicInstance):
            return self.topic == other.topic
        else:
            return False
        
    def __hash__(self):
        return hash(self.topic)
    
    def to_json(self):
        d = {"class":self.__class__.__name__, "score":self.score}
        d["topic"] = self.topic.to_json(dumps=False)
        return d
    
    @classmethod
    def from_json(cls, json_dict):
        topic = Topic.from_json(json_dict["topic"])
        score = json_dict["score"]
        return cls(topic, score)
    


    

    
#class DynamicEntity(metaclass=EntityUniverse):
    
    
        

Classes

class Address (address)

str(object='') -> str str(bytes_or_buffer[, encoding[, errors]]) -> str

Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. Otherwise, returns the result of object.str() (if defined) or repr(object). encoding defaults to sys.getdefaultencoding(). errors defaults to 'strict'.

Expand source code
class Address(StringEntity):
    def __new__(cls, address):
        if not address: address = ""
        domain = address[address.rfind("@")+1:]
        if not domain: address = ""
        return super().__new__(cls, address)

    def __init__(self, address):
        domain = address[address.rfind("@")+1:]
        self.domain = domain
        self.local_part = address[:address.rfind("@")]
        
        domain_prefix = domain[:domain.find(".")]
        self.org_from_address = Organisation(domain_prefix, domain)
        super().__init__(address)

Ancestors

class Entity (name)
Expand source code
class Entity(metaclass=EntityUniverse):
    def __init__(self, name):
        self.name = name
#        for name, val in other_properties:
#            setattr(self, name, val)

    def __eq__(self, other):
        if not type(self) == type(other):
            return False
        return self.name == other.name
    
    # hash __class__ to ensure different entity types are discernible by hash
    def __hash__(self):
        return hash((self.__class__, self.name)) 
  
    def __repr__(self):
        return f"{self.__class__.__name__}({self.name})"
    
    def __str__(self):
        return f"{self.name}"
    
    def to_json(self):
        d = dict(name=self.name)
        d["class"] = self.__class__.__name__
#        d["class"] = "Entity"
        return d
    
    @classmethod
    def from_json(cls, json_dict):
        if not cls.__name__ == json_dict["class"]:
            raise ValueError(f"Passed JSON dict for class {json_dict['class']} "
                             f"to {cls.__name__}")
        
        return cls(json_dict["name"])

Subclasses

Static methods

def from_json(json_dict)
Expand source code
@classmethod
def from_json(cls, json_dict):
    if not cls.__name__ == json_dict["class"]:
        raise ValueError(f"Passed JSON dict for class {json_dict['class']} "
                         f"to {cls.__name__}")
    
    return cls(json_dict["name"])

Methods

def to_json(self)
Expand source code
    def to_json(self):
        d = dict(name=self.name)
        d["class"] = self.__class__.__name__
#        d["class"] = "Entity"
        return d
class EntityUniverse (*args, **kwargs)

type(object_or_name, bases, dict) type(object) -> the object's type type(name, bases, dict) -> a new type

Expand source code
class EntityUniverse(type, metaclass=Universe):
    entities = dict()
    n_duplicates = 0
    duplicates = set()
    
    def __call__(cls, *args, **kwargs):
        obj = type.__call__(cls, *args, **kwargs)  

        if hash(obj) in EntityUniverse.entities:
            EntityUniverse.n_duplicates += 1
            EntityUniverse.duplicates.add(obj)
            return EntityUniverse.entities[hash(obj)]
        else:
            EntityUniverse.entities[hash(obj)] = obj
            return obj
    
    @classmethod
    def reset(cls):
        cls.entities = dict()
        cls.n_duplicates = 0
        cls.duplicates = set()

Ancestors

  • builtins.type

Class variables

var duplicates
var entities
var n_duplicates

Static methods

def reset()
Expand source code
@classmethod
def reset(cls):
    cls.entities = dict()
    cls.n_duplicates = 0
    cls.duplicates = set()
class KeyWord (string)

str(object='') -> str str(bytes_or_buffer[, encoding[, errors]]) -> str

Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. Otherwise, returns the result of object.str() (if defined) or repr(object). encoding defaults to sys.getdefaultencoding(). errors defaults to 'strict'.

Expand source code
class KeyWord(StringEntity, str):
    pass

Ancestors

str(object='') -> str str(bytes_or_buffer[, encoding[, errors]]) -> str

Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. Otherwise, returns the result of object.str() (if defined) or repr(object). encoding defaults to sys.getdefaultencoding(). errors defaults to 'strict'.

Expand source code
class Link(StringEntity):
    def __init__(self, url):
        try:
            parsed = urlparse(url)
            self.domain = parsed.netloc
            self.path = parsed.path
        except ValueError:
            self.domain = None
            self.path = None
        super().__init__(url)

Ancestors

class Organisation (name, domain)
Expand source code
class Organisation(Entity):
    def __init__(self, name, domain):
        self.domain = domain
        super().__init__(name)
        
    def __eq__(self, other):
        return super().__eq__(other) and self.domain == other.domain
    
    def __hash__(self):
        return hash((super().__hash__(), self.domain))
        
    
    def __repr__(self):
        return f"Organisation({self.name}, {self.domain})"
    
    def __str__(self):
        return f"{self.name} <{self.domain}>"

    def to_json(self):
        d = super().to_json()
        d["domain"] = self.domain
        return d
    
    @classmethod
    def from_json(cls, json_dict):
        return cls(json_dict["name"],
                   json_dict["domain"])

Ancestors

Static methods

def from_json(json_dict)
Expand source code
@classmethod
def from_json(cls, json_dict):
    return cls(json_dict["name"],
               json_dict["domain"])

Methods

def to_json(self)
Expand source code
def to_json(self):
    d = super().to_json()
    d["domain"] = self.domain
    return d
class Person (name, address)
Expand source code
class Person(Entity):
    def __init__(self, name, address):            
        self.address = Address(address)               
        self.organisation = self.address.org_from_address
        
        if not name: name = ""        
        name = name.strip("'").strip('"').replace("\n", " ")
        if name == self.address.name.lower():
            name = ""        
        
        super().__init__(name)
    
    def __eq__(self, other):
        return super().__eq__(other) and self.address == other.address
    
    def __hash__(self):
        return hash((super().__hash__(), self.address))
    
    def __repr__(self):
        return f"Person({self.name}, {self.address.name})"
    
    def __str__(self):
        return f"{self.name} <{self.address.name}>"
    
    def to_json(self):
        d = super().to_json()
        d["address"] = self.address.name
        return d
    
    @classmethod
    def from_json(cls, json_dict):
        return cls(json_dict["name"],
                   json_dict["address"])

Ancestors

Subclasses

Static methods

def from_json(json_dict)
Expand source code
@classmethod
def from_json(cls, json_dict):
    return cls(json_dict["name"],
               json_dict["address"])

Methods

def to_json(self)
Expand source code
def to_json(self):
    d = super().to_json()
    d["address"] = self.address.name
    return d
class StringEntity (string)

str(object='') -> str str(bytes_or_buffer[, encoding[, errors]]) -> str

Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. Otherwise, returns the result of object.str() (if defined) or repr(object). encoding defaults to sys.getdefaultencoding(). errors defaults to 'strict'.

Expand source code
class StringEntity(Entity, str):
    def __new__(cls, string):
        return str.__new__(cls, string)
    
    def __init__(self, string):
        Entity.__init__(self, string)

Ancestors

Subclasses

class Topic (index, word_dist, words=None)
Expand source code
class Topic(Entity):
    def __init__(self, index, word_dist, words=None):
        self.index = index
        self.word_dist = np.asarray(word_dist)
        self.word_dist.flags.writeable = False
        self.words = words if words else range(self.word_dist.size)
        self.sorted_words = tuple(self.words[i] for i in self.word_dist.argsort())
        
        super().__init__(str(index))
        
        
    def top_words(self, n, include_probs=False):
        if include_probs:
            return [(self.word_dist[i], KeyWord(self.words[i])) 
                    for i in self.word_dist.argsort()[:n]]
        return [KeyWord(w) for w in self.sorted_words[:n]]
    
    def __str__(self):
        return f"Topic {self.index}: " + f"{list(map(str, self.top_words(5)))}"[1:-1]
    
    def __repr__(self):
        probs, words = list(zip(*self.top_words(5, include_probs=True)))
        return f"Topic({self.index}, {probs}, {words})"
    
    def __hash__(self):
        return hash((self.index, self.word_dist.tostring()))
    
    def __eq__(self, other):
        if not isinstance(other, Topic):
            return False
        return self.index == other.index and np.array_equal(self.word_dist, other.word_dist)
    
    
    def to_json(self):
        return {"class": self.__class__.__name__,
                "index": self.index,
                "words": self.words,
                "word_dist": self.word_dist.tolist()}
    
    @classmethod
    def from_json(cls, json_dict):
        params = [json_dict[k] for k in ["index", "word_dist", "words"]]
        return cls(*params)

Ancestors

Static methods

def from_json(json_dict)
Expand source code
@classmethod
def from_json(cls, json_dict):
    params = [json_dict[k] for k in ["index", "word_dist", "words"]]
    return cls(*params)

Methods

def to_json(self)
Expand source code
def to_json(self):
    return {"class": self.__class__.__name__,
            "index": self.index,
            "words": self.words,
            "word_dist": self.word_dist.tolist()}
def top_words(self, n, include_probs=False)
Expand source code
def top_words(self, n, include_probs=False):
    if include_probs:
        return [(self.word_dist[i], KeyWord(self.words[i])) 
                for i in self.word_dist.argsort()[:n]]
    return [KeyWord(w) for w in self.sorted_words[:n]]
class TopicInstance (topic, confidence)
Expand source code
class TopicInstance:  
    def __init__(self, topic, confidence):
        self.topic = topic
        self.index = topic.index
        self.score = confidence
        
    def __str__(self):
        return f"Prob[{self.topic}] = {self.score:0.3f}"
    
    def __repr__(self):
        return f"TopicInstance({repr(self.topic)}, {self.score})"
    
    def __eq__(self, other):
        if isinstance(other, Topic):
            return self.topic == other
        elif isinstance(other, TopicInstance):
            return self.topic == other.topic
        else:
            return False
        
    def __hash__(self):
        return hash(self.topic)
    
    def to_json(self):
        d = {"class":self.__class__.__name__, "score":self.score}
        d["topic"] = self.topic.to_json(dumps=False)
        return d
    
    @classmethod
    def from_json(cls, json_dict):
        topic = Topic.from_json(json_dict["topic"])
        score = json_dict["score"]
        return cls(topic, score)

Static methods

def from_json(json_dict)
Expand source code
@classmethod
def from_json(cls, json_dict):
    topic = Topic.from_json(json_dict["topic"])
    score = json_dict["score"]
    return cls(topic, score)

Methods

def to_json(self)
Expand source code
def to_json(self):
    d = {"class":self.__class__.__name__, "score":self.score}
    d["topic"] = self.topic.to_json(dumps=False)
    return d