Module conversationkg.kgs.neo4j
Expand source code
# -*- coding: utf-8 -*-
from tqdm import tqdm
def put_conversation(tx, conversation):
    """Create the ``Conversation`` node for *conversation* if it is missing.

    The node is keyed on ``hash(conversation)``. Subject, length and the
    formatted start/end times are only written when the node is first
    created (``ON CREATE SET``).

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        conversation: Object providing ``subject``, ``start_time`` and
            ``end_time`` (both with ``strftime``) and supporting ``len()``.
    """
    # The subject travels as a query parameter, so it needs no manual
    # escaping. The former "escaping" threw away its first replace() and
    # rewrote every apostrophe as a backslash, corrupting the stored value.
    subject = conversation.subject
    if not subject:
        print("NO SUBJECT:", hash(conversation))

    query = (
        "\n"
        "MERGE (c:Conversation {id:$h})\n"
        "ON CREATE SET c.subject = $subj\n"
        "ON CREATE SET c.length = $n_emails\n"
        "ON CREATE SET c.start = $start\n"
        "ON CREATE SET c.end = $end\n"
    )
    time_format = "%d.%m.%Y, %H:%M"
    tx.run(
        query,
        h=hash(conversation),
        subj=subject,
        n_emails=len(conversation),
        start=conversation.start_time.strftime(time_format),
        end=conversation.end_time.strftime(time_format),
    )
def put_email(tx, email):
    """Merge an ``Email`` node identified by ``hash(email)``.

    The send time (formatted ``%d.%m.%Y, %H:%M``) and the subject are only
    set when the node is created by this call.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        email: Object providing ``time`` (with ``strftime``) and ``subject``.
    """
    query = (
        "\n"
        "MERGE (e:Email {id:$h})\n"
        "ON CREATE SET e.time = $time\n"
        "ON CREATE SET e.subject = $subject\n"
    )
    properties = {
        "h": hash(email),
        "time": email.time.strftime("%d.%m.%Y, %H:%M"),
        "subject": email.subject,
    }
    tx.run(query, **properties)
def put_entity(tx, entity):
    """Merge a node for *entity*, labelled with the entity's class name.

    The node is keyed on ``hash(entity)``; ``label`` is set from
    ``entity.instance_label`` only when the node is created by this call.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        entity: Object providing ``instance_label``; its class name becomes
            the node label.
    """
    # The node label is spliced into the query text from the class name;
    # the property values still go through parameters.
    # (The previous unused ``entity.to_json()`` call has been dropped.)
    class_name = entity.__class__.__name__
    query = (
        f"\nMERGE (x:{class_name} {{id:$h}})"
        "\nON CREATE SET x.label = $name\n"
    )
    tx.run(query, h=hash(entity), name=entity.instance_label)
def put_topic(tx, topic):
    """Merge a ``Topic`` node keyed on ``topic.index``.

    On creation the node's ``words`` property is set to the comma-separated
    result of ``topic.top_words(5)``.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        topic: Object providing ``index`` and ``top_words(k)``.
    """
    query = (
        "\n"
        "MERGE (t:Topic {id:$topic_id})\n"
        "ON CREATE SET t.words = $words\n"
    )
    topic_id = topic.index
    summary = ", ".join(topic.top_words(5))
    tx.run(query, topic_id=topic_id, words=summary)
def connect_conversation(tx, conversation):
    """Link a conversation node to its emails, people, documents and topic.

    One Cypher statement is assembled: first a ``MATCH`` for the
    conversation and for every email, interlocutor and document (and the
    topic, when ``conversation.topic`` is truthy), then one ``MERGE`` per
    relationship. The statement is executed through ``tx.run``.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        conversation: Iterable of emails providing ``interlocutors``,
            ``documents`` and ``topic``.

    Returns:
        Tuple ``(query, parameters)`` that was passed to ``tx.run``.
    """
    params = {"h": hash(conversation)}
    clauses = ["MATCH (c:Conversation {id:$h})"]

    # MATCH phase: bind every node that takes part in a relationship.
    for idx, mail in enumerate(conversation):
        clauses.append(f"MATCH (e{idx}:Email {{id:$mail_h{idx}}})")
        params[f"mail_h{idx}"] = hash(mail)

    for idx, person in enumerate(conversation.interlocutors):
        clauses.append(f"MATCH (p{idx}:Person {{id:$person_h{idx}}})")
        params[f"person_h{idx}"] = hash(person)

    for idx, document in enumerate(conversation.documents):
        label = document.__class__.__name__
        clauses.append(f"MATCH (d{idx}:{label} {{id:$doc_h{idx}}})")
        params[f"doc_h{idx}"] = hash(document)

    if conversation.topic:
        clauses.append("MATCH (t:Topic {id:$topic_id})")
        params["topic_id"] = conversation.topic.index
        clauses.append("MERGE (c)-[rel_topic:is_about]->(t)")

    # MERGE phase: one relationship per bound node, in the same order.
    clauses.extend(
        f"MERGE (c)-[rel_email{idx}:consists_of]->(e{idx})"
        for idx, _ in enumerate(conversation)
    )
    clauses.extend(
        f"MERGE (p{idx})-[rel_person{idx}:interlocutor_in]->(c)"
        for idx, _ in enumerate(conversation.interlocutors)
    )
    clauses.extend(
        f"MERGE (d{idx})-[rel_doc{idx}:mentioned_in]->(c)"
        for idx, _ in enumerate(conversation.documents)
    )

    query = "\n".join(clauses)
    tx.run(query, **params)
    return query, params
def connect_email(tx, email):
    """Create the relationships that originate from a single email.

    Matches the email, its sender and its receiver by hash. When
    ``email.topic`` is truthy the email is linked to that topic with
    ``is_about``. The sender is always linked to the receiver with
    ``talked_to``.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        email: Object providing ``sender``, ``receiver`` and ``topic``.

    Returns:
        Tuple ``(query, parameters)`` that was passed to ``tx.run``.
    """
    pieces = [
        "\n"
        "MATCH (e:Email {id: $h})\n"
        "MATCH (p:Person {id: $sender_h})\n"
        "MATCH (p2:Person {id: $receiver_h})\n"
    ]
    params = {
        "h": hash(email),
        "sender_h": hash(email.sender),
        "receiver_h": hash(email.receiver),
    }

    if email.topic:
        pieces.append("\nMATCH (t:Topic {id:$topic_id})")
        params["topic_id"] = email.topic.index
        pieces.append("\nMERGE (e)-[rel_topic:is_about]->(t)")

    pieces.append("\nMERGE (p)-[rel_person:talked_to]->(p2)\n")

    query = "".join(pieces)
    tx.run(query, **params)
    return query, params
def connect_person(tx, person):
    """Attach a person's address and organisation nodes to the person.

    All three nodes are looked up by hash; the ``Address`` and
    ``Organisation`` nodes each get an ``of`` relationship pointing at the
    ``Person`` node.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        person: Object providing ``address`` and ``organisation``.
    """
    query = (
        "\n"
        "MATCH (p:Person {id:$p_h})\n"
        "MATCH (a:Address {id:$a_h})\n"
        "MATCH (o:Organisation {id:$o_h})\n"
        "MERGE (a)-[rel_addr:of]->(p)\n"
        "MERGE (o)-[rel_org:of]->(p)\n"
    )
    node_ids = {
        "p_h": hash(person),
        "a_h": hash(person.address),
        "o_h": hash(person.organisation),
    }
    tx.run(query, **node_ids)
def before(tx, event1, event2):
    """Record that *event1* precedes *event2* with a ``before`` relationship.

    Both nodes are matched by hash under a label taken from each event's
    class name.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        event1: The earlier event.
        event2: The later event.

    Returns:
        Tuple ``(query, parameters)`` that was passed to ``tx.run``.
    """
    earlier_label = event1.__class__.__name__
    later_label = event2.__class__.__name__
    query = (
        f"\nMATCH (e1:{earlier_label} {{id: $h1}})"
        f"\nMATCH (e2:{later_label} {{id: $h2}})"
        "\nMERGE (e1)-[rel:before]->(e2)\n"
    )
    params = {"h1": hash(event1), "h2": hash(event2)}
    tx.run(query, **params)
    return query, params
#%% FROM conversationkg_backup/conversation_building/construct_graph.py
#from neo4j import GraphDatabase
#from neo4j_defs2 import put_conversation, put_email, put_entity, put_topic
#from neo4j_defs2 import connect_conversation, connect_email, connect_person
#from neo4j_defs2 import before
#driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "pwd"), encrypted=False)
def clear(tx):
    """Delete every node in the database together with its relationships.

    Args:
        tx: Transaction-like object exposing ``run(query)``.
    """
    wipe_everything = "MATCH (x)\nDETACH DELETE x"
    tx.run(wipe_everything)
def put_iter(func, iterable, session=None):
    """Run *func* once per item, each call in its own write transaction.

    Args:
        func: Transaction function invoked as ``func(tx, item)`` through
            ``session.write_transaction``; its ``__name__`` labels the
            progress bar.
        iterable: Items to process, wrapped in a ``tqdm`` progress bar.
        session: Object exposing ``write_transaction``. When omitted, the
            module-level name ``session`` is used, which was previously the
            only (implicit) option and is not defined in this module itself.

    Raises:
        NameError: If no session is passed and the module defines none.
    """
    if session is None:
        # Backward-compatible fallback to the implicit module global.
        try:
            session = globals()["session"]
        except KeyError:
            raise NameError("name 'session' is not defined") from None

    for item in tqdm(iterable, desc=func.__name__):
        session.write_transaction(func, item)
def connect_iter(func, iterable, session=None):
    """Run the connecting function *func* once per item in a write transaction.

    Args:
        func: Transaction function invoked as ``func(tx, item)`` through
            ``session.write_transaction``; its ``__name__`` labels the
            progress bar.
        iterable: Items to process, wrapped in a ``tqdm`` progress bar.
        session: Object exposing ``write_transaction``. When omitted, the
            module-level name ``session`` is used, which was previously the
            only (implicit) option and is not defined in this module itself.

    Raises:
        NameError: If no session is passed and the module defines none.
    """
    if session is None:
        # Backward-compatible fallback to the implicit module global.
        try:
            session = globals()["session"]
        except KeyError:
            raise NameError("name 'session' is not defined") from None

    for item in tqdm(iterable, desc=func.__name__):
        session.write_transaction(func, item)
def consists_of(tx, conversation):
    """Link a conversation to each of its emails with ``consists_of``.

    One query is issued per email; conversation and email nodes are matched
    by hash.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        conversation: Iterable of emails.
    """
    query = (
        "\n"
        "MATCH (c:Conversation {id:$h})\n"
        "MATCH (e:Email {id:$h1})\n"
        "MERGE (c)-[rel:consists_of]->(e)\n"
    )
    for mail in conversation:
        tx.run(query, h=hash(conversation), h1=hash(mail))
#with driver.session() as session:
# session.write_transaction(clear)
#
#
# put_iter(put_conversation, corpus)
#
# put_iter(put_email, list(corpus.iter_emails()))
#
# put_iter(put_entity, EntityUniverse.entities.values())
#
# put_iter(put_topic, lda.topics)
#
#
# connect_iter(connect_conversation, corpus)
#
# connect_iter(connect_email, list(corpus.iter_emails()))
#
# connect_iter(connect_person,
# filter(lambda x: isinstance(x, Person), EntityUniverse.entities.values()))
#
#
# for conv1, conv2 in zip(corpus, corpus[1:]):
# session.write_transaction(before, conv1, conv2)
#
#
# for conv in tqdm(corpus, desc="before emails"):
# session.write_transaction(consists_of, conv)
# for e1, e2 in zip(conv, conv[1:]):
# session.write_transaction(before, e1, e2)
#%% FROM conversationkg_backup/analytics/graph_builders.py
# -*- coding: utf-8 -*-
#import spacy
#nlp = spacy.load("en")
#
#from neo4j import GraphDatabase
class GraphBuilder:
    """Base class bundling a Neo4j driver with reusable transaction functions.

    Every method that takes ``tx`` is meant to be handed to
    ``session.write_transaction`` and only issues parameterised Cypher
    through ``tx.run``.
    """

    def __init__(self, uri="bolt://localhost:7687", auth=("neo4j", "pwd"),
                 encrypted=False):
        """Open the driver connection.

        Args:
            uri: Bolt URI of the database; defaults to the local instance
                that used to be hard-coded.
            auth: ``(user, password)`` tuple handed to the driver.
            encrypted: Forwarded to ``GraphDatabase.driver``.
        """
        # Function-scope import: the module-level neo4j import is commented
        # out, which otherwise leaves ``GraphDatabase`` undefined here.
        from neo4j import GraphDatabase

        self.driver = GraphDatabase.driver(uri, auth=auth, encrypted=encrypted)

    def close(self):
        """Close the underlying driver and release its connections."""
        self.driver.close()

    def clear(self, tx):
        """Delete every node in the database together with its relationships."""
        tx.run("MATCH (x)\nDETACH DELETE x")

    def put_conversation(self, tx, conversation):
        """Create the ``Conversation`` node for *conversation* if missing.

        The node is keyed on ``hash(conversation)``. Subject, length and the
        formatted start/end times are only written on creation.

        Args:
            tx: Transaction-like object exposing ``run``.
            conversation: Object providing ``subject``, ``start_time`` and
                ``end_time`` (both with ``strftime``) and supporting ``len()``.
        """
        # The subject is a query parameter and needs no manual escaping. The
        # former "escaping" discarded its first replace() and rewrote every
        # apostrophe as a backslash, corrupting the stored value.
        subject = conversation.subject
        if not subject:
            print("NO SUBJECT:", hash(conversation))

        time_format = "%d.%m.%Y, %H:%M"
        tx.run(
            "\n"
            "MERGE (c:Conversation {id:$h})\n"
            "ON CREATE SET c.subject = $subj\n"
            "ON CREATE SET c.length = $n_emails\n"
            "ON CREATE SET c.start = $start\n"
            "ON CREATE SET c.end = $end\n",
            h=hash(conversation),
            subj=subject,
            n_emails=len(conversation),
            start=conversation.start_time.strftime(time_format),
            end=conversation.end_time.strftime(time_format),
        )

    def put_email(self, tx, conv, email):
        """Merge an ``Email`` node and link it to its conversation.

        The conversation node must already exist (it is matched, not
        merged). Time and subject are only set when the email node is
        created.

        Args:
            tx: Transaction-like object exposing ``run``.
            conv: Conversation the email belongs to; matched by hash.
            email: Object providing ``time`` (with ``strftime``) and
                ``subject``.
        """
        tx.run(
            "\n"
            "MATCH (c:Conversation {id:$h_c})\n"
            "MERGE (e:Email {id:$h})\n"
            "ON CREATE SET e.time = $time\n"
            "ON CREATE SET e.subject = $subject\n"
            "MERGE (c)-[rel:consists_of]->(e)\n",
            h_c=hash(conv),
            h=hash(email),
            time=email.time.strftime("%d.%m.%Y, %H:%M"),
            subject=email.subject,
        )

    def connect_email(self, tx, email, p):
        """Link an email to the person whose ``label`` equals *p* (``mentions``).

        Args:
            tx: Transaction-like object exposing ``run``.
            email: Email whose node is matched by hash.
            p: Value compared against the ``label`` property of ``Person``
                nodes.
        """
        tx.run(
            "\n"
            "MATCH (e:Email {id:$h_e})\n"
            "MATCH (p:Person {label:$n})\n"
            "MERGE (e)-[rel2:mentions]->(p)\n",
            h_e=hash(email),
            n=p,
        )

    def connect_persons(self, tx, p1, p2):
        """Link two persons, matched by ``label``, with ``talked_to``.

        Args:
            tx: Transaction-like object exposing ``run``.
            p1: Label of the person the relationship starts from.
            p2: Label of the person the relationship points to.
        """
        tx.run(
            "\n"
            "MATCH (p1:Person {label:$n1})\n"
            "MATCH (p2:Person {label:$n2})\n"
            "MERGE (p1)-[rel:talked_to]->(p2)\n",
            n1=p1,
            n2=p2,
        )

    def before(self, tx, event1, event2):
        """Record that *event1* precedes *event2* with a ``before`` relationship.

        Both nodes are matched by hash under a label taken from each event's
        class name.
        """
        cls1, cls2 = event1.__class__.__name__, event2.__class__.__name__
        cmd = (
            f"\nMATCH (e1:{cls1} {{id: $h1}})"
            f"\nMATCH (e2:{cls2} {{id: $h2}})"
            "\nMERGE (e1)-[rel:before]->(e2)\n"
        )
        tx.run(cmd, h1=hash(event1), h2=hash(event2))
class EmailGraphBuilder(GraphBuilder):
    """Build the graph from email metadata (sender and receiver labels).

    Constructing an instance clears the database and then writes all
    conversations, emails, persons and ``before`` orderings of *corpus*.
    """

    def __init__(self, corpus):
        """Populate the database from *corpus*.

        Args:
            corpus: Sized, sliceable sequence of conversations. Each
                conversation is a sliceable sequence of emails whose
                ``sender`` and ``receiver`` provide ``instance_label``.
        """
        super().__init__()

        # zip(corpus, corpus[1:]) yields one pair fewer than there are
        # conversations; the progress total used to be one too high.
        n_pairs = max(len(corpus) - 1, 0)

        with self.driver.session() as session:
            session.write_transaction(self.clear)

            for conversation in tqdm(corpus):
                session.write_transaction(self.put_conversation, conversation)

                for email in conversation:
                    session.write_transaction(self.put_email, conversation, email)

                    s, r = email.sender.instance_label, email.receiver.instance_label
                    print(s, r)

                    # Persons must exist before they can be matched by the
                    # connecting queries below.
                    session.write_transaction(self.put_person, s)
                    session.write_transaction(self.put_person, r)

                    session.write_transaction(self.connect_email, email, s)
                    session.write_transaction(self.connect_email, email, r)

                    session.write_transaction(self.connect_persons, s, r)

            for conv1, conv2 in tqdm(zip(corpus, corpus[1:]), total=n_pairs,
                                     desc="before conversations"):
                session.write_transaction(self.before, conv1, conv2)

            for conv in tqdm(corpus, desc="before emails"):
                for e1, e2 in zip(conv, conv[1:]):
                    session.write_transaction(self.before, e1, e2)

    def put_person(self, tx, p):
        """Merge a ``Person`` node whose ``label`` property equals *p*."""
        tx.run("\nMERGE (p:Person {label:$l})\n", l=p)
class TextGraphBuilder(GraphBuilder):
    """Build the graph from ``PERSON`` entities found in email bodies.

    Constructing an instance extracts the entities, clears the database and
    then writes persons, conversations, emails, their links and the
    ``before`` orderings of *corpus*.
    """

    def __init__(self, corpus):
        """Populate the database from *corpus*.

        Args:
            corpus: Sized, sliceable sequence of conversations. Each
                conversation is a sliceable sequence of emails providing
                ``body``.
        """
        super().__init__()

        persons = self.collect_persons(corpus)

        # zip(corpus, corpus[1:]) yields one pair fewer than there are
        # conversations; the progress total used to be one too high.
        n_pairs = max(len(corpus) - 1, 0)

        with self.driver.session() as session:
            session.write_transaction(self.clear)

            session.write_transaction(self.put_persons, persons)

            for conversation, c in tqdm(zip(corpus, persons),
                                        total=len(corpus)):
                session.write_transaction(self.put_conversation, conversation)

                for email, em in zip(conversation, c):
                    session.write_transaction(self.put_email, conversation, email)

                    for ent1 in em:
                        session.write_transaction(self.connect_email,
                                                  email, ent1.text)

                        # Link every ordered pair of distinct entities that
                        # occur in the same email.
                        for ent2 in em:
                            if not ent2 == ent1:
                                session.write_transaction(self.connect_persons,
                                                          ent1.text, ent2.text)

            for conv1, conv2 in tqdm(zip(corpus, corpus[1:]), total=n_pairs,
                                     desc="before conversations"):
                session.write_transaction(self.before, conv1, conv2)

            for conv in tqdm(corpus, desc="before emails"):
                for e1, e2 in zip(conv, conv[1:]):
                    session.write_transaction(self.before, e1, e2)

    def get_entities(self, text):
        """Return the ``ents`` of ``nlp(str(text))``.

        NOTE(review): ``nlp`` is expected as a module-level callable; the
        spaCy setup that would define it is commented out in this module, so
        it has to be provided elsewhere -- confirm.
        """
        return nlp(str(text)).ents

    def collect_persons(self, corpus):
        """Collect ``PERSON`` entities per email, nested per conversation.

        Returns:
            A list (one entry per conversation) of lists (one per email) of
            the entities whose ``label_`` is ``"PERSON"``.
        """
        return [[[e for e in self.get_entities(email.body)
                  if e.label_ == "PERSON"]
                 for email in conversation] for conversation in corpus]

    @staticmethod
    def put_persons(tx, person_lists):
        """Merge one ``Person`` node per distinct entity text.

        Args:
            tx: Transaction-like object exposing ``run(query, **parameters)``.
            person_lists: Nested lists as returned by ``collect_persons``;
                every innermost item provides ``text``.

        Returns:
            Tuple ``(query, parameters)``. When there are no persons nothing
            is executed and ``("", {})`` is returned.
        """
        # Deduplicate on the text, which is the only thing stored; sorting
        # makes the generated query reproducible.
        names = sorted({e.text for conv in person_lists
                        for email in conv for e in email})
        if not names:
            # An empty statement is not a valid query, so skip the call.
            return "", {}

        d = {f"name{j}": name for j, name in enumerate(names)}
        cmd = "".join(f"\n MERGE (p{j}:Person {{label:$name{j}}})"
                      for j in range(len(names)))
        tx.run(cmd, **d)
        return cmd, d
Functions
def before(tx, event1, event2)
-
Expand source code
def before(tx, event1, event2): cls1, cls2 = event1.__class__.__name__, event2.__class__.__name__ cmd = f""" MATCH (e1:{cls1} {{id: $h1}}) MATCH (e2:{cls2} {{id: $h2}}) MERGE (e1)-[rel:before]->(e2) """ param_d = dict(h1=hash(event1), h2=hash(event2)) tx.run(cmd, **param_d) # print(cmd, param_d) return cmd, param_d
def clear(tx)
-
Expand source code
def clear(tx): tx.run("""MATCH (x) DETACH DELETE x""")
def connect_conversation(tx, conversation)
-
Expand source code
def connect_conversation(tx, conversation): cmd = "MATCH (c:Conversation {id:$h})" param_d = dict(h=hash(conversation)) for i, email in enumerate(conversation): cmd += "\n" + f"MATCH (e{i}:Email {{id:$mail_h{i}}})" param_d[f"mail_h{i}"] = hash(email) for i, p in enumerate(conversation.interlocutors): cmd += "\n" + f"MATCH (p{i}:Person {{id:$person_h{i}}})" param_d[f"person_h{i}"] = hash(p) for i, d in enumerate(conversation.documents): cls = d.__class__.__name__ cmd += "\n" + f"MATCH (d{i}:{cls} {{id:$doc_h{i}}})" param_d[f"doc_h{i}"] = hash(d) if conversation.topic: cmd += "\n" + f"MATCH (t:Topic {{id:$topic_id}})" param_d["topic_id"] = conversation.topic.index cmd += "\n" + "MERGE (c)-[rel_topic:is_about]->(t)" for i, e in enumerate(conversation): cmd += "\n" + f"MERGE (c)-[rel_email{i}:consists_of]->(e{i})" for i, p in enumerate(conversation.interlocutors): cmd += "\n" + f"MERGE (p{i})-[rel_person{i}:interlocutor_in]->(c)" for i, d in enumerate(conversation.documents): cmd += "\n" + f"MERGE (d{i})-[rel_doc{i}:mentioned_in]->(c)" tx.run(cmd, **param_d) return cmd, param_d
def connect_email(tx, email)
-
Expand source code
def connect_email(tx, email): cmd = """ MATCH (e:Email {id: $h}) MATCH (p:Person {id: $sender_h}) MATCH (p2:Person {id: $receiver_h}) """ param_d = dict(h=hash(email), sender_h=hash(email.sender), receiver_h=hash(email.receiver)) if email.topic: cmd += "\n" + f"MATCH (t:Topic {{id:$topic_id}})" param_d["topic_id"] = email.topic.index cmd += "\n" + "MERGE (e)-[rel_topic:is_about]->(t)" cmd += """ MERGE (p)-[rel_person:talked_to]->(p2) """ tx.run(cmd, **param_d) return cmd, param_d
def connect_iter(func, iterable)
-
Expand source code
def connect_iter(func, iterable): for item in tqdm(iterable, desc=func.__name__): session.write_transaction(func, item)
def connect_person(tx, person)
-
Expand source code
def connect_person(tx, person): tx.run(""" MATCH (p:Person {id:$p_h}) MATCH (a:Address {id:$a_h}) MATCH (o:Organisation {id:$o_h}) MERGE (a)-[rel_addr:of]->(p) MERGE (o)-[rel_org:of]->(p) """, p_h=hash(person), a_h=hash(person.address), o_h=hash(person.organisation))
def consists_of(tx, conversation)
-
Expand source code
def consists_of(tx, conversation): for email in conversation: tx.run(""" MATCH (c:Conversation {id:$h}) MATCH (e:Email {id:$h1}) MERGE (c)-[rel:consists_of]->(e) """, h=hash(conversation), h1=hash(email))
def put_conversation(tx, conversation)
-
Expand source code
def put_conversation(tx, conversation): subject_escaped = conversation.subject.replace('"', '\\"') subject_escaped = conversation.subject.replace("'", "\\") # subject_escaped = conversation.subject.replace('\\', '\\') if not subject_escaped: print("NO SUBJECT:", hash(conversation)) tx.run(""" MERGE (c:Conversation {id:$h}) ON CREATE SET c.subject = $subj ON CREATE SET c.length = $n_emails ON CREATE SET c.start = $start ON CREATE SET c.end = $end """, h=hash(conversation), #h=subject_escaped[:20] + "...", subj=subject_escaped, n_emails=len(conversation), start=conversation.start_time.strftime("%d.%m.%Y, %H:%M"), end=conversation.end_time.strftime("%d.%m.%Y, %H:%M"))
def put_email(tx, email)
-
Expand source code
def put_email(tx, email): tx.run(""" MERGE (e:Email {id:$h}) ON CREATE SET e.time = $time ON CREATE SET e.subject = $subject """, h = hash(email), time=email.time.strftime("%d.%m.%Y, %H:%M"), subject=email.subject)
def put_entity(tx, entity)
-
Expand source code
def put_entity(tx, entity): class_name = entity.__class__.__name__ ent_dict = entity.to_json() # print(ent_dict) tx.run(f""" MERGE (x:{class_name} {{id:$h}}) ON CREATE SET x.label = $name """, h=hash(entity), name=entity.instance_label)
def put_iter(func, iterable)
-
Expand source code
def put_iter(func, iterable): for item in tqdm(iterable, desc=func.__name__): session.write_transaction(func, item)
def put_topic(tx, topic)
-
Expand source code
def put_topic(tx, topic): tx.run(""" MERGE (t:Topic {id:$topic_id}) ON CREATE SET t.words = $words """, topic_id=topic.index, words=", ".join(topic.top_words(5)))
Classes
class EmailGraphBuilder (corpus)
-
Expand source code
class EmailGraphBuilder(GraphBuilder): def __init__(self, corpus): super().__init__() with self.driver.session() as session: session.write_transaction(self.clear) for conversation in tqdm(corpus): session.write_transaction(self.put_conversation, conversation) for email in conversation: session.write_transaction(self.put_email, conversation, email) s, r = email.sender.instance_label, email.receiver.instance_label print(s, r) session.write_transaction(self.put_person, s) session.write_transaction(self.put_person, r) session.write_transaction(self.connect_email, email, s) session.write_transaction(self.connect_email, email, r) session.write_transaction(self.connect_persons, s, r) for conv1, conv2 in tqdm(zip(corpus, corpus[1:]), total=len(corpus), desc="before conversations"): session.write_transaction(self.before, conv1, conv2) for conv in tqdm(corpus, desc="before emails"): for e1, e2 in zip(conv, conv[1:]): session.write_transaction(self.before, e1, e2) def put_person(self, tx, p): tx.run(""" MERGE (p:Person {label:$l}) """, l=p)
Ancestors
Methods
def put_person(self, tx, p)
-
Expand source code
def put_person(self, tx, p): tx.run(""" MERGE (p:Person {label:$l}) """, l=p)
class GraphBuilder
-
Expand source code
class GraphBuilder: def __init__(self): self.driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "pwd"), encrypted=False) def clear(self, tx): tx.run("""MATCH (x) DETACH DELETE x""") def put_conversation(self, tx, conversation): subject_escaped = conversation.subject.replace('"', '\\"') subject_escaped = conversation.subject.replace("'", "\\") # subject_escaped = conversation.subject.replace('\\', '\\') if not subject_escaped: print("NO SUBJECT:", hash(conversation)) tx.run(""" MERGE (c:Conversation {id:$h}) ON CREATE SET c.subject = $subj ON CREATE SET c.length = $n_emails ON CREATE SET c.start = $start ON CREATE SET c.end = $end """, h=hash(conversation), #h=subject_escaped[:20] + "...", subj=subject_escaped, n_emails=len(conversation), start=conversation.start_time.strftime("%d.%m.%Y, %H:%M"), end=conversation.end_time.strftime("%d.%m.%Y, %H:%M")) def put_email(self, tx, conv, email): tx.run(""" MATCH (c:Conversation {id:$h_c}) MERGE (e:Email {id:$h}) ON CREATE SET e.time = $time ON CREATE SET e.subject = $subject MERGE (c)-[rel:consists_of]->(e) """, h_c=hash(conv), h = hash(email), time=email.time.strftime("%d.%m.%Y, %H:%M"), subject=email.subject) def connect_email(self, tx, email, p): tx.run(""" MATCH (e:Email {id:$h_e}) MATCH (p:Person {label:$n}) MERGE (e)-[rel2:mentions]->(p) """, h_e=hash(email), n=p) def connect_persons(self, tx, p1, p2): tx.run(""" MATCH (p1:Person {label:$n1}) MATCH (p2:Person {label:$n2}) MERGE (p1)-[rel:talked_to]->(p2) """, n1=p1, n2=p2) def before(self, tx, event1, event2): cls1, cls2 = event1.__class__.__name__, event2.__class__.__name__ cmd = f""" MATCH (e1:{cls1} {{id: $h1}}) MATCH (e2:{cls2} {{id: $h2}}) MERGE (e1)-[rel:before]->(e2) """ param_d = dict(h1=hash(event1), h2=hash(event2)) tx.run(cmd, **param_d)
Subclasses
Methods
def before(self, tx, event1, event2)
-
Expand source code
def before(self, tx, event1, event2): cls1, cls2 = event1.__class__.__name__, event2.__class__.__name__ cmd = f""" MATCH (e1:{cls1} {{id: $h1}}) MATCH (e2:{cls2} {{id: $h2}}) MERGE (e1)-[rel:before]->(e2) """ param_d = dict(h1=hash(event1), h2=hash(event2)) tx.run(cmd, **param_d)
def clear(self, tx)
-
Expand source code
def clear(self, tx): tx.run("""MATCH (x) DETACH DELETE x""")
def connect_email(self, tx, email, p)
-
Expand source code
def connect_email(self, tx, email, p): tx.run(""" MATCH (e:Email {id:$h_e}) MATCH (p:Person {label:$n}) MERGE (e)-[rel2:mentions]->(p) """, h_e=hash(email), n=p)
def connect_persons(self, tx, p1, p2)
-
Expand source code
def connect_persons(self, tx, p1, p2): tx.run(""" MATCH (p1:Person {label:$n1}) MATCH (p2:Person {label:$n2}) MERGE (p1)-[rel:talked_to]->(p2) """, n1=p1, n2=p2)
def put_conversation(self, tx, conversation)
-
Expand source code
def put_conversation(self, tx, conversation): subject_escaped = conversation.subject.replace('"', '\\"') subject_escaped = conversation.subject.replace("'", "\\") # subject_escaped = conversation.subject.replace('\\', '\\') if not subject_escaped: print("NO SUBJECT:", hash(conversation)) tx.run(""" MERGE (c:Conversation {id:$h}) ON CREATE SET c.subject = $subj ON CREATE SET c.length = $n_emails ON CREATE SET c.start = $start ON CREATE SET c.end = $end """, h=hash(conversation), #h=subject_escaped[:20] + "...", subj=subject_escaped, n_emails=len(conversation), start=conversation.start_time.strftime("%d.%m.%Y, %H:%M"), end=conversation.end_time.strftime("%d.%m.%Y, %H:%M"))
def put_email(self, tx, conv, email)
-
Expand source code
def put_email(self, tx, conv, email): tx.run(""" MATCH (c:Conversation {id:$h_c}) MERGE (e:Email {id:$h}) ON CREATE SET e.time = $time ON CREATE SET e.subject = $subject MERGE (c)-[rel:consists_of]->(e) """, h_c=hash(conv), h = hash(email), time=email.time.strftime("%d.%m.%Y, %H:%M"), subject=email.subject)
class TextGraphBuilder (corpus)
-
Expand source code
class TextGraphBuilder(GraphBuilder): def __init__(self, corpus): super().__init__() persons = self.collect_persons(corpus) with self.driver.session() as session: session.write_transaction(self.clear) session.write_transaction(self.put_persons, persons) for conversation, c in tqdm(zip(corpus, persons), total=len(corpus)): session.write_transaction(self.put_conversation, conversation) for email, em in zip(conversation, c): session.write_transaction(self.put_email, conversation, email) for ent1 in em: session.write_transaction(self.connect_email, email, ent1.text) for ent2 in em: if not ent2 == ent1: session.write_transaction(self.connect_persons, ent1.text, ent2.text) for conv1, conv2 in tqdm(zip(corpus, corpus[1:]), total=len(corpus), desc="before conversations"): session.write_transaction(self.before, conv1, conv2) for conv in tqdm(corpus, desc="before emails"): for e1, e2 in zip(conv, conv[1:]): session.write_transaction(self.before, e1, e2) def get_entities(self, text): return nlp(str(text)).ents def collect_persons(self, corpus): return [[[e for e in self.get_entities(email.body) if e.label_ == "PERSON"] for email in conversation] for conversation in corpus] @staticmethod def put_persons(tx, person_lists): person_set = {e for conv in person_lists for email in conv for e in email} cmd = "" d = {} for j, person in enumerate(person_set): cmd += f"\n MERGE (p{j}:Person {{label:$name{j}}})" d[f"name{j}"] = person.text tx.run(cmd, **d) return cmd, d
Ancestors
Static methods
def put_persons(tx, person_lists)
-
Expand source code
@staticmethod def put_persons(tx, person_lists): person_set = {e for conv in person_lists for email in conv for e in email} cmd = "" d = {} for j, person in enumerate(person_set): cmd += f"\n MERGE (p{j}:Person {{label:$name{j}}})" d[f"name{j}"] = person.text tx.run(cmd, **d) return cmd, d
Methods
def collect_persons(self, corpus)
-
Expand source code
def collect_persons(self, corpus): return [[[e for e in self.get_entities(email.body) if e.label_ == "PERSON"] for email in conversation] for conversation in corpus]
def get_entities(self, text)
-
Expand source code
def get_entities(self, text): return nlp(str(text)).ents