This commit is contained in:
ahmed531998 2023-04-19 23:43:57 +02:00
parent 489deeb3aa
commit 35dfb21bf4
7 changed files with 157 additions and 77 deletions

14
DM.py
View File

@ -66,7 +66,19 @@ class DM:
return "findDataset" return "findDataset"
elif self.curr_state['intent'] == 'SUMMARIZEPAPER': elif self.curr_state['intent'] == 'SUMMARIZEPAPER':
return "sumPaper" return "sumPaper"
elif self.curr_state['intent'] == 'LISTPAPERS':
return "listPapers"
elif self.curr_state['intent'] == 'LISTDATASETS':
return "listDatasets"
elif self.curr_state['intent'] == 'LISTCOMMANDS':
return "listCommands"
elif self.curr_state['intent'] == 'LISTTOPICS':
return "listTopics"
elif self.curr_state['intent'] == 'LISTRESOURCES':
return "listResources"
elif self.curr_state['intent'] == 'COMMAND':
return "command"
else: else:
return "ConvGen" return "RetGen"
else: else:
return "Clarify" return "Clarify"

3
NLU.py
View File

@ -88,6 +88,9 @@ class NLU:
""" """
Query -> coref resolution & intent extraction -> if intents are not confident or if query is ambig -> rewrite query and recheck -> if still ambig, ask a clarifying question Query -> coref resolution & intent extraction -> if intents are not confident or if query is ambig -> rewrite query and recheck -> if still ambig, ask a clarifying question
""" """
if utterance in ["help", "list resources", "list papers", "list datasets", "list topics"]:
return {"modified_query": utterance, "intent": "COMMAND", "entities": [], "is_offensive": False, "is_clear": True}
self.to_process = utterance self.to_process = utterance
self.to_process = self._resolve_coref(history_consec) self.to_process = self._resolve_coref(history_consec)

View File

@ -90,7 +90,8 @@ class ResponseGenerator:
def gen_response(self, action, utterance=None, name=None, username=None, vrename=None, state=None, consec_history=None, chitchat_history=None): def gen_response(self, action, utterance=None, name=None, username=None, vrename=None, state=None, consec_history=None, chitchat_history=None):
if action == "Help": if action == "Help":
return "Hey " + name + "! it's Janet! I am here to help you make use of the datasets and papers in the catalogue of the " + vrename +" VRE. I can answer questions whose answers may be inside the papers. I can summarize papers for you. I can also chat with you. So, whichever it is, I am ready to chat!" commands = " You can choose between using one of the supported commands to explore the environment or you can use natural language to find resourcesand get answers and summaries. \n "
return "Hey " + name + "! it's Janet! I am here to help you make use of the datasets and papers in the catalogue of the " + vrename +" VRE. I can answer questions whose answers may be inside the papers. I can summarize papers for you. I can also chat with you. So, whichever it is, I am ready to chat!" + commands + self.gen_response(action="listCommands")
elif action == "Recommend": elif action == "Recommend":
prompt = self.recommender.make_recommendation(username, name) prompt = self.recommender.make_recommendation(username, name)
if prompt != "": if prompt != "":
@ -101,7 +102,8 @@ class ResponseGenerator:
elif action == "OffenseReject": elif action == "OffenseReject":
return "I am sorry, I cannot answer to this kind of language" return "I am sorry, I cannot answer to this kind of language"
elif action == "getHelp": elif action == "getHelp":
return "I can answer questions related to the papers in the VRE's catalog. I can also get you the posts, papers and datasets from the catalogue if you specify a topic or an author. I am also capable of small talk and summarizing papers to an extent. Just text me what you want and I will do it :)" commands = self.gen_response(action="listCommands")
return "I can answer questions related to the papers in the VRE's catalogue. I can also get you the posts, papers and datasets from the catalogue if you specify a topic or an author. I am also capable of small talk and summarizing papers to an extent. Just write to me what you want and I will do it. Alternatively, you may use one of the commands Janet supports. " + commands
elif action == "findPost": elif action == "findPost":
for entity in state['entities']: for entity in state['entities']:
@ -139,55 +141,55 @@ class ResponseGenerator:
for entity in state['entities']: for entity in state['entities']:
if (entity['entity'] == 'TITLE'): if (entity['entity'] == 'TITLE'):
self.paper = self._get_matching_titles('paper_db', entity['value']) self.paper = self._get_matching_titles('paper_db', entity['value'])
links = self._get_resources_links(self.paper) #links = self._get_resources_links(self.paper)
if len(self.paper) > 0 and len(links) > 0: if len(self.paper) > 0:# and len(links) > 0:
return str("Here is the paper you want: " + self.paper['title'] + '. ' + "It can be downloaded at " + links[0]) return str("Here is the paper you want: " + self.paper['title'] + '. ' + "It can be viewed at " + self.paper['url']) #links[0]
else: else:
self.paper = self._search_index('paper_titles_index', 'paper_db', entity['value']) self.paper = self._search_index('paper_titles_index', 'paper_db', entity['value'])
links = self._get_resources_links(self.paper) #links = self._get_resources_links(self.paper)
return str("This paper could be relevant: " + self.paper['title'] + '. ' + "It can be downloaded at " + links[0]) return str("This paper could be relevant: " + self.paper['title'] + '. ' + "It can be viewed at " + self.paper['url'])
if(entity['entity'] == 'TOPIC'): if(entity['entity'] == 'TOPIC'):
self.paper = self._get_matching_topics('paper_db', entity['value']) self.paper = self._get_matching_topics('paper_db', entity['value'])
links = self._get_resources_links(self.paper) #links = self._get_resources_links(self.paper)
if len(self.paper) > 0 and len(links) > 0: if len(self.paper) > 0: # and len(links) > 0:
return str("This paper could be relevant: " + self.paper['title'] + '. ' + "It can be downloaded at " + links[0]) return str("This paper could be relevant: " + self.paper['title'] + '. ' + "It can be viewed at " + self.paper['url'])
if(entity['entity'] == 'AUTHOR'): if(entity['entity'] == 'AUTHOR'):
self.paper = self._get_matching_authors('paper_db', entity['value']) self.paper = self._get_matching_authors('paper_db', entity['value'])
links = self._get_resources_links(self.paper) #links = self._get_resources_links(self.paper)
if len(self.paper) > 0 and len(links) > 0: if len(self.paper) > 0: # and len(links) > 0:
return str("Here is the paper you want: " + self.paper['title'] + '. ' + "It can be downloaded at " + links[0]) return str("Here is the paper you want: " + self.paper['title'] + '. ' + "It can be viewed at " + self.paper['url'])
self.paper = self._search_index('paper_desc_index', 'paper_db', utterance) self.paper = self._search_index('paper_desc_index', 'paper_db', utterance)
links = self._get_resources_links(self.paper) #links = self._get_resources_links(self.paper)
return str("This paper could be relevant: " + self.paper['title'] + '. ' + "It can be downloaded at " + links[0]) return str("This paper could be relevant: " + self.paper['title'] + '. ' + "It can be viewed at " + self.paper['url'])
elif action == "findDataset": elif action == "findDataset":
for entity in state['entities']: for entity in state['entities']:
if (entity['entity'] == 'TITLE'): if (entity['entity'] == 'TITLE'):
self.dataset = self._get_matching_titles('dataset_db', entity['value']) self.dataset = self._get_matching_titles('dataset_db', entity['value'])
links = self._get_resources_links(self.dataset) #links = self._get_resources_links(self.dataset)
if len(self.dataset) > 0 and len(links) > 0: if len(self.dataset) > 0: # and len(links) > 0:
return str("Here is the dataset you wanted: " + self.dataset['title'] + '. ' + "It can be downloaded at " + links[0]) return str("Here is the dataset you wanted: " + self.dataset['title'] + '. ' + "It can be viewed at " + self.dataset['url'])
else: else:
self.dataset = self._search_index('dataset_titles_index', 'dataset_db', entity['value']) self.dataset = self._search_index('dataset_titles_index', 'dataset_db', entity['value'])
links = self._get_resources_links(self.dataset) #links = self._get_resources_links(self.dataset)
return str("This dataset could be relevant: " + self.dataset['title'] + '. ' + "It can be downloaded at " + links[0]) return str("This dataset could be relevant: " + self.dataset['title'] + '. ' + "It can be viewed at " + self.dataset['url'])
if(entity['entity'] == 'TOPIC'): if(entity['entity'] == 'TOPIC'):
self.dataset = self._get_matching_topics('dataset_db', entity['value']) self.dataset = self._get_matching_topics('dataset_db', entity['value'])
links = self._get_resources_links(self.dataset) #links = self._get_resources_links(self.dataset)
if len(self.dataset) > 0 and len(links) > 0: if len(self.dataset) > 0: # and len(links) > 0:
return str("This dataset could be relevant: " + self.dataset['title'] + '. ' + "It can be downloaded at " + links[0]) return str("This dataset could be relevant: " + self.dataset['title'] + '. ' + "It can be viewed at " + self.dataset['url'])
if(entity['entity'] == 'AUTHOR'): if(entity['entity'] == 'AUTHOR'):
self.dataset = self._get_matching_authors('dataset_db', entity['value']) self.dataset = self._get_matching_authors('dataset_db', entity['value'])
links = self._get_resources_links(self.dataset) #links = self._get_resources_links(self.dataset)
if len(self.dataset) > 0 and len(links) > 0: if len(self.dataset) > 0: #and len(links) > 0:
return str("Here is the dataset you want: " + self.dataset['title'] + '. ' + "It can be downloaded at " + links[0]) return str("Here is the dataset you want: " + self.dataset['title'] + '. ' + "It can be viewed at " + self.dataset['url'])
self.dataset = self._search_index('dataset_desc_index', 'dataset_db', utterance) self.dataset = self._search_index('dataset_desc_index', 'dataset_db', utterance)
links = self._get_resources_links(self.dataset) #links = self._get_resources_links(self.dataset)
return str("This dataset could be relevant: " + self.dataset['title'] + '. ' + "It can be downloaded at " + links[0]) return str("This dataset could be relevant: " + self.dataset['title'] + '. ' + "It can be viewed at " + self.dataset['url'])
elif action == "RetGen": elif action == "RetGen":
@ -207,14 +209,61 @@ class ResponseGenerator:
gen_kwargs = {"length_penalty": 0.5, "num_beams":2, "max_length": 60, "repetition_penalty": 2.5, "temperature": 2} gen_kwargs = {"length_penalty": 0.5, "num_beams":2, "max_length": 60, "repetition_penalty": 2.5, "temperature": 2}
answer = self.generators['qa'](gen_seq, **gen_kwargs)[0]['generated_text'] answer = self.generators['qa'](gen_seq, **gen_kwargs)[0]['generated_text']
return "According to the following evidence: " + evidence + " \n _______ \n " + "The answer is: " + answer return "According to the following evidence: " + evidence + " \n _______ \n " + "The answer is: " + answer
elif action == "listPapers":
answer = vrename + " has the following papers: \n"
for i, pap in self.db['paper_db']:
answer = answer + ' ' + str(i) + ') ' + pap['title'] + ': ' + pap['notes'] + ' \n '
return answer
elif action == "listDatasets":
answer = vrename + " has the following datasets: \n"
for i, datase in self.db['dataset_db']:
answer = answer + ' ' + str(i) + ') ' + datase['title'] + ': ' +datase['notes'] + ' \n '
return answer
elif action == "listCommands":
return "Janet supports the following commands: \n 1) help : explains how to use Janet. \n 2) list resources : lists all the papers and datasets in the VRE. \n 3) list papers : lists all the papers in the VRE. \n 4) list datasets : lists all the datasets in the VRE. \n 5) list topics : lists the topics discussed in the VRE. \n 6) list commands : displays this list of commands. \n"
elif action == "listTopics":
topics = {}
for i, pos in self.db['post_db']:
for tag in pos['tags']:
topics[tag] = topics[tag]+1 if tag in topics else 1
topics = sorted(topics, reverse=True)
topic_string = topics[0]
for i in range(1, len(topics)):
topic_string = topic_string + ', ' + topics[i]
return "The main topics of " + vrename + " ordered by popularity are: " + topic_string + '. \n '
elif action == "listResources":
papers = self.gen_response(action="listPapers", vrename=vrename)
datasets = self.gen_response(action="listDatasets", vrename=vrename)
return papers + " Also, " + datasets
elif action == "command":
if utterance == "help":
return self.gen_response(action="Help", name=name, vrename=vrename)
elif utterance == "list resources":
return self.gen_response(action="listResources", vrename=vrename)
elif utterance == "list papers":
return self.gen_response(action="listPapers", vrename=vrename)
elif utterance == "list datasets":
return self.gen_response(action="listDatasets", vrename=vrename)
elif utterance == "list topics":
return self.gen_response(action="listTopics", vrename=vrename)
elif utterance == "list commands":
return self.gen_response(action="listCommands")
elif action == "sumPaper": elif action == "sumPaper":
if len(self.paper) == 0: if len(self.paper) == 0 or (len(self.paper) > 0 and len(state['entities'])>0)
for entity in state['entities']: for entity in state['entities']:
if (entity['entity'] == 'TITLE'): if (entity['entity'] == 'TITLE'):
self.paper = self._get_matching_titles('paper_db', entity['value']) paper = self._get_matching_titles('paper_db', entity['value'])
if (len(self.paper) > 0): if (len(paper) > 0):
self.paper = paper
break break
if (entity['entity'] == 'TOPIC'):
self.paper = self._get_matching_topics('paper_db', entity['value'])
if len(self.paper) == 0: if len(self.paper) == 0:
return "I cannot seem to find the requested paper. Try again by specifying the title of the paper." return "I cannot seem to find the requested paper. Try again by specifying the title of the paper."
#implement that #implement that

54
VRE.py
View File

@ -31,8 +31,8 @@ class VRE:
self.paper_counter = 0 self.paper_counter = 0
self.dataset_counter = 0 self.dataset_counter = 0
self.content_counter = 0 self.content_counter = 0
self.db = {'paper_db': pd.read_json(self.directory + self.name + '_paper.json') if os.path.isfile(self.directory + self.name + '_paper.json') else pd.DataFrame(columns=['id', 'type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created']), self.db = {'paper_db': pd.read_json(self.directory + self.name + '_paper.json') if os.path.isfile(self.directory + self.name + '_paper.json') else pd.DataFrame(columns=['id', 'type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created', 'url']),
'dataset_db': pd.read_json(self.directory + self.name + '_dataset.json') if os.path.isfile(self.directory + self.name + '_dataset.json') else pd.DataFrame(columns=['id', 'type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created']), 'dataset_db': pd.read_json(self.directory + self.name + '_dataset.json') if os.path.isfile(self.directory + self.name + '_dataset.json') else pd.DataFrame(columns=['id', 'type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created', 'url']),
'content_db': pd.read_json(self.directory + self.name + '_content.json') if os.path.isfile(self.directory + self.name + '_content.json') else pd.DataFrame(columns=['id', 'paperid', 'content']), 'content_db': pd.read_json(self.directory + self.name + '_content.json') if os.path.isfile(self.directory + self.name + '_content.json') else pd.DataFrame(columns=['id', 'paperid', 'content']),
'post_db': pd.read_json(self.directory + self.name + '_post.json') if os.path.isfile(self.directory + self.name + '_post.json') else pd.DataFrame(columns=['id', 'author', 'content', 'time', 'tags'])} 'post_db': pd.read_json(self.directory + self.name + '_post.json') if os.path.isfile(self.directory + self.name + '_post.json') else pd.DataFrame(columns=['id', 'author', 'content', 'time', 'tags'])}
self.index = {'dataset_titles_index': None if not os.path.isfile(self.directory + 'janet_dataset_titles_index') else faiss.read_index(self.directory + 'janet_dataset_titles_index'), self.index = {'dataset_titles_index': None if not os.path.isfile(self.directory + 'janet_dataset_titles_index') else faiss.read_index(self.directory + 'janet_dataset_titles_index'),
@ -45,8 +45,7 @@ class VRE:
def init(self): def init(self):
#first run #first run
if not os.path.isfile(self.directory + self.name + '_dataset' + '.json') or not os.path.isfile(self.directory + self.name + '_paper' + '.json') or not os.path.isfile(self.directory + self.name + '_content' + '.json') or not os.path.isfile(self.directory + self.name + '_post' + '.json'): self.get_content()
self.get_content()
if self.index['dataset_titles_index'] is None: if self.index['dataset_titles_index'] is None:
self.create_index('dataset_db', 'title', 'dataset_titles_index', 'janet_dataset_titles_index') self.create_index('dataset_db', 'title', 'dataset_titles_index', 'janet_dataset_titles_index')
self.populate_index('dataset_db', 'title', 'dataset_titles_index', 'janet_dataset_titles_index') self.populate_index('dataset_db', 'title', 'dataset_titles_index', 'janet_dataset_titles_index')
@ -153,14 +152,17 @@ class VRE:
keys = ['type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created'] keys = ['type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created']
paper_df = pd.DataFrame(columns=['id', 'type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created']) paper_df = pd.DataFrame(columns=['id', 'type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created', 'url'])
dataset_df = pd.DataFrame(columns=['id', 'type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created']) dataset_df = pd.DataFrame(columns=['id', 'type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created', 'url'])
content_df = pd.DataFrame(columns=['id', 'paperid', 'content']) content_df = pd.DataFrame(columns=['id', 'paperid', 'content'])
content_df = self.get_vre_info(content_df)
for item in items_data: for item in items_data:
for el in item['extras']: for el in item['extras']:
if el['key'] == 'system:type': if el['key'] == 'system:type':
rsrc = el['value'] rsrc = el['value']
if el['key'] == 'Item URL':
url = el['value']
resources = [] resources = []
for resource in item['resources']: for resource in item['resources']:
resources.append( resources.append(
@ -176,12 +178,12 @@ class VRE:
self.lastupdatetime = date self.lastupdatetime = date
if rsrc == 'Paper': if rsrc == 'Paper':
self.paper_counter += 1 self.paper_counter += 1
paper_df.loc[str(self.paper_counter)] = [self.paper_counter, rsrc, resources, tags, title, author, notes, date] paper_df.loc[str(self.paper_counter)] = [self.paper_counter, rsrc, resources, tags, title, author, notes, date, url]
content_df = self.get_pdf_content(item, content_df) content_df = self.get_pdf_content(item, content_df)
content_df = self.get_txt_content(item, content_df) content_df = self.get_txt_content(item, content_df)
if rsrc == 'Dataset': if rsrc == 'Dataset':
self.dataset_counter += 1 self.dataset_counter += 1
dataset_df.loc[str(self.dataset_counter)] = [self.dataset_counter, rsrc, resources, tags, title, author, notes, date] dataset_df.loc[str(self.dataset_counter)] = [self.dataset_counter, rsrc, resources, tags, title, author, notes, date, url]
self.db['paper_db'] = paper_df.sort_values(by='metadata_created', ascending=True) self.db['paper_db'] = paper_df.sort_values(by='metadata_created', ascending=True)
self.db['dataset_db'] = dataset_df.sort_values(by='metadata_created', ascending=True) self.db['dataset_db'] = dataset_df.sort_values(by='metadata_created', ascending=True)
@ -189,15 +191,17 @@ class VRE:
other_content_df = pd.DataFrame(columns=['id', 'paperid', 'content']) other_content_df = pd.DataFrame(columns=['id', 'paperid', 'content'])
for i, post in post_df.iterrows(): for i, post in post_df.iterrows():
self.content_counter+=1 if post['author'] != "Catalogue":
other_content_df.loc[str(self.content_counter)] = [self.content_counter, -1, post['author'] + ' posted: ' + post['content'] + ' It is about ' + ', '.join(post['tags'])] self.content_counter+=1
other_content_df.loc[str(self.content_counter)] = [self.content_counter, -1, post['author'] + ' posted: ' + post['content'] + ' It is about ' + ', '.join(post['tags'])]
"""
for i, description in dataset_df.iterrows(): for i, description in dataset_df.iterrows():
self.content_counter+=1 self.content_counter+=1
other_content_df.loc[str(self.content_counter)] = [self.content_counter, -2, description['title'] + ' is a dataset. ' + description['notes'] + ' It is about ' + ', '.join(description['tags']) ] other_content_df.loc[str(self.content_counter)] = [self.content_counter, -2, description['title'] + ' is a dataset. ' + description['notes'] + ' It is about ' + ', '.join(description['tags']) ]
for i, description in paper_df.iterrows(): for i, description in paper_df.iterrows():
self.content_counter+=1 self.content_counter+=1
other_content_df.loc[str(self.content_counter)] = [self.content_counter, -3, description['title'] + ' is a paper. ' + description['notes'] + ' It is about ' + ', '.join(description['tags']) ] other_content_df.loc[str(self.content_counter)] = [self.content_counter, -3, description['title'] + ' is a paper. ' + description['notes'] + ' It is about ' + ', '.join(description['tags']) ]
"""
self.db['content_db'] = pd.concat([content_df, other_content_df]) self.db['content_db'] = pd.concat([content_df, other_content_df])
self.db['paper_db'].to_json(self.directory + self.name + '_paper.json') self.db['paper_db'].to_json(self.directory + self.name + '_paper.json')
self.db['dataset_db'].to_json(self.directory + self.name + '_dataset.json') self.db['dataset_db'].to_json(self.directory + self.name + '_dataset.json')
@ -245,14 +249,16 @@ class VRE:
keys = ['type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created'] keys = ['type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created']
paper_df = pd.DataFrame(columns=['id', 'type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created']) paper_df = pd.DataFrame(columns=['id', 'type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created', 'url'])
dataset_df = pd.DataFrame(columns=['id', 'type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created']) dataset_df = pd.DataFrame(columns=['id', 'type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created', 'url'])
content_df = pd.DataFrame(columns=['id', 'paperid', 'content']) content_df = pd.DataFrame(columns=['id', 'paperid', 'content'])
for item in items_data: for item in items_data:
for el in item['extras']: for el in item['extras']:
if el['key'] == 'system:type': if el['key'] == 'system:type':
rsrc = el['value'] rsrc = el['value']
if el['key'] == 'Item URL':
url = el['value']
resources = [] resources = []
for resource in item['resources']: for resource in item['resources']:
resources.append( resources.append(
@ -269,12 +275,12 @@ class VRE:
if rsrc == 'Paper': if rsrc == 'Paper':
self.paper_counter += 1 self.paper_counter += 1
paper_df.loc[str(self.paper_counter)] = [self.paper_counter, rsrc, resources, tags, title, author, notes, date] paper_df.loc[str(self.paper_counter)] = [self.paper_counter, rsrc, resources, tags, title, author, notes, date, url]
content_df = self.get_pdf_content(item, content_df) content_df = self.get_pdf_content(item, content_df)
content_df = self.get_txt_content(item, content_df) content_df = self.get_txt_content(item, content_df)
if rsrc == 'Dataset': if rsrc == 'Dataset':
self.dataset_counter += 1 self.dataset_counter += 1
dataset_df.loc[str(self.dataset_counter)] = [self.dataset_counter, rsrc, resources, tags, title, author, notes, date] dataset_df.loc[str(self.dataset_counter)] = [self.dataset_counter, rsrc, resources, tags, title, author, notes, date, url]
self.db['paper_db'] = pd.concat([self.db['paper_db'], paper_df.sort_values(by='metadata_created', ascending=True)]) self.db['paper_db'] = pd.concat([self.db['paper_db'], paper_df.sort_values(by='metadata_created', ascending=True)])
self.db['dataset_db'] = pd.concat([self.db['dataset_db'], dataset_df.sort_values(by='metadata_created', ascending=True)]) self.db['dataset_db'] = pd.concat([self.db['dataset_db'], dataset_df.sort_values(by='metadata_created', ascending=True)])
@ -284,15 +290,17 @@ class VRE:
other_content_df = pd.DataFrame(columns=['id', 'paperid', 'content']) other_content_df = pd.DataFrame(columns=['id', 'paperid', 'content'])
for i, post in post_df.iterrows(): for i, post in post_df.iterrows():
self.content_counter+=1 if post['author'] != "Catalogue":
other_content_df.loc[str(self.content_counter)] = [self.content_counter, -1, post['author'] + ' posted: ' + post['content'] + ' It is about ' + ', '.join(post['tags'])] self.content_counter+=1
other_content_df.loc[str(self.content_counter)] = [self.content_counter, -1, post['author'] + ' posted: ' + post['content'] + ' It is about ' + ', '.join(post['tags'])]
"""
for i, description in dataset_df.iterrows(): for i, description in dataset_df.iterrows():
self.content_counter+=1 self.content_counter+=1
other_content_df.loc[str(self.content_counter)] = [self.content_counter, -2, description['title'] + ' is a dataset. ' + description['notes'] + ' It is about ' + ', '.join(description['tags']) ] other_content_df.loc[str(self.content_counter)] = [self.content_counter, -2, description['title'] + ' is a dataset. ' + description['notes'] + ' It is about ' + ', '.join(description['tags']) ]
for i, description in paper_df.iterrows(): for i, description in paper_df.iterrows():
self.content_counter+=1 self.content_counter+=1
other_content_df.loc[str(self.content_counter)] = [self.content_counter, -3, description['title'] + ' is a paper. ' + description['notes'] + ' It is about ' + ', '.join(description['tags']) ] other_content_df.loc[str(self.content_counter)] = [self.content_counter, -3, description['title'] + ' is a paper. ' + description['notes'] + ' It is about ' + ', '.join(description['tags']) ]
"""
self.db['paper_db'].to_json(self.directory + self.name + '_paper.json') self.db['paper_db'].to_json(self.directory + self.name + '_paper.json')
self.db['dataset_db'].to_json(self.directory + self.name + '_dataset.json') self.db['dataset_db'].to_json(self.directory + self.name + '_dataset.json')
@ -425,6 +433,14 @@ class VRE:
df.loc[str(self.content_counter)] = [self.content_counter, self.paper_counter, abstract] df.loc[str(self.content_counter)] = [self.content_counter, self.paper_counter, abstract]
return df return df
def get_vre_info(self, df, info_path='info.txt'):
    """Append the VRE's free-text description to a content dataframe.

    The description file is read, its newlines are flattened to spaces,
    and the text is passed through ``self.remove_useless_dots`` before
    being stored as a new row.

    Args:
        df: Dataframe that receives the row. The row is written as
            ``[id, paperid, content]``, so the frame is expected to have
            those three columns in that order.
        info_path: Path of the description file. Defaults to ``'info.txt'``,
            resolved against the current working directory.

    Returns:
        The same dataframe object, with one row added under the label
        ``str(self.content_counter)``.

    Raises:
        OSError: If ``info_path`` cannot be opened (for example, it is
            missing).
    """
    # Explicit encoding keeps decoding independent of the host locale.
    with open(info_path, 'r', encoding='utf-8') as file:
        content = file.read().replace('\n', ' ')
    # NOTE(review): remove_useless_dots is defined elsewhere in the class;
    # presumably it cleans up stray periods -- confirm against its body.
    content = self.remove_useless_dots(content)
    self.content_counter += 1
    # -6 is used as the paperid marker for this row; the post rows built
    # elsewhere in this class use -1, so it is not a real paper id.
    df.loc[str(self.content_counter)] = [self.content_counter, -6, content]
    return df
def get_txt_content(self, item, df): def get_txt_content(self, item, df):
for rsrc in tqdm(item['resources']): for rsrc in tqdm(item['resources']):
response = requests.get(rsrc['url']) response = requests.get(rsrc['url'])

1
info.txt Normal file
View File

@ -0,0 +1 @@
The assistedlab VRE is an environment conceived to deploy and test Janet, the conversational assistant of D4Science. It contains a catalogue which has selected papers about machine learning topics in general and conversational agents development in particular. It also contains some datasets related to these topics.

35
main.py
View File

@ -31,13 +31,6 @@ cors = CORS(app, resources={r"/api/predict": {"origins": url},
r"/api/dm": {"origins": url}, r"/api/dm": {"origins": url},
r"/health": {"origins": "*"} r"/health": {"origins": "*"}
}) })
conn = psycopg2.connect(
host="janet-pg",
database=os.getenv("POSTGRES_DB"),
user=os.getenv("POSTGRES_USER"),
password=os.getenv("POSTGRES_PASSWORD"))
users = {} users = {}
def vre_fetch(): def vre_fetch():
@ -155,18 +148,23 @@ def predict():
def feedback(): def feedback():
data = request.get_json().get("feedback") data = request.get_json().get("feedback")
print(data) print(data)
cur = conn.cursor()
cur.execute('INSERT INTO feedback_experimental (query, history, janet_modified_query, is_modified_query_correct, user_modified_query, evidence_useful, response, preferred_response, response_length_feedback, response_fluency_feedback, response_truth_feedback, response_useful_feedback, response_time_feedback, response_intent) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)', try:
(data['query'], data['history'], data['modQuery'], conn = psycopg2.connect(host="janet-pg", database=os.getenv("POSTGRES_DB"), user=os.getenv("POSTGRES_USER"), password=os.getenv("POSTGRES_PASSWORD"))
data['queryModCorrect'], data['correctQuery'], data['evidence'], data['janetResponse'], data['preferredResponse'], data['length'], cur = conn.cursor()
data['fluency'], data['truthfulness'], data['usefulness'], cur.execute('INSERT INTO feedback_experimental (query, history, janet_modified_query, is_modified_query_correct, user_modified_query, evidence_useful, response, preferred_response, response_length_feedback, response_fluency_feedback, response_truth_feedback, response_useful_feedback, response_time_feedback, response_intent) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)',
data['speed'], data['intent']) (data['query'], data['history'], data['modQuery'],
) data['queryModCorrect'], data['correctQuery'], data['evidence'], data['janetResponse'], data['preferredResponse'], data['length'],
conn.commit() data['fluency'], data['truthfulness'], data['usefulness'],
cur.close() data['speed'], data['intent']))
reply = jsonify({"status": "done"}) conn.commit()
return reply cur.close()
reply = jsonify({"status": "done"})
return reply
except Exception as e:
return jsonify({"status": str(e)})
if __name__ == "__main__": if __name__ == "__main__":
warnings.filterwarnings("ignore") warnings.filterwarnings("ignore")
device = "cuda" if torch.cuda.is_available() else "cpu" device = "cuda" if torch.cuda.is_available() else "cpu"
@ -200,6 +198,7 @@ if __name__ == "__main__":
threading.Thread(target=vre_fetch, name='updatevre').start() threading.Thread(target=vre_fetch, name='updatevre').start()
threading.Thread(target=clear_inactive, name='clear').start() threading.Thread(target=clear_inactive, name='clear').start()
conn = psycopg2.connect(host="janet-pg", database=os.getenv("POSTGRES_DB"), user=os.getenv("POSTGRES_USER"), password=os.getenv("POSTGRES_PASSWORD"))
cur = conn.cursor() cur = conn.cursor()

View File

@ -13,7 +13,7 @@ cors = CORS(app, resources={r"/api/predict": {"origins": url},
r"/health": {"origins": "*"} r"/health": {"origins": "*"}
}) })
users = {} users = {}
"""
conn = psycopg2.connect( conn = psycopg2.connect(
host="janet-pg", host="janet-pg",
database=os.getenv("POSTGRES_DB"), database=os.getenv("POSTGRES_DB"),
@ -21,7 +21,7 @@ conn = psycopg2.connect(
password=os.getenv("POSTGRES_PASSWORD")) password=os.getenv("POSTGRES_PASSWORD"))
cur = conn.cursor() cur = conn.cursor()
"""
@app.route("/health", methods=['GET']) @app.route("/health", methods=['GET'])
def health(): def health():
return "Success", 200 return "Success", 200
@ -50,7 +50,7 @@ def init_dm():
def predict(): def predict():
time.sleep(10) time.sleep(10)
text = request.get_json().get("message") text = request.get_json().get("message")
message = {"answer": "answer", "query": "text", "cand": "candidate", "history": "history", "modQuery": "modQuery"} message = {"answer": "https://api.d4science.org/rest/2/people/profile answer https://api.d4science.org/rest/2/people/profile answer https://api.d4science.org/rest/2/people/profile", "query": "text", "cand": "candidate", "history": "history", "modQuery": "modQuery"}
reply = jsonify(message) reply = jsonify(message)
return reply return reply
@ -58,7 +58,7 @@ def predict():
def feedback(): def feedback():
data = request.get_json().get("feedback") data = request.get_json().get("feedback")
print(data) print(data)
"""
cur.execute('INSERT INTO feedback_experimental (query, history, janet_modified_query, is_modified_query_correct, user_modified_query, evidence_useful, response, preferred_response, response_length_feedback, response_fluency_feedback, response_truth_feedback, response_useful_feedback, response_time_feedback, response_intent) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)', cur.execute('INSERT INTO feedback_experimental (query, history, janet_modified_query, is_modified_query_correct, user_modified_query, evidence_useful, response, preferred_response, response_length_feedback, response_fluency_feedback, response_truth_feedback, response_useful_feedback, response_time_feedback, response_intent) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)',
(data['query'], data['history'], data['modQuery'], (data['query'], data['history'], data['modQuery'],
data['queryModCorrect'], data['correctQuery'], data['evidence'], data['janetResponse'], data['preferredResponse'], data['length'], data['queryModCorrect'], data['correctQuery'], data['evidence'], data['janetResponse'], data['preferredResponse'], data['length'],
@ -66,12 +66,12 @@ def feedback():
data['speed'], data['intent']) data['speed'], data['intent'])
) )
conn.commit() conn.commit()
"""
reply = jsonify({"status": "done"}) reply = jsonify({"status": "done"})
return reply return reply
if __name__ == "__main__": if __name__ == "__main__":
"""
cur.execute('CREATE TABLE IF NOT EXISTS feedback_experimental (id serial PRIMARY KEY,' cur.execute('CREATE TABLE IF NOT EXISTS feedback_experimental (id serial PRIMARY KEY,'
'query text NOT NULL,' 'query text NOT NULL,'
'history text NOT NULL,' 'history text NOT NULL,'
@ -88,5 +88,5 @@ if __name__ == "__main__":
'response_intent text NOT NULL);' 'response_intent text NOT NULL);'
) )
conn.commit() conn.commit()
"""
app.run(host='0.0.0.0') app.run(host='0.0.0.0')