fix
This commit is contained in:
parent
489deeb3aa
commit
35dfb21bf4
14
DM.py
14
DM.py
|
@ -66,7 +66,19 @@ class DM:
|
|||
return "findDataset"
|
||||
elif self.curr_state['intent'] == 'SUMMARIZEPAPER':
|
||||
return "sumPaper"
|
||||
elif self.curr_state['intent'] == 'LISTPAPERS':
|
||||
return "listPapers"
|
||||
elif self.curr_state['intent'] == 'LISTDATASETS':
|
||||
return "listDatasets"
|
||||
elif self.curr_state['intent'] == 'LISTCOMMANDS':
|
||||
return "listCommands"
|
||||
elif self.curr_state['intent'] == 'LISTTOPICS':
|
||||
return "listTopics"
|
||||
elif self.curr_state['intent'] == 'LISTRESOURCES':
|
||||
return "listResources"
|
||||
elif self.curr_state['intent'] == 'COMMAND':
|
||||
return "command"
|
||||
else:
|
||||
return "ConvGen"
|
||||
return "RetGen"
|
||||
else:
|
||||
return "Clarify"
|
||||
|
|
3
NLU.py
3
NLU.py
|
@ -88,6 +88,9 @@ class NLU:
|
|||
"""
|
||||
Query -> coref resolution & intent extraction -> if intents are not confident or if query is ambig -> rewrite query and recheck -> if still ambig, ask a clarifying question
|
||||
"""
|
||||
if utterance in ["help", "list resources", "list papers", "list datasets", "list topics"]:
|
||||
return {"modified_query": utterance, "intent": "COMMAND", "entities": [], "is_offensive": False, "is_clear": True}
|
||||
|
||||
self.to_process = utterance
|
||||
|
||||
self.to_process = self._resolve_coref(history_consec)
|
||||
|
|
|
@ -90,7 +90,8 @@ class ResponseGenerator:
|
|||
|
||||
def gen_response(self, action, utterance=None, name=None, username=None, vrename=None, state=None, consec_history=None, chitchat_history=None):
|
||||
if action == "Help":
|
||||
return "Hey " + name + "! it's Janet! I am here to help you make use of the datasets and papers in the catalogue of the " + vrename +" VRE. I can answer questions whose answers may be inside the papers. I can summarize papers for you. I can also chat with you. So, whichever it is, I am ready to chat!"
|
||||
commands = " You can choose between using one of the supported commands to explore the environment or you can use natural language to find resourcesand get answers and summaries. \n "
|
||||
return "Hey " + name + "! it's Janet! I am here to help you make use of the datasets and papers in the catalogue of the " + vrename +" VRE. I can answer questions whose answers may be inside the papers. I can summarize papers for you. I can also chat with you. So, whichever it is, I am ready to chat!" + commands + self.gen_response(action="listCommands")
|
||||
elif action == "Recommend":
|
||||
prompt = self.recommender.make_recommendation(username, name)
|
||||
if prompt != "":
|
||||
|
@ -101,7 +102,8 @@ class ResponseGenerator:
|
|||
elif action == "OffenseReject":
|
||||
return "I am sorry, I cannot answer to this kind of language"
|
||||
elif action == "getHelp":
|
||||
return "I can answer questions related to the papers in the VRE's catalog. I can also get you the posts, papers and datasets from the catalogue if you specify a topic or an author. I am also capable of small talk and summarizing papers to an extent. Just text me what you want and I will do it :)"
|
||||
commands = self.gen_response(action="listCommands")
|
||||
return "I can answer questions related to the papers in the VRE's catalogue. I can also get you the posts, papers and datasets from the catalogue if you specify a topic or an author. I am also capable of small talk and summarizing papers to an extent. Just write to me what you want and I will do it. Alternatively, you may use one of the commands Janet supports. " + commands
|
||||
|
||||
elif action == "findPost":
|
||||
for entity in state['entities']:
|
||||
|
@ -139,55 +141,55 @@ class ResponseGenerator:
|
|||
for entity in state['entities']:
|
||||
if (entity['entity'] == 'TITLE'):
|
||||
self.paper = self._get_matching_titles('paper_db', entity['value'])
|
||||
links = self._get_resources_links(self.paper)
|
||||
if len(self.paper) > 0 and len(links) > 0:
|
||||
return str("Here is the paper you want: " + self.paper['title'] + '. ' + "It can be downloaded at " + links[0])
|
||||
#links = self._get_resources_links(self.paper)
|
||||
if len(self.paper) > 0:# and len(links) > 0:
|
||||
return str("Here is the paper you want: " + self.paper['title'] + '. ' + "It can be viewed at " + self.paper['url']) #links[0]
|
||||
else:
|
||||
self.paper = self._search_index('paper_titles_index', 'paper_db', entity['value'])
|
||||
links = self._get_resources_links(self.paper)
|
||||
return str("This paper could be relevant: " + self.paper['title'] + '. ' + "It can be downloaded at " + links[0])
|
||||
#links = self._get_resources_links(self.paper)
|
||||
return str("This paper could be relevant: " + self.paper['title'] + '. ' + "It can be viewed at " + self.paper['url'])
|
||||
if(entity['entity'] == 'TOPIC'):
|
||||
self.paper = self._get_matching_topics('paper_db', entity['value'])
|
||||
links = self._get_resources_links(self.paper)
|
||||
if len(self.paper) > 0 and len(links) > 0:
|
||||
return str("This paper could be relevant: " + self.paper['title'] + '. ' + "It can be downloaded at " + links[0])
|
||||
#links = self._get_resources_links(self.paper)
|
||||
if len(self.paper) > 0: # and len(links) > 0:
|
||||
return str("This paper could be relevant: " + self.paper['title'] + '. ' + "It can be viewed at " + self.paper['url'])
|
||||
|
||||
if(entity['entity'] == 'AUTHOR'):
|
||||
self.paper = self._get_matching_authors('paper_db', entity['value'])
|
||||
links = self._get_resources_links(self.paper)
|
||||
if len(self.paper) > 0 and len(links) > 0:
|
||||
return str("Here is the paper you want: " + self.paper['title'] + '. ' + "It can be downloaded at " + links[0])
|
||||
#links = self._get_resources_links(self.paper)
|
||||
if len(self.paper) > 0: # and len(links) > 0:
|
||||
return str("Here is the paper you want: " + self.paper['title'] + '. ' + "It can be viewed at " + self.paper['url'])
|
||||
|
||||
self.paper = self._search_index('paper_desc_index', 'paper_db', utterance)
|
||||
links = self._get_resources_links(self.paper)
|
||||
return str("This paper could be relevant: " + self.paper['title'] + '. ' + "It can be downloaded at " + links[0])
|
||||
#links = self._get_resources_links(self.paper)
|
||||
return str("This paper could be relevant: " + self.paper['title'] + '. ' + "It can be viewed at " + self.paper['url'])
|
||||
|
||||
elif action == "findDataset":
|
||||
for entity in state['entities']:
|
||||
if (entity['entity'] == 'TITLE'):
|
||||
self.dataset = self._get_matching_titles('dataset_db', entity['value'])
|
||||
links = self._get_resources_links(self.dataset)
|
||||
if len(self.dataset) > 0 and len(links) > 0:
|
||||
return str("Here is the dataset you wanted: " + self.dataset['title'] + '. ' + "It can be downloaded at " + links[0])
|
||||
#links = self._get_resources_links(self.dataset)
|
||||
if len(self.dataset) > 0: # and len(links) > 0:
|
||||
return str("Here is the dataset you wanted: " + self.dataset['title'] + '. ' + "It can be viewed at " + self.dataset['url'])
|
||||
else:
|
||||
self.dataset = self._search_index('dataset_titles_index', 'dataset_db', entity['value'])
|
||||
links = self._get_resources_links(self.dataset)
|
||||
return str("This dataset could be relevant: " + self.dataset['title'] + '. ' + "It can be downloaded at " + links[0])
|
||||
#links = self._get_resources_links(self.dataset)
|
||||
return str("This dataset could be relevant: " + self.dataset['title'] + '. ' + "It can be viewed at " + self.dataset['url'])
|
||||
if(entity['entity'] == 'TOPIC'):
|
||||
self.dataset = self._get_matching_topics('dataset_db', entity['value'])
|
||||
links = self._get_resources_links(self.dataset)
|
||||
if len(self.dataset) > 0 and len(links) > 0:
|
||||
return str("This dataset could be relevant: " + self.dataset['title'] + '. ' + "It can be downloaded at " + links[0])
|
||||
#links = self._get_resources_links(self.dataset)
|
||||
if len(self.dataset) > 0: # and len(links) > 0:
|
||||
return str("This dataset could be relevant: " + self.dataset['title'] + '. ' + "It can be viewed at " + self.dataset['url'])
|
||||
|
||||
if(entity['entity'] == 'AUTHOR'):
|
||||
self.dataset = self._get_matching_authors('dataset_db', entity['value'])
|
||||
links = self._get_resources_links(self.dataset)
|
||||
if len(self.dataset) > 0 and len(links) > 0:
|
||||
return str("Here is the dataset you want: " + self.dataset['title'] + '. ' + "It can be downloaded at " + links[0])
|
||||
#links = self._get_resources_links(self.dataset)
|
||||
if len(self.dataset) > 0: #and len(links) > 0:
|
||||
return str("Here is the dataset you want: " + self.dataset['title'] + '. ' + "It can be viewed at " + self.dataset['url'])
|
||||
|
||||
self.dataset = self._search_index('dataset_desc_index', 'dataset_db', utterance)
|
||||
links = self._get_resources_links(self.dataset)
|
||||
return str("This dataset could be relevant: " + self.dataset['title'] + '. ' + "It can be downloaded at " + links[0])
|
||||
#links = self._get_resources_links(self.dataset)
|
||||
return str("This dataset could be relevant: " + self.dataset['title'] + '. ' + "It can be viewed at " + self.dataset['url'])
|
||||
|
||||
|
||||
elif action == "RetGen":
|
||||
|
@ -207,14 +209,61 @@ class ResponseGenerator:
|
|||
gen_kwargs = {"length_penalty": 0.5, "num_beams":2, "max_length": 60, "repetition_penalty": 2.5, "temperature": 2}
|
||||
answer = self.generators['qa'](gen_seq, **gen_kwargs)[0]['generated_text']
|
||||
return "According to the following evidence: " + evidence + " \n _______ \n " + "The answer is: " + answer
|
||||
elif action == "listPapers":
|
||||
answer = vrename + " has the following papers: \n"
|
||||
for i, pap in self.db['paper_db']:
|
||||
answer = answer + ' ' + str(i) + ') ' + pap['title'] + ': ' + pap['notes'] + ' \n '
|
||||
return answer
|
||||
|
||||
elif action == "listDatasets":
|
||||
answer = vrename + " has the following datasets: \n"
|
||||
for i, datase in self.db['dataset_db']:
|
||||
answer = answer + ' ' + str(i) + ') ' + datase['title'] + ': ' +datase['notes'] + ' \n '
|
||||
return answer
|
||||
|
||||
elif action == "listCommands":
|
||||
return "Janet supports the following commands: \n 1) help : explains how to use Janet. \n 2) list resources : lists all the papers and datasets in the VRE. \n 3) list papers : lists all the papers in the VRE. \n 4) list datasets : lists all the datasets in the VRE. \n 5) list topics : lists the topics discussed in the VRE. \n 6) list commands : displays this list of commands. \n"
|
||||
|
||||
elif action == "listTopics":
|
||||
topics = {}
|
||||
for i, pos in self.db['post_db']:
|
||||
for tag in pos['tags']:
|
||||
topics[tag] = topics[tag]+1 if tag in topics else 1
|
||||
topics = sorted(topics, reverse=True)
|
||||
topic_string = topics[0]
|
||||
for i in range(1, len(topics)):
|
||||
topic_string = topic_string + ', ' + topics[i]
|
||||
return "The main topics of " + vrename + " ordered by popularity are: " + topic_string + '. \n '
|
||||
|
||||
elif action == "listResources":
|
||||
papers = self.gen_response(action="listPapers", vrename=vrename)
|
||||
datasets = self.gen_response(action="listDatasets", vrename=vrename)
|
||||
return papers + " Also, " + datasets
|
||||
|
||||
elif action == "command":
|
||||
if utterance == "help":
|
||||
return self.gen_response(action="Help", name=name, vrename=vrename)
|
||||
elif utterance == "list resources":
|
||||
return self.gen_response(action="listResources", vrename=vrename)
|
||||
elif utterance == "list papers":
|
||||
return self.gen_response(action="listPapers", vrename=vrename)
|
||||
elif utterance == "list datasets":
|
||||
return self.gen_response(action="listDatasets", vrename=vrename)
|
||||
elif utterance == "list topics":
|
||||
return self.gen_response(action="listTopics", vrename=vrename)
|
||||
elif utterance == "list commands":
|
||||
return self.gen_response(action="listCommands")
|
||||
|
||||
elif action == "sumPaper":
|
||||
if len(self.paper) == 0:
|
||||
if len(self.paper) == 0 or (len(self.paper) > 0 and len(state['entities'])>0)
|
||||
for entity in state['entities']:
|
||||
if (entity['entity'] == 'TITLE'):
|
||||
self.paper = self._get_matching_titles('paper_db', entity['value'])
|
||||
if (len(self.paper) > 0):
|
||||
paper = self._get_matching_titles('paper_db', entity['value'])
|
||||
if (len(paper) > 0):
|
||||
self.paper = paper
|
||||
break
|
||||
if (entity['entity'] == 'TOPIC'):
|
||||
self.paper = self._get_matching_topics('paper_db', entity['value'])
|
||||
if len(self.paper) == 0:
|
||||
return "I cannot seem to find the requested paper. Try again by specifying the title of the paper."
|
||||
#implement that
|
||||
|
|
52
VRE.py
52
VRE.py
|
@ -31,8 +31,8 @@ class VRE:
|
|||
self.paper_counter = 0
|
||||
self.dataset_counter = 0
|
||||
self.content_counter = 0
|
||||
self.db = {'paper_db': pd.read_json(self.directory + self.name + '_paper.json') if os.path.isfile(self.directory + self.name + '_paper.json') else pd.DataFrame(columns=['id', 'type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created']),
|
||||
'dataset_db': pd.read_json(self.directory + self.name + '_dataset.json') if os.path.isfile(self.directory + self.name + '_dataset.json') else pd.DataFrame(columns=['id', 'type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created']),
|
||||
self.db = {'paper_db': pd.read_json(self.directory + self.name + '_paper.json') if os.path.isfile(self.directory + self.name + '_paper.json') else pd.DataFrame(columns=['id', 'type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created', 'url']),
|
||||
'dataset_db': pd.read_json(self.directory + self.name + '_dataset.json') if os.path.isfile(self.directory + self.name + '_dataset.json') else pd.DataFrame(columns=['id', 'type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created', 'url']),
|
||||
'content_db': pd.read_json(self.directory + self.name + '_content.json') if os.path.isfile(self.directory + self.name + '_content.json') else pd.DataFrame(columns=['id', 'paperid', 'content']),
|
||||
'post_db': pd.read_json(self.directory + self.name + '_post.json') if os.path.isfile(self.directory + self.name + '_post.json') else pd.DataFrame(columns=['id', 'author', 'content', 'time', 'tags'])}
|
||||
self.index = {'dataset_titles_index': None if not os.path.isfile(self.directory + 'janet_dataset_titles_index') else faiss.read_index(self.directory + 'janet_dataset_titles_index'),
|
||||
|
@ -45,8 +45,7 @@ class VRE:
|
|||
|
||||
def init(self):
|
||||
#first run
|
||||
if not os.path.isfile(self.directory + self.name + '_dataset' + '.json') or not os.path.isfile(self.directory + self.name + '_paper' + '.json') or not os.path.isfile(self.directory + self.name + '_content' + '.json') or not os.path.isfile(self.directory + self.name + '_post' + '.json'):
|
||||
self.get_content()
|
||||
self.get_content()
|
||||
if self.index['dataset_titles_index'] is None:
|
||||
self.create_index('dataset_db', 'title', 'dataset_titles_index', 'janet_dataset_titles_index')
|
||||
self.populate_index('dataset_db', 'title', 'dataset_titles_index', 'janet_dataset_titles_index')
|
||||
|
@ -153,14 +152,17 @@ class VRE:
|
|||
|
||||
keys = ['type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created']
|
||||
|
||||
paper_df = pd.DataFrame(columns=['id', 'type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created'])
|
||||
dataset_df = pd.DataFrame(columns=['id', 'type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created'])
|
||||
paper_df = pd.DataFrame(columns=['id', 'type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created', 'url'])
|
||||
dataset_df = pd.DataFrame(columns=['id', 'type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created', 'url'])
|
||||
content_df = pd.DataFrame(columns=['id', 'paperid', 'content'])
|
||||
content_df = self.get_vre_info(content_df)
|
||||
|
||||
for item in items_data:
|
||||
for el in item['extras']:
|
||||
if el['key'] == 'system:type':
|
||||
rsrc = el['value']
|
||||
if el['key'] == 'Item URL':
|
||||
url = el['value']
|
||||
resources = []
|
||||
for resource in item['resources']:
|
||||
resources.append(
|
||||
|
@ -176,12 +178,12 @@ class VRE:
|
|||
self.lastupdatetime = date
|
||||
if rsrc == 'Paper':
|
||||
self.paper_counter += 1
|
||||
paper_df.loc[str(self.paper_counter)] = [self.paper_counter, rsrc, resources, tags, title, author, notes, date]
|
||||
paper_df.loc[str(self.paper_counter)] = [self.paper_counter, rsrc, resources, tags, title, author, notes, date, url]
|
||||
content_df = self.get_pdf_content(item, content_df)
|
||||
content_df = self.get_txt_content(item, content_df)
|
||||
if rsrc == 'Dataset':
|
||||
self.dataset_counter += 1
|
||||
dataset_df.loc[str(self.dataset_counter)] = [self.dataset_counter, rsrc, resources, tags, title, author, notes, date]
|
||||
dataset_df.loc[str(self.dataset_counter)] = [self.dataset_counter, rsrc, resources, tags, title, author, notes, date, url]
|
||||
|
||||
self.db['paper_db'] = paper_df.sort_values(by='metadata_created', ascending=True)
|
||||
self.db['dataset_db'] = dataset_df.sort_values(by='metadata_created', ascending=True)
|
||||
|
@ -189,15 +191,17 @@ class VRE:
|
|||
|
||||
other_content_df = pd.DataFrame(columns=['id', 'paperid', 'content'])
|
||||
for i, post in post_df.iterrows():
|
||||
self.content_counter+=1
|
||||
other_content_df.loc[str(self.content_counter)] = [self.content_counter, -1, post['author'] + ' posted: ' + post['content'] + ' It is about ' + ', '.join(post['tags'])]
|
||||
if post['author'] != "Catalogue":
|
||||
self.content_counter+=1
|
||||
other_content_df.loc[str(self.content_counter)] = [self.content_counter, -1, post['author'] + ' posted: ' + post['content'] + ' It is about ' + ', '.join(post['tags'])]
|
||||
"""
|
||||
for i, description in dataset_df.iterrows():
|
||||
self.content_counter+=1
|
||||
other_content_df.loc[str(self.content_counter)] = [self.content_counter, -2, description['title'] + ' is a dataset. ' + description['notes'] + ' It is about ' + ', '.join(description['tags']) ]
|
||||
for i, description in paper_df.iterrows():
|
||||
self.content_counter+=1
|
||||
other_content_df.loc[str(self.content_counter)] = [self.content_counter, -3, description['title'] + ' is a paper. ' + description['notes'] + ' It is about ' + ', '.join(description['tags']) ]
|
||||
|
||||
"""
|
||||
self.db['content_db'] = pd.concat([content_df, other_content_df])
|
||||
self.db['paper_db'].to_json(self.directory + self.name + '_paper.json')
|
||||
self.db['dataset_db'].to_json(self.directory + self.name + '_dataset.json')
|
||||
|
@ -245,14 +249,16 @@ class VRE:
|
|||
|
||||
keys = ['type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created']
|
||||
|
||||
paper_df = pd.DataFrame(columns=['id', 'type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created'])
|
||||
dataset_df = pd.DataFrame(columns=['id', 'type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created'])
|
||||
paper_df = pd.DataFrame(columns=['id', 'type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created', 'url'])
|
||||
dataset_df = pd.DataFrame(columns=['id', 'type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created', 'url'])
|
||||
content_df = pd.DataFrame(columns=['id', 'paperid', 'content'])
|
||||
|
||||
for item in items_data:
|
||||
for el in item['extras']:
|
||||
if el['key'] == 'system:type':
|
||||
rsrc = el['value']
|
||||
if el['key'] == 'Item URL':
|
||||
url = el['value']
|
||||
resources = []
|
||||
for resource in item['resources']:
|
||||
resources.append(
|
||||
|
@ -269,12 +275,12 @@ class VRE:
|
|||
|
||||
if rsrc == 'Paper':
|
||||
self.paper_counter += 1
|
||||
paper_df.loc[str(self.paper_counter)] = [self.paper_counter, rsrc, resources, tags, title, author, notes, date]
|
||||
paper_df.loc[str(self.paper_counter)] = [self.paper_counter, rsrc, resources, tags, title, author, notes, date, url]
|
||||
content_df = self.get_pdf_content(item, content_df)
|
||||
content_df = self.get_txt_content(item, content_df)
|
||||
if rsrc == 'Dataset':
|
||||
self.dataset_counter += 1
|
||||
dataset_df.loc[str(self.dataset_counter)] = [self.dataset_counter, rsrc, resources, tags, title, author, notes, date]
|
||||
dataset_df.loc[str(self.dataset_counter)] = [self.dataset_counter, rsrc, resources, tags, title, author, notes, date, url]
|
||||
|
||||
self.db['paper_db'] = pd.concat([self.db['paper_db'], paper_df.sort_values(by='metadata_created', ascending=True)])
|
||||
self.db['dataset_db'] = pd.concat([self.db['dataset_db'], dataset_df.sort_values(by='metadata_created', ascending=True)])
|
||||
|
@ -284,15 +290,17 @@ class VRE:
|
|||
|
||||
other_content_df = pd.DataFrame(columns=['id', 'paperid', 'content'])
|
||||
for i, post in post_df.iterrows():
|
||||
self.content_counter+=1
|
||||
other_content_df.loc[str(self.content_counter)] = [self.content_counter, -1, post['author'] + ' posted: ' + post['content'] + ' It is about ' + ', '.join(post['tags'])]
|
||||
if post['author'] != "Catalogue":
|
||||
self.content_counter+=1
|
||||
other_content_df.loc[str(self.content_counter)] = [self.content_counter, -1, post['author'] + ' posted: ' + post['content'] + ' It is about ' + ', '.join(post['tags'])]
|
||||
"""
|
||||
for i, description in dataset_df.iterrows():
|
||||
self.content_counter+=1
|
||||
other_content_df.loc[str(self.content_counter)] = [self.content_counter, -2, description['title'] + ' is a dataset. ' + description['notes'] + ' It is about ' + ', '.join(description['tags']) ]
|
||||
for i, description in paper_df.iterrows():
|
||||
self.content_counter+=1
|
||||
other_content_df.loc[str(self.content_counter)] = [self.content_counter, -3, description['title'] + ' is a paper. ' + description['notes'] + ' It is about ' + ', '.join(description['tags']) ]
|
||||
|
||||
"""
|
||||
|
||||
self.db['paper_db'].to_json(self.directory + self.name + '_paper.json')
|
||||
self.db['dataset_db'].to_json(self.directory + self.name + '_dataset.json')
|
||||
|
@ -425,6 +433,14 @@ class VRE:
|
|||
df.loc[str(self.content_counter)] = [self.content_counter, self.paper_counter, abstract]
|
||||
return df
|
||||
|
||||
def get_vre_info(self, df):
|
||||
with open('info.txt', 'r') as file:
|
||||
content = file.read().replace('\n', ' ')
|
||||
content = self.remove_useless_dots(content)
|
||||
self.content_counter += 1
|
||||
df.loc[str(self.content_counter)] = [self.content_counter, -6, content]
|
||||
return df
|
||||
|
||||
def get_txt_content(self, item, df):
|
||||
for rsrc in tqdm(item['resources']):
|
||||
response = requests.get(rsrc['url'])
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
The assistedlab VRE is an environment conceived to deploy and test Janet, the conversational assistant of D4Science. It contains a catalogue which has selected papers about machine learning topics in general and conversational agent development in particular. It also contains some datasets related to these topics.
|
35
main.py
35
main.py
|
@ -31,13 +31,6 @@ cors = CORS(app, resources={r"/api/predict": {"origins": url},
|
|||
r"/api/dm": {"origins": url},
|
||||
r"/health": {"origins": "*"}
|
||||
})
|
||||
|
||||
conn = psycopg2.connect(
|
||||
host="janet-pg",
|
||||
database=os.getenv("POSTGRES_DB"),
|
||||
user=os.getenv("POSTGRES_USER"),
|
||||
password=os.getenv("POSTGRES_PASSWORD"))
|
||||
|
||||
users = {}
|
||||
|
||||
def vre_fetch():
|
||||
|
@ -155,17 +148,22 @@ def predict():
|
|||
def feedback():
|
||||
data = request.get_json().get("feedback")
|
||||
print(data)
|
||||
cur = conn.cursor()
|
||||
cur.execute('INSERT INTO feedback_experimental (query, history, janet_modified_query, is_modified_query_correct, user_modified_query, evidence_useful, response, preferred_response, response_length_feedback, response_fluency_feedback, response_truth_feedback, response_useful_feedback, response_time_feedback, response_intent) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)',
|
||||
(data['query'], data['history'], data['modQuery'],
|
||||
data['queryModCorrect'], data['correctQuery'], data['evidence'], data['janetResponse'], data['preferredResponse'], data['length'],
|
||||
data['fluency'], data['truthfulness'], data['usefulness'],
|
||||
data['speed'], data['intent'])
|
||||
)
|
||||
conn.commit()
|
||||
cur.close()
|
||||
reply = jsonify({"status": "done"})
|
||||
return reply
|
||||
|
||||
try:
|
||||
conn = psycopg2.connect(host="janet-pg", database=os.getenv("POSTGRES_DB"), user=os.getenv("POSTGRES_USER"), password=os.getenv("POSTGRES_PASSWORD"))
|
||||
cur = conn.cursor()
|
||||
cur.execute('INSERT INTO feedback_experimental (query, history, janet_modified_query, is_modified_query_correct, user_modified_query, evidence_useful, response, preferred_response, response_length_feedback, response_fluency_feedback, response_truth_feedback, response_useful_feedback, response_time_feedback, response_intent) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)',
|
||||
(data['query'], data['history'], data['modQuery'],
|
||||
data['queryModCorrect'], data['correctQuery'], data['evidence'], data['janetResponse'], data['preferredResponse'], data['length'],
|
||||
data['fluency'], data['truthfulness'], data['usefulness'],
|
||||
data['speed'], data['intent']))
|
||||
conn.commit()
|
||||
cur.close()
|
||||
reply = jsonify({"status": "done"})
|
||||
return reply
|
||||
|
||||
except Exception as e:
|
||||
return jsonify({"status": str(e)})
|
||||
|
||||
if __name__ == "__main__":
|
||||
warnings.filterwarnings("ignore")
|
||||
|
@ -200,6 +198,7 @@ if __name__ == "__main__":
|
|||
|
||||
threading.Thread(target=vre_fetch, name='updatevre').start()
|
||||
threading.Thread(target=clear_inactive, name='clear').start()
|
||||
conn = psycopg2.connect(host="janet-pg", database=os.getenv("POSTGRES_DB"), user=os.getenv("POSTGRES_USER"), password=os.getenv("POSTGRES_PASSWORD"))
|
||||
|
||||
cur = conn.cursor()
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@ cors = CORS(app, resources={r"/api/predict": {"origins": url},
|
|||
r"/health": {"origins": "*"}
|
||||
})
|
||||
users = {}
|
||||
|
||||
"""
|
||||
conn = psycopg2.connect(
|
||||
host="janet-pg",
|
||||
database=os.getenv("POSTGRES_DB"),
|
||||
|
@ -21,7 +21,7 @@ conn = psycopg2.connect(
|
|||
password=os.getenv("POSTGRES_PASSWORD"))
|
||||
|
||||
cur = conn.cursor()
|
||||
|
||||
"""
|
||||
@app.route("/health", methods=['GET'])
|
||||
def health():
|
||||
return "Success", 200
|
||||
|
@ -50,7 +50,7 @@ def init_dm():
|
|||
def predict():
|
||||
time.sleep(10)
|
||||
text = request.get_json().get("message")
|
||||
message = {"answer": "answer", "query": "text", "cand": "candidate", "history": "history", "modQuery": "modQuery"}
|
||||
message = {"answer": "https://api.d4science.org/rest/2/people/profile answer https://api.d4science.org/rest/2/people/profile answer https://api.d4science.org/rest/2/people/profile", "query": "text", "cand": "candidate", "history": "history", "modQuery": "modQuery"}
|
||||
reply = jsonify(message)
|
||||
return reply
|
||||
|
||||
|
@ -58,7 +58,7 @@ def predict():
|
|||
def feedback():
|
||||
data = request.get_json().get("feedback")
|
||||
print(data)
|
||||
|
||||
"""
|
||||
cur.execute('INSERT INTO feedback_experimental (query, history, janet_modified_query, is_modified_query_correct, user_modified_query, evidence_useful, response, preferred_response, response_length_feedback, response_fluency_feedback, response_truth_feedback, response_useful_feedback, response_time_feedback, response_intent) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)',
|
||||
(data['query'], data['history'], data['modQuery'],
|
||||
data['queryModCorrect'], data['correctQuery'], data['evidence'], data['janetResponse'], data['preferredResponse'], data['length'],
|
||||
|
@ -66,12 +66,12 @@ def feedback():
|
|||
data['speed'], data['intent'])
|
||||
)
|
||||
conn.commit()
|
||||
|
||||
"""
|
||||
reply = jsonify({"status": "done"})
|
||||
return reply
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
"""
|
||||
cur.execute('CREATE TABLE IF NOT EXISTS feedback_experimental (id serial PRIMARY KEY,'
|
||||
'query text NOT NULL,'
|
||||
'history text NOT NULL,'
|
||||
|
@ -88,5 +88,5 @@ if __name__ == "__main__":
|
|||
'response_intent text NOT NULL);'
|
||||
)
|
||||
conn.commit()
|
||||
|
||||
"""
|
||||
app.run(host='0.0.0.0')
|
||||
|
|
Loading…
Reference in New Issue