working locally
This commit is contained in:
parent
1efd0ac18d
commit
337da3f730
6
NLU.py
6
NLU.py
|
@ -1,5 +1,5 @@
|
||||||
import spacy
|
#import spacy
|
||||||
import spacy_transformers
|
#import spacy_transformers
|
||||||
import torch
|
import torch
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
@ -111,7 +111,7 @@ class NLU:
|
||||||
outputs = self.model.generate(input_ids=inputs, max_new_tokens=150)
|
outputs = self.model.generate(input_ids=inputs, max_new_tokens=150)
|
||||||
|
|
||||||
goal = self.tokenizer.decode(outputs[0])
|
goal = self.tokenizer.decode(outputs[0])
|
||||||
logging.debug("User's goal is:" + goal)
|
logging.info("User's goal is:" + goal)
|
||||||
|
|
||||||
#return goal.split("<start_of_turn>model\n")[-1].split("<eos>")[0]
|
#return goal.split("<start_of_turn>model\n")[-1].split("<eos>")[0]
|
||||||
return {"modified_query": goal.split("<start_of_turn>model\n")[-1].split("<eos>")[0],
|
return {"modified_query": goal.split("<start_of_turn>model\n")[-1].split("<eos>")[0],
|
||||||
|
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1 @@
|
||||||
|
{"id":{"3":3,"2":2,"1":1},"type":{"3":"Dataset","2":"Dataset","1":"Dataset"},"resources":{"3":[{"name":"intent classification dataset","url":"https:\/\/data.d4science.net\/899P","description":""}],"2":[{"name":"offensive language dataset","url":"https:\/\/data.d4science.net\/jZME","description":""}],"1":[{"name":"validation set","url":"https:\/\/data.d4science.net\/dTLm","description":""},{"name":"training set","url":"https:\/\/data.d4science.net\/6MXr","description":""}]},"tags":{"3":["intent classification"],"2":["hate speech detection","offensive language detection"],"1":["entity extraction"]},"title":{"3":"intent classification dataset","2":"offensive language dataset","1":"custom entity extraction dataset"},"author":{"3":"ibrahim ahmed salah tawfik","2":"ibrahim ahmed salah tawfik","1":"ibrahim ahmed salah tawfik"},"notes":{"3":"this is a dataset of possible inputs and their intents. it has been \r\ndeveloped for the purposes of developing a conversational agent for the \r\nvres. the supported intents are chitchat, findpaper, finddataset, qa and \r\nsummarizepapaer.","2":"this dataset consists of input texts and their labels in terms of being \r\noffensive, hateful or neither. it can be used for developing a model for \r\ndetecting offensive language.","1":"this dataset is supposed to be used with the spacy library to develop an \r\nentity extraction model. the supported entites are topic, author, date, \r\nqualifier and resource type."},"metadata_created":{"3":1676239892.8178350925,"2":1676240043.0586650372,"1":1676240176.4970309734},"url":{"3":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/intent_classification_dataset","2":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/offensive_language_dataset","1":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/custom_entity_extraction_dataset"}}
|
|
@ -0,0 +1 @@
|
||||||
|
{"id":{"7":7,"6":6,"5":5,"4":4,"3":3,"2":2,"1":1},"type":{"7":"Paper","6":"Paper","5":"Paper","4":"Paper","3":"Paper","2":"Paper","1":"Paper"},"resources":{"7":[{"name":"reinforcement learning","url":"https:\/\/data.d4science.net\/QDPK","description":""}],"6":[{"name":"retrieval-augmented generation","url":"https:\/\/data.d4science.net\/x3Yy","description":""}],"5":[{"name":"neural approaches to conversational information retrieval","url":"https:\/\/data.d4science.net\/Fr32","description":""}],"4":[{"name":"paper","url":"https:\/\/data.d4science.org\/shub\/E_cERSSERldlBFak1pOTZ4eXJRajM3ekl4a3l0L0JBZmpENE01TGRvNEE3TnB4UEhUTENTQ1RzbnJWQVFPKzRacg==","description":"paper about chatbots"}],"3":[{"name":"learning to summarize from human feedback","url":"https:\/\/data.d4science.org\/shub\/E_cnlTU2xJMTVXbXpSTHVJcDZPQkl0eThOUGRGR3ZqaFZUZGdWUmtHb25wN2pPbW9RUDVINFdQUXl1T1dwTXY5Vw==","description":"paper about developing models for machine summarization using human \r\nfeedback"}],"2":[{"name":"deep reinforcement learning from human preferences","url":"https:\/\/data.d4science.org\/shub\/E_NUMzdFB1Q0xiRGl4S2hFa3VEcU11NExrMVppb29hT0RvdEkwWDdOdTAyMWFLeTBleGx1V2Z5Z28rVVpBSlBYbQ==","description":""}],"1":[{"name":"candela et al. 
- 2013 - virtual research environments an overview and a r","url":"https:\/\/data.d4science.org\/shub\/E_VjR6dHFhcU1ycDVaSWpCbTYyWnNudkpOV1FiWGpKdFdaREVuRWE1OWk3RE1yS0lzRTdOTEh4Szdlb1lTREF5aw==","description":""}]},"tags":{"7":["reinforcement learning"],"6":["qa","retrieval","answer generation","transformers"],"5":["cir","ir","conversational agents","conversational information retrieval","neural information retrieval"],"4":["chatbot","conversational agent"],"3":["deep learning","language generation","reinforcement learning","reinforcement learning from human feedback","rlhf"],"2":["reinforcement learning","reinforcement learning from human feedback","rlhf"],"1":["vre","virtual research environment"]},"title":{"7":"reinforcement learning","6":"retrieval-augmented language generation","5":"neural conversational information retrieval","4":"survey about chatbots","3":"summarizing from human feedback","2":"deep reinforcement learning from human preferences","1":"virtual research environments: an overview and a research agenda"},"author":{"7":"ibrahim ahmed salah tawfik","6":"ibrahim ahmed salah tawfik","5":"ibrahim ahmed salah tawfik","4":"ibrahim ahmed salah tawfik","3":"ibrahim ahmed salah tawfik","2":"ibrahim ahmed salah tawfik","1":"candela leonardo"},"notes":{"7":"this paper explains the main concepts and algorithms of reinforcement \r\nlearning.","6":"this paper describes an algorithm to generate answers based on paragraphs \r\npotentially containing the answer.","5":"this paper is a survey about the different applications in which neural \r\nconversational information retrieval can be used.","4":"this paper presents a survey about chatbots and conversational agents.","3":"this paper explains a method for generating machine translations from human \r\nfeedback.","2":"this paper explains how to use human preferences to generate rewards for a \r\nreinforcement learning algorithm.","1":"virtual research environments are innovative, web-based, \r\ncommunity-oriented, 
comprehensive, flexible, and secure working \r\nenvironments conceived to serve the needs of modern science. we overview \r\nthe existing initiatives developing these environments by highlighting the \r\nmajor distinguishing features. we envisage a future where regardless of \r\ngeographical location, scientists will be able to use their web browsers to \r\nseamlessly access data, software, and processing resources that are managed \r\nby diverse systems in separate administration domains via virtual research \r\nenvironments. we identify and discuss the major challenges that should be \r\nresolved to fully achieve the proposed vision, i.e., large-scale \r\nintegration and interoperability, sustainability, and adoption."},"metadata_created":{"7":1676130193.0102539062,"6":1676130537.5889539719,"5":1676130738.7923879623,"4":1681208191.8971168995,"3":1681446416.8742809296,"2":1681446678.2581589222,"1":1681916621.1160180569},"url":{"7":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/reinforcement_learning","6":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/retrieval-augmented_language_generation","5":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/neural_conversational_information_retrieval","4":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/survey_about_chatbots","3":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/summarizing_from_human_feedback","2":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/deep_reinforcement_learning_from_human_preferences","1":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/virtual_research_environments_an_overview_and_a_research_agenda"}}
|
File diff suppressed because one or more lines are too long
26
info.txt
26
info.txt
|
@ -1 +1,27 @@
|
||||||
The assistedlab VRE is an environment conceived to deploy and test Janet, the conversational assistant of D4Science. It contains a catalogue of selected papers about machine learning topics in general and the development of conversational agents in particular. It also contains some datasets related to these topics.
|
The assistedlab VRE is an environment conceived to deploy and test Janet, the conversational assistant of D4Science. It contains a catalogue of selected papers about machine learning topics in general and the development of conversational agents in particular. It also contains some datasets related to these topics.
|
||||||
|
|
||||||
|
#nltk==3.7
|
||||||
|
#numpy==1.22.4
|
||||||
|
#pandas==1.3.5
|
||||||
|
#scikit-learn==1.0.2
|
||||||
|
#scipy==1.7.3
|
||||||
|
#sentencepiece==0.1.97
|
||||||
|
#sklearn-pandas==1.8.0
|
||||||
|
#spacy==3.4.4
|
||||||
|
#spacy-alignments==0.9.0
|
||||||
|
#spacy-legacy==3.0.12
|
||||||
|
#spacy-loggers==1.0.4
|
||||||
|
#spacy-transformers==1.1.9
|
||||||
|
#spacy-experimental==0.6.2
|
||||||
|
torch @ https://download.pytorch.org/whl/cu116/torch-1.13.1%2Bcu116-cp38-cp38-linux_x86_64.whl
|
||||||
|
torchaudio @ https://download.pytorch.org/whl/cu116/torchaudio-0.13.1%2Bcu116-cp38-cp38-linux_x86_64.whl
|
||||||
|
torchsummary==1.5.1
|
||||||
|
torchtext==0.14.1
|
||||||
|
torchvision @ https://download.pytorch.org/whl/cu116/torchvision-0.14.1%2Bcu116-cp38-cp38-linux_x86_64.whl
|
||||||
|
en-coreference-web-trf @ https://github.com/explosion/spacy-experimental/releases/download/v0.6.1/en_coreference_web_trf-3.4.0a2-py3-none-any.whl
|
||||||
|
|
||||||
|
|
||||||
|
Flask==1.1.4
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Binary file not shown.
|
@ -0,0 +1,31 @@
|
||||||
|
{
|
||||||
|
"citation": "",
|
||||||
|
"description": "",
|
||||||
|
"features": {
|
||||||
|
"id": {
|
||||||
|
"dtype": "int64",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"paperid": {
|
||||||
|
"dtype": "int64",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"content": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"__index_level_0__": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"embeddings": {
|
||||||
|
"feature": {
|
||||||
|
"dtype": "float32",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"_type": "Sequence"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"homepage": "",
|
||||||
|
"license": ""
|
||||||
|
}
|
|
@ -0,0 +1,13 @@
|
||||||
|
{
|
||||||
|
"_data_files": [
|
||||||
|
{
|
||||||
|
"filename": "data-00000-of-00001.arrow"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"_fingerprint": "99e5d23916952a82",
|
||||||
|
"_format_columns": null,
|
||||||
|
"_format_kwargs": {},
|
||||||
|
"_format_type": null,
|
||||||
|
"_output_all_columns": false,
|
||||||
|
"_split": null
|
||||||
|
}
|
Binary file not shown.
|
@ -0,0 +1,70 @@
|
||||||
|
{
|
||||||
|
"citation": "",
|
||||||
|
"description": "",
|
||||||
|
"features": {
|
||||||
|
"id": {
|
||||||
|
"dtype": "int64",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"type": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"resources": [
|
||||||
|
{
|
||||||
|
"description": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"name": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"url": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"tags": {
|
||||||
|
"feature": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"_type": "Sequence"
|
||||||
|
},
|
||||||
|
"title": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"author": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"notes": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"metadata_created": {
|
||||||
|
"dtype": "float64",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"url": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"__index_level_0__": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"embeddings": {
|
||||||
|
"feature": {
|
||||||
|
"dtype": "float32",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"_type": "Sequence"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"homepage": "",
|
||||||
|
"license": ""
|
||||||
|
}
|
|
@ -0,0 +1,13 @@
|
||||||
|
{
|
||||||
|
"_data_files": [
|
||||||
|
{
|
||||||
|
"filename": "data-00000-of-00001.arrow"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"_fingerprint": "d15ca66770ecc202",
|
||||||
|
"_format_columns": null,
|
||||||
|
"_format_kwargs": {},
|
||||||
|
"_format_type": null,
|
||||||
|
"_output_all_columns": false,
|
||||||
|
"_split": null
|
||||||
|
}
|
Binary file not shown.
|
@ -0,0 +1,70 @@
|
||||||
|
{
|
||||||
|
"citation": "",
|
||||||
|
"description": "",
|
||||||
|
"features": {
|
||||||
|
"id": {
|
||||||
|
"dtype": "int64",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"type": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"resources": [
|
||||||
|
{
|
||||||
|
"description": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"name": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"url": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"tags": {
|
||||||
|
"feature": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"_type": "Sequence"
|
||||||
|
},
|
||||||
|
"title": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"author": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"notes": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"metadata_created": {
|
||||||
|
"dtype": "float64",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"url": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"__index_level_0__": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"embeddings": {
|
||||||
|
"feature": {
|
||||||
|
"dtype": "float32",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"_type": "Sequence"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"homepage": "",
|
||||||
|
"license": ""
|
||||||
|
}
|
|
@ -0,0 +1,13 @@
|
||||||
|
{
|
||||||
|
"_data_files": [
|
||||||
|
{
|
||||||
|
"filename": "data-00000-of-00001.arrow"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"_fingerprint": "178faebb2e165622",
|
||||||
|
"_format_columns": null,
|
||||||
|
"_format_kwargs": {},
|
||||||
|
"_format_type": null,
|
||||||
|
"_output_all_columns": false,
|
||||||
|
"_split": null
|
||||||
|
}
|
Binary file not shown.
|
@ -0,0 +1,70 @@
|
||||||
|
{
|
||||||
|
"citation": "",
|
||||||
|
"description": "",
|
||||||
|
"features": {
|
||||||
|
"id": {
|
||||||
|
"dtype": "int64",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"type": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"resources": [
|
||||||
|
{
|
||||||
|
"description": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"name": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"url": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"tags": {
|
||||||
|
"feature": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"_type": "Sequence"
|
||||||
|
},
|
||||||
|
"title": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"author": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"notes": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"metadata_created": {
|
||||||
|
"dtype": "float64",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"url": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"__index_level_0__": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"embeddings": {
|
||||||
|
"feature": {
|
||||||
|
"dtype": "float32",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"_type": "Sequence"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"homepage": "",
|
||||||
|
"license": ""
|
||||||
|
}
|
|
@ -0,0 +1,13 @@
|
||||||
|
{
|
||||||
|
"_data_files": [
|
||||||
|
{
|
||||||
|
"filename": "data-00000-of-00001.arrow"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"_fingerprint": "b949183484bc0637",
|
||||||
|
"_format_columns": null,
|
||||||
|
"_format_kwargs": {},
|
||||||
|
"_format_type": null,
|
||||||
|
"_output_all_columns": false,
|
||||||
|
"_split": null
|
||||||
|
}
|
Binary file not shown.
|
@ -0,0 +1,70 @@
|
||||||
|
{
|
||||||
|
"citation": "",
|
||||||
|
"description": "",
|
||||||
|
"features": {
|
||||||
|
"id": {
|
||||||
|
"dtype": "int64",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"type": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"resources": [
|
||||||
|
{
|
||||||
|
"description": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"name": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"url": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"tags": {
|
||||||
|
"feature": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"_type": "Sequence"
|
||||||
|
},
|
||||||
|
"title": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"author": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"notes": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"metadata_created": {
|
||||||
|
"dtype": "float64",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"url": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"__index_level_0__": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"embeddings": {
|
||||||
|
"feature": {
|
||||||
|
"dtype": "float32",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"_type": "Sequence"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"homepage": "",
|
||||||
|
"license": ""
|
||||||
|
}
|
|
@ -0,0 +1,13 @@
|
||||||
|
{
|
||||||
|
"_data_files": [
|
||||||
|
{
|
||||||
|
"filename": "data-00000-of-00001.arrow"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"_fingerprint": "3866b6141213754b",
|
||||||
|
"_format_columns": null,
|
||||||
|
"_format_kwargs": {},
|
||||||
|
"_format_type": null,
|
||||||
|
"_output_all_columns": false,
|
||||||
|
"_split": null
|
||||||
|
}
|
Binary file not shown.
|
@ -0,0 +1,42 @@
|
||||||
|
{
|
||||||
|
"citation": "",
|
||||||
|
"description": "",
|
||||||
|
"features": {
|
||||||
|
"id": {
|
||||||
|
"dtype": "int64",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"author": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"content": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"time": {
|
||||||
|
"dtype": "int64",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"tags": {
|
||||||
|
"feature": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"_type": "Sequence"
|
||||||
|
},
|
||||||
|
"__index_level_0__": {
|
||||||
|
"dtype": "string",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"embeddings": {
|
||||||
|
"feature": {
|
||||||
|
"dtype": "float32",
|
||||||
|
"_type": "Value"
|
||||||
|
},
|
||||||
|
"_type": "Sequence"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"homepage": "",
|
||||||
|
"license": ""
|
||||||
|
}
|
|
@ -0,0 +1,13 @@
|
||||||
|
{
|
||||||
|
"_data_files": [
|
||||||
|
{
|
||||||
|
"filename": "data-00000-of-00001.arrow"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"_fingerprint": "4e5c812a622a33bc",
|
||||||
|
"_format_columns": null,
|
||||||
|
"_format_kwargs": {},
|
||||||
|
"_format_type": null,
|
||||||
|
"_output_all_columns": false,
|
||||||
|
"_split": null
|
||||||
|
}
|
35
main.py
35
main.py
|
@ -7,10 +7,10 @@ import torch
|
||||||
from flask import Flask, render_template, request, jsonify
|
from flask import Flask, render_template, request, jsonify
|
||||||
from flask_cors import CORS, cross_origin
|
from flask_cors import CORS, cross_origin
|
||||||
import psycopg2
|
import psycopg2
|
||||||
import spacy
|
#import spacy
|
||||||
import requests
|
import requests
|
||||||
import spacy_transformers
|
#import spacy_transformers
|
||||||
import torch
|
#import torch
|
||||||
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline, AutoModelForCausalLM
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline, AutoModelForCausalLM
|
||||||
from User import User
|
from User import User
|
||||||
from VRE import VRE
|
from VRE import VRE
|
||||||
|
@ -22,9 +22,9 @@ import pandas as pd
|
||||||
import time
|
import time
|
||||||
import threading
|
import threading
|
||||||
from sentence_transformers import SentenceTransformer
|
from sentence_transformers import SentenceTransformer
|
||||||
from huggingface_hub import login
|
#from huggingface_hub import login
|
||||||
|
|
||||||
login(token="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")
|
#login(token="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")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -40,7 +40,7 @@ alive = "alive"
|
||||||
|
|
||||||
device = "cuda" if torch.cuda.is_available() else "cpu"
|
device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||||
device_flag = torch.cuda.current_device() if torch.cuda.is_available() else -1
|
device_flag = torch.cuda.current_device() if torch.cuda.is_available() else -1
|
||||||
model_id = "/models/google-gemma"
|
model_id = "/models/google-gemma" #"google/gemma-2b-it"
|
||||||
dtype = torch.bfloat16
|
dtype = torch.bfloat16
|
||||||
|
|
||||||
#query_rewriter = pipeline("text2text-generation", model="castorini/t5-base-canard")
|
#query_rewriter = pipeline("text2text-generation", model="castorini/t5-base-canard")
|
||||||
|
@ -52,9 +52,10 @@ dtype = torch.bfloat16
|
||||||
|
|
||||||
#LLM = pipeline("text2text-generation", model="/models/google-gemma", device=device_flag)
|
#LLM = pipeline("text2text-generation", model="/models/google-gemma", device=device_flag)
|
||||||
|
|
||||||
LLM_tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
|
|
||||||
LLM_model = AutoModelForCausalLM.from_pretrainedAutoModelForCausalLM.from_pretrained(
|
LLM_tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||||
"google/gemma-2b-it",
|
LLM_model = AutoModelForCausalLM.from_pretrained(
|
||||||
|
model_id,
|
||||||
torch_dtype=torch.bfloat16
|
torch_dtype=torch.bfloat16
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -128,13 +129,13 @@ def init_dm():
|
||||||
token = request.get_json().get("token")
|
token = request.get_json().get("token")
|
||||||
status = request.get_json().get("stat")
|
status = request.get_json().get("stat")
|
||||||
if status == "start":
|
if status == "start":
|
||||||
logging.debug("status=start")
|
logging.info("status=start")
|
||||||
message = {"stat": "waiting", "err": ""}
|
message = {"stat": "waiting", "err": ""}
|
||||||
elif status == "set":
|
elif status == "set":
|
||||||
logging.debug("status=set")
|
logging.info("status=set")
|
||||||
headers = {"gcube-token": token, "Accept": "application/json"}
|
headers = {"gcube-token": token, "Accept": "application/json"}
|
||||||
if token not in users:
|
if token not in users:
|
||||||
logging.debug("getting user info")
|
logging.info("getting user info")
|
||||||
url = 'https://api.d4science.org/rest/2/people/profile'
|
url = 'https://api.d4science.org/rest/2/people/profile'
|
||||||
response = requests.get(url, headers=headers)
|
response = requests.get(url, headers=headers)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
|
@ -175,20 +176,20 @@ def predict():
|
||||||
message = {}
|
message = {}
|
||||||
try:
|
try:
|
||||||
if text == "<HELP_ON_START>":
|
if text == "<HELP_ON_START>":
|
||||||
logging.debug("help on start - inactive")
|
logging.info("help on start - inactive")
|
||||||
state = {'help': True, 'inactive': False, 'modified_query':"", 'intent':""}
|
state = {'help': True, 'inactive': False, 'modified_query':"", 'intent':""}
|
||||||
dm.update(state)
|
dm.update(state)
|
||||||
action = dm.next_action()
|
action = dm.next_action()
|
||||||
logging.debug("next action:" + action)
|
logging.info("next action:" + action)
|
||||||
#response = "Hey " + users[token]['name'].split()[0] + "! it's Janet! I am here to help you make use of the datasets and papers in the catalogue of the VRE. I can answer questions whose answers may be inside the papers. I can summarize papers for you. I can also chat with you. So, whichever it is, I am ready to chat!"
|
#response = "Hey " + users[token]['name'].split()[0] + "! it's Janet! I am here to help you make use of the datasets and papers in the catalogue of the VRE. I can answer questions whose answers may be inside the papers. I can summarize papers for you. I can also chat with you. So, whichever it is, I am ready to chat!"
|
||||||
response = rg.gen_response(action, vrename=vre.name, username=users[token]['username'], name=users[token]['name'].split()[0])
|
response = rg.gen_response(action, vrename=vre.name, username=users[token]['username'], name=users[token]['name'].split()[0])
|
||||||
message = {"answer": response}
|
message = {"answer": response}
|
||||||
elif text == "<RECOMMEND_ON_IDLE>":
|
elif text == "<RECOMMEND_ON_IDLE>":
|
||||||
logging.debug("recommend on idle - inactive")
|
logging.info("recommend on idle - inactive")
|
||||||
state = {'help': False, 'inactive': True, 'modified_query':"recommed: ", 'intent':""}
|
state = {'help': False, 'inactive': True, 'modified_query':"recommed: ", 'intent':""}
|
||||||
dm.update(state)
|
dm.update(state)
|
||||||
action = dm.next_action()
|
action = dm.next_action()
|
||||||
logging.debug("next action:" + action)
|
logging.info("next action:" + action)
|
||||||
#response = "Hey " + users[token]['name'].split()[0] + "! it's Janet! I am here to help you make use of the datasets and papers in the catalogue of the VRE. I can answer questions whose answers may be inside the papers. I can summarize papers for you. I can also chat with you. So, whichever it is, I am ready to chat!"
|
#response = "Hey " + users[token]['name'].split()[0] + "! it's Janet! I am here to help you make use of the datasets and papers in the catalogue of the VRE. I can answer questions whose answers may be inside the papers. I can summarize papers for you. I can also chat with you. So, whichever it is, I am ready to chat!"
|
||||||
|
|
||||||
response = rg.gen_response(action, username=users[token]['username'],name=users[token]['name'].split()[0], vrename=vre.name)
|
response = rg.gen_response(action, username=users[token]['username'],name=users[token]['name'].split()[0], vrename=vre.name)
|
||||||
|
@ -213,7 +214,7 @@ user: {text}""")
|
||||||
# rec.generate_recommendations(users[token]['username'], new_user_interests, new_vre_material)
|
# rec.generate_recommendations(users[token]['username'], new_user_interests, new_vre_material)
|
||||||
dm.update(state)
|
dm.update(state)
|
||||||
action = dm.next_action()
|
action = dm.next_action()
|
||||||
logging.debug("Next action: " + action)
|
logging.info("Next action: " + action)
|
||||||
#response = rg.gen_response(action=action, utterance=state['modified_query'], state=dm.get_recent_state(), consec_history=dm.get_consec_history(), chitchat_history=dm.get_chitchat_history(), vrename=vre.name, username=users[token]['username'], name=users[token]['name'].split()[0])
|
#response = rg.gen_response(action=action, utterance=state['modified_query'], state=dm.get_recent_state(), consec_history=dm.get_consec_history(), chitchat_history=dm.get_chitchat_history(), vrename=vre.name, username=users[token]['username'], name=users[token]['name'].split()[0])
|
||||||
#message = {"answer": response, "query": text, "cand": "candidate", "history": dm.get_consec_history(), "modQuery": state['modified_query']}
|
#message = {"answer": response, "query": text, "cand": "candidate", "history": dm.get_consec_history(), "modQuery": state['modified_query']}
|
||||||
message = {"answer": state['modified_query'], "query": text, "cand": "candidate", "history": dm.get_history(), "modQuery": state['modified_query']}
|
message = {"answer": state['modified_query'], "query": text, "cand": "candidate", "history": dm.get_history(), "modQuery": state['modified_query']}
|
||||||
|
|
|
@ -1,40 +1,31 @@
|
||||||
faiss-gpu==1.7.2
|
faiss-gpu==1.7.2
|
||||||
Flask==1.1.4
|
jinja2==3.0.0
|
||||||
|
Flask
|
||||||
flask-cors==3.0.10
|
flask-cors==3.0.10
|
||||||
protobuf==3.20.0
|
protobuf==3.20.0
|
||||||
matplotlib==3.5.3
|
matplotlib==3.5.3
|
||||||
nltk==3.7
|
scikit-learn
|
||||||
numpy==1.22.4
|
sklearn-pandas
|
||||||
pandas==1.3.5
|
|
||||||
PyPDF2==3.0.1
|
PyPDF2==3.0.1
|
||||||
pdfquery
|
pdfquery
|
||||||
html2text
|
html2text
|
||||||
|
nltk
|
||||||
|
numpy
|
||||||
|
pandas
|
||||||
regex==2022.6.2
|
regex==2022.6.2
|
||||||
requests==2.25.1
|
requests==2.25.1
|
||||||
scikit-learn==1.0.2
|
torch
|
||||||
scipy==1.7.3
|
|
||||||
sentencepiece==0.1.97
|
|
||||||
sklearn-pandas==1.8.0
|
|
||||||
spacy==3.4.4
|
|
||||||
spacy-alignments==0.9.0
|
|
||||||
spacy-legacy==3.0.12
|
|
||||||
spacy-loggers==1.0.4
|
|
||||||
spacy-transformers==1.1.9
|
|
||||||
spacy-experimental==0.6.2
|
|
||||||
torch @ https://download.pytorch.org/whl/cu116/torch-1.13.1%2Bcu116-cp38-cp38-linux_x86_64.whl
|
|
||||||
torchaudio @ https://download.pytorch.org/whl/cu116/torchaudio-0.13.1%2Bcu116-cp38-cp38-linux_x86_64.whl
|
|
||||||
torchsummary==1.5.1
|
|
||||||
torchtext==0.14.1
|
|
||||||
sentence-transformers
|
sentence-transformers
|
||||||
torchvision @ https://download.pytorch.org/whl/cu116/torchvision-0.14.1%2Bcu116-cp38-cp38-linux_x86_64.whl
|
|
||||||
tqdm==4.64.1
|
tqdm==4.64.1
|
||||||
transformers
|
transformers
|
||||||
markupsafe==2.0.1
|
markupsafe==2.0.1
|
||||||
psycopg2==2.9.5
|
psycopg2==2.9.5
|
||||||
en-coreference-web-trf @ https://github.com/explosion/spacy-experimental/releases/download/v0.6.1/en_coreference_web_trf-3.4.0a2-py3-none-any.whl
|
|
||||||
datasets
|
datasets
|
||||||
|
itsdangerous==2.0.1
|
||||||
huggingface_hub
|
huggingface_hub
|
||||||
Werkzeug==1.0.1
|
Werkzeug==1.0.1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue