working locally

2024-05-31 01:32:42 +02:00 · 2024-05-31 01:32:42 +02:00 · 337da3f730
parent 1efd0ac18d
commit 337da3f730
26 changed files with 493 additions and 40 deletions
--- a/NLU.py
+++ b/NLU.py
@ -1,5 +1,5 @@
-import spacy
+#import spacy
-import spacy_transformers
+#import spacy_transformers
 import torch
 import logging
@ -111,7 +111,7 @@ class NLU:
        outputs = self.model.generate(input_ids=inputs, max_new_tokens=150)
        goal = self.tokenizer.decode(outputs[0])
-        logging.debug("User's goal is:" + goal)
+        logging.info("User's goal is:" + goal)
        #return goal.split("<start_of_turn>model\n")[-1].split("<eos>")[0]
        return {"modified_query": goal.split("<start_of_turn>model\n")[-1].split("<eos>")[0],
--- a/assistedlab_content.json
+++ b/assistedlab_content.json
--- a/assistedlab_dataset.json
+++ b/assistedlab_dataset.json
@ -0,0 +1 @@
 {"id":{"3":3,"2":2,"1":1},"type":{"3":"Dataset","2":"Dataset","1":"Dataset"},"resources":{"3":[{"name":"intent classification dataset","url":"https:\/\/data.d4science.net\/899P","description":""}],"2":[{"name":"offensive language dataset","url":"https:\/\/data.d4science.net\/jZME","description":""}],"1":[{"name":"validation set","url":"https:\/\/data.d4science.net\/dTLm","description":""},{"name":"training set","url":"https:\/\/data.d4science.net\/6MXr","description":""}]},"tags":{"3":["intent classification"],"2":["hate speech detection","offensive language detection"],"1":["entity extraction"]},"title":{"3":"intent classification dataset","2":"offensive language dataset","1":"custom entity extraction dataset"},"author":{"3":"ibrahim ahmed salah tawfik","2":"ibrahim ahmed salah tawfik","1":"ibrahim ahmed salah tawfik"},"notes":{"3":"this is a dataset of possible inputs and their intents. it has been \r\ndeveloped for the purposes of developing a conversational agent for the \r\nvres. the supported intents are chitchat, findpaper, finddataset, qa and \r\nsummarizepapaer.","2":"this dataset consists of input texts and their labels in terms of being \r\noffensive, hateful or neither. it can be used for developing a model for \r\ndetecting offensive language.","1":"this dataset is supposed to be used with the spacy library to develop an \r\nentity extraction model. the supported entites are topic, author, date, \r\nqualifier and resource type."},"metadata_created":{"3":1676239892.8178350925,"2":1676240043.0586650372,"1":1676240176.4970309734},"url":{"3":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/intent_classification_dataset","2":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/offensive_language_dataset","1":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/custom_entity_extraction_dataset"}}
--- a/assistedlab_paper.json
+++ b/assistedlab_paper.json
@ -0,0 +1 @@
 {"id":{"7":7,"6":6,"5":5,"4":4,"3":3,"2":2,"1":1},"type":{"7":"Paper","6":"Paper","5":"Paper","4":"Paper","3":"Paper","2":"Paper","1":"Paper"},"resources":{"7":[{"name":"reinforcement learning","url":"https:\/\/data.d4science.net\/QDPK","description":""}],"6":[{"name":"retrieval-augmented generation","url":"https:\/\/data.d4science.net\/x3Yy","description":""}],"5":[{"name":"neural approaches to conversational information retrieval","url":"https:\/\/data.d4science.net\/Fr32","description":""}],"4":[{"name":"paper","url":"https:\/\/data.d4science.org\/shub\/E_cERSSERldlBFak1pOTZ4eXJRajM3ekl4a3l0L0JBZmpENE01TGRvNEE3TnB4UEhUTENTQ1RzbnJWQVFPKzRacg==","description":"paper about chatbots"}],"3":[{"name":"learning to summarize from human feedback","url":"https:\/\/data.d4science.org\/shub\/E_cnlTU2xJMTVXbXpSTHVJcDZPQkl0eThOUGRGR3ZqaFZUZGdWUmtHb25wN2pPbW9RUDVINFdQUXl1T1dwTXY5Vw==","description":"paper about developing models for machine summarization using human \r\nfeedback"}],"2":[{"name":"deep reinforcement learning from human preferences","url":"https:\/\/data.d4science.org\/shub\/E_NUMzdFB1Q0xiRGl4S2hFa3VEcU11NExrMVppb29hT0RvdEkwWDdOdTAyMWFLeTBleGx1V2Z5Z28rVVpBSlBYbQ==","description":""}],"1":[{"name":"candela et al. 
- 2013 - virtual research environments an overview and a r","url":"https:\/\/data.d4science.org\/shub\/E_VjR6dHFhcU1ycDVaSWpCbTYyWnNudkpOV1FiWGpKdFdaREVuRWE1OWk3RE1yS0lzRTdOTEh4Szdlb1lTREF5aw==","description":""}]},"tags":{"7":["reinforcement learning"],"6":["qa","retrieval","answer generation","transformers"],"5":["cir","ir","conversational agents","conversational information retrieval","neural information retrieval"],"4":["chatbot","conversational agent"],"3":["deep learning","language generation","reinforcement learning","reinforcement learning from human feedback","rlhf"],"2":["reinforcement learning","reinforcement learning from human feedback","rlhf"],"1":["vre","virtual research environment"]},"title":{"7":"reinforcement learning","6":"retrieval-augmented language generation","5":"neural conversational information retrieval","4":"survey about chatbots","3":"summarizing from human feedback","2":"deep reinforcement learning from human preferences","1":"virtual research environments: an overview and a research agenda"},"author":{"7":"ibrahim ahmed salah tawfik","6":"ibrahim ahmed salah tawfik","5":"ibrahim ahmed salah tawfik","4":"ibrahim ahmed salah tawfik","3":"ibrahim ahmed salah tawfik","2":"ibrahim ahmed salah tawfik","1":"candela leonardo"},"notes":{"7":"this paper explains the main concepts and algorithms of reinforcement \r\nlearning.","6":"this paper describes an algorithm to generate answers based on paragraphs \r\npotentially containing the answer.","5":"this paper is a survey about the different applications in which neural \r\nconversational information retrieval can be used.","4":"this paper presents a survey about chatbots and conversational agents.","3":"this paper explains a method for generating machine translations from human \r\nfeedback.","2":"this paper explains how to use human preferences to generate rewards for a \r\nreinforcement learning algorithm.","1":"virtual research environments are innovative, web-based, \r\ncommunity-oriented, 
comprehensive, flexible, and secure working \r\nenvironments conceived to serve the needs of modern science. we overview \r\nthe existing initiatives developing these environments by highlighting the \r\nmajor distinguishing features. we envisage a future where regardless of \r\ngeographical location, scientists will be able to use their web browsers to \r\nseamlessly access data, software, and processing resources that are managed \r\nby diverse systems in separate administration domains via virtual research \r\nenvironments. we identify and discuss the major challenges that should be \r\nresolved to fully achieve the proposed vision, i.e., large-scale \r\nintegration and interoperability, sustainability, and adoption."},"metadata_created":{"7":1676130193.0102539062,"6":1676130537.5889539719,"5":1676130738.7923879623,"4":1681208191.8971168995,"3":1681446416.8742809296,"2":1681446678.2581589222,"1":1681916621.1160180569},"url":{"7":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/reinforcement_learning","6":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/retrieval-augmented_language_generation","5":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/neural_conversational_information_retrieval","4":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/survey_about_chatbots","3":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/summarizing_from_human_feedback","2":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/deep_reinforcement_learning_from_human_preferences","1":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/virtual_research_environments_an_overview_and_a_research_agenda"}}
--- a/assistedlab_post.json
+++ b/assistedlab_post.json
--- a/info.txt
+++ b/info.txt
@ -1 +1,27 @@
 The assistedlab VRE is an environment conceived to deploy and test Janet, the conversational assistant of D4Science. It contains a catalogue which has selected papers about machine learning topics in general and conversational agents development in particular. It also contains some datasets related to these topics. 
 #nltk==3.7
 #numpy==1.22.4
 #pandas==1.3.5
 #scikit-learn==1.0.2
 #scipy==1.7.3
 #sentencepiece==0.1.97
 #sklearn-pandas==1.8.0
 #spacy==3.4.4
 #spacy-alignments==0.9.0
 #spacy-legacy==3.0.12
 #spacy-loggers==1.0.4
 #spacy-transformers==1.1.9
 #spacy-experimental==0.6.2
 torch @ https://download.pytorch.org/whl/cu116/torch-1.13.1%2Bcu116-cp38-cp38-linux_x86_64.whl
 torchaudio @ https://download.pytorch.org/whl/cu116/torchaudio-0.13.1%2Bcu116-cp38-cp38-linux_x86_64.whl
 torchsummary==1.5.1
 torchtext==0.14.1
 torchvision @ https://download.pytorch.org/whl/cu116/torchvision-0.14.1%2Bcu116-cp38-cp38-linux_x86_64.whl
 en-coreference-web-trf @ https://github.com/explosion/spacy-experimental/releases/download/v0.6.1/en_coreference_web_trf-3.4.0a2-py3-none-any.whl
 Flask==1.1.4
--- a/janet_content_index/data-00000-of-00001.arrow
+++ b/janet_content_index/data-00000-of-00001.arrow
--- a/janet_content_index/dataset_info.json
+++ b/janet_content_index/dataset_info.json
@ -0,0 +1,31 @@
 {
  "citation": "",
  "description": "",
  "features": {
    "id": {
      "dtype": "int64",
      "_type": "Value"
    },
    "paperid": {
      "dtype": "int64",
      "_type": "Value"
    },
    "content": {
      "dtype": "string",
      "_type": "Value"
    },
    "__index_level_0__": {
      "dtype": "string",
      "_type": "Value"
    },
    "embeddings": {
      "feature": {
        "dtype": "float32",
        "_type": "Value"
      },
      "_type": "Sequence"
    }
  },
  "homepage": "",
  "license": ""
 }
--- a/janet_content_index/state.json
+++ b/janet_content_index/state.json
@ -0,0 +1,13 @@
 {
  "_data_files": [
    {
      "filename": "data-00000-of-00001.arrow"
    }
  ],
  "_fingerprint": "99e5d23916952a82",
  "_format_columns": null,
  "_format_kwargs": {},
  "_format_type": null,
  "_output_all_columns": false,
  "_split": null
 }
--- a/janet_dataset_desc_index/data-00000-of-00001.arrow
+++ b/janet_dataset_desc_index/data-00000-of-00001.arrow
--- a/janet_dataset_desc_index/dataset_info.json
+++ b/janet_dataset_desc_index/dataset_info.json
@ -0,0 +1,70 @@
 {
  "citation": "",
  "description": "",
  "features": {
    "id": {
      "dtype": "int64",
      "_type": "Value"
    },
    "type": {
      "dtype": "string",
      "_type": "Value"
    },
    "resources": [
      {
        "description": {
          "dtype": "string",
          "_type": "Value"
        },
        "name": {
          "dtype": "string",
          "_type": "Value"
        },
        "url": {
          "dtype": "string",
          "_type": "Value"
        }
      }
    ],
    "tags": {
      "feature": {
        "dtype": "string",
        "_type": "Value"
      },
      "_type": "Sequence"
    },
    "title": {
      "dtype": "string",
      "_type": "Value"
    },
    "author": {
      "dtype": "string",
      "_type": "Value"
    },
    "notes": {
      "dtype": "string",
      "_type": "Value"
    },
    "metadata_created": {
      "dtype": "float64",
      "_type": "Value"
    },
    "url": {
      "dtype": "string",
      "_type": "Value"
    },
    "__index_level_0__": {
      "dtype": "string",
      "_type": "Value"
    },
    "embeddings": {
      "feature": {
        "dtype": "float32",
        "_type": "Value"
      },
      "_type": "Sequence"
    }
  },
  "homepage": "",
  "license": ""
 }
--- a/janet_dataset_desc_index/state.json
+++ b/janet_dataset_desc_index/state.json
@ -0,0 +1,13 @@
 {
  "_data_files": [
    {
      "filename": "data-00000-of-00001.arrow"
    }
  ],
  "_fingerprint": "d15ca66770ecc202",
  "_format_columns": null,
  "_format_kwargs": {},
  "_format_type": null,
  "_output_all_columns": false,
  "_split": null
 }
--- a/janet_dataset_titles_index/data-00000-of-00001.arrow
+++ b/janet_dataset_titles_index/data-00000-of-00001.arrow
--- a/janet_dataset_titles_index/dataset_info.json
+++ b/janet_dataset_titles_index/dataset_info.json
@ -0,0 +1,70 @@
 {
  "citation": "",
  "description": "",
  "features": {
    "id": {
      "dtype": "int64",
      "_type": "Value"
    },
    "type": {
      "dtype": "string",
      "_type": "Value"
    },
    "resources": [
      {
        "description": {
          "dtype": "string",
          "_type": "Value"
        },
        "name": {
          "dtype": "string",
          "_type": "Value"
        },
        "url": {
          "dtype": "string",
          "_type": "Value"
        }
      }
    ],
    "tags": {
      "feature": {
        "dtype": "string",
        "_type": "Value"
      },
      "_type": "Sequence"
    },
    "title": {
      "dtype": "string",
      "_type": "Value"
    },
    "author": {
      "dtype": "string",
      "_type": "Value"
    },
    "notes": {
      "dtype": "string",
      "_type": "Value"
    },
    "metadata_created": {
      "dtype": "float64",
      "_type": "Value"
    },
    "url": {
      "dtype": "string",
      "_type": "Value"
    },
    "__index_level_0__": {
      "dtype": "string",
      "_type": "Value"
    },
    "embeddings": {
      "feature": {
        "dtype": "float32",
        "_type": "Value"
      },
      "_type": "Sequence"
    }
  },
  "homepage": "",
  "license": ""
 }
--- a/janet_dataset_titles_index/state.json
+++ b/janet_dataset_titles_index/state.json
@ -0,0 +1,13 @@
 {
  "_data_files": [
    {
      "filename": "data-00000-of-00001.arrow"
    }
  ],
  "_fingerprint": "178faebb2e165622",
  "_format_columns": null,
  "_format_kwargs": {},
  "_format_type": null,
  "_output_all_columns": false,
  "_split": null
 }
--- a/janet_paper_desc_index/data-00000-of-00001.arrow
+++ b/janet_paper_desc_index/data-00000-of-00001.arrow
--- a/janet_paper_desc_index/dataset_info.json
+++ b/janet_paper_desc_index/dataset_info.json
@ -0,0 +1,70 @@
 {
  "citation": "",
  "description": "",
  "features": {
    "id": {
      "dtype": "int64",
      "_type": "Value"
    },
    "type": {
      "dtype": "string",
      "_type": "Value"
    },
    "resources": [
      {
        "description": {
          "dtype": "string",
          "_type": "Value"
        },
        "name": {
          "dtype": "string",
          "_type": "Value"
        },
        "url": {
          "dtype": "string",
          "_type": "Value"
        }
      }
    ],
    "tags": {
      "feature": {
        "dtype": "string",
        "_type": "Value"
      },
      "_type": "Sequence"
    },
    "title": {
      "dtype": "string",
      "_type": "Value"
    },
    "author": {
      "dtype": "string",
      "_type": "Value"
    },
    "notes": {
      "dtype": "string",
      "_type": "Value"
    },
    "metadata_created": {
      "dtype": "float64",
      "_type": "Value"
    },
    "url": {
      "dtype": "string",
      "_type": "Value"
    },
    "__index_level_0__": {
      "dtype": "string",
      "_type": "Value"
    },
    "embeddings": {
      "feature": {
        "dtype": "float32",
        "_type": "Value"
      },
      "_type": "Sequence"
    }
  },
  "homepage": "",
  "license": ""
 }
--- a/janet_paper_desc_index/state.json
+++ b/janet_paper_desc_index/state.json
@ -0,0 +1,13 @@
 {
  "_data_files": [
    {
      "filename": "data-00000-of-00001.arrow"
    }
  ],
  "_fingerprint": "b949183484bc0637",
  "_format_columns": null,
  "_format_kwargs": {},
  "_format_type": null,
  "_output_all_columns": false,
  "_split": null
 }
--- a/janet_paper_titles_index/data-00000-of-00001.arrow
+++ b/janet_paper_titles_index/data-00000-of-00001.arrow
--- a/janet_paper_titles_index/dataset_info.json
+++ b/janet_paper_titles_index/dataset_info.json
@ -0,0 +1,70 @@
 {
  "citation": "",
  "description": "",
  "features": {
    "id": {
      "dtype": "int64",
      "_type": "Value"
    },
    "type": {
      "dtype": "string",
      "_type": "Value"
    },
    "resources": [
      {
        "description": {
          "dtype": "string",
          "_type": "Value"
        },
        "name": {
          "dtype": "string",
          "_type": "Value"
        },
        "url": {
          "dtype": "string",
          "_type": "Value"
        }
      }
    ],
    "tags": {
      "feature": {
        "dtype": "string",
        "_type": "Value"
      },
      "_type": "Sequence"
    },
    "title": {
      "dtype": "string",
      "_type": "Value"
    },
    "author": {
      "dtype": "string",
      "_type": "Value"
    },
    "notes": {
      "dtype": "string",
      "_type": "Value"
    },
    "metadata_created": {
      "dtype": "float64",
      "_type": "Value"
    },
    "url": {
      "dtype": "string",
      "_type": "Value"
    },
    "__index_level_0__": {
      "dtype": "string",
      "_type": "Value"
    },
    "embeddings": {
      "feature": {
        "dtype": "float32",
        "_type": "Value"
      },
      "_type": "Sequence"
    }
  },
  "homepage": "",
  "license": ""
 }
--- a/janet_paper_titles_index/state.json
+++ b/janet_paper_titles_index/state.json
@ -0,0 +1,13 @@
 {
  "_data_files": [
    {
      "filename": "data-00000-of-00001.arrow"
    }
  ],
  "_fingerprint": "3866b6141213754b",
  "_format_columns": null,
  "_format_kwargs": {},
  "_format_type": null,
  "_output_all_columns": false,
  "_split": null
 }
--- a/janet_post_index/data-00000-of-00001.arrow
+++ b/janet_post_index/data-00000-of-00001.arrow
--- a/janet_post_index/dataset_info.json
+++ b/janet_post_index/dataset_info.json
@ -0,0 +1,42 @@
 {
  "citation": "",
  "description": "",
  "features": {
    "id": {
      "dtype": "int64",
      "_type": "Value"
    },
    "author": {
      "dtype": "string",
      "_type": "Value"
    },
    "content": {
      "dtype": "string",
      "_type": "Value"
    },
    "time": {
      "dtype": "int64",
      "_type": "Value"
    },
    "tags": {
      "feature": {
        "dtype": "string",
        "_type": "Value"
      },
      "_type": "Sequence"
    },
    "__index_level_0__": {
      "dtype": "string",
      "_type": "Value"
    },
    "embeddings": {
      "feature": {
        "dtype": "float32",
        "_type": "Value"
      },
      "_type": "Sequence"
    }
  },
  "homepage": "",
  "license": ""
 }
--- a/janet_post_index/state.json
+++ b/janet_post_index/state.json
@ -0,0 +1,13 @@
 {
  "_data_files": [
    {
      "filename": "data-00000-of-00001.arrow"
    }
  ],
  "_fingerprint": "4e5c812a622a33bc",
  "_format_columns": null,
  "_format_kwargs": {},
  "_format_type": null,
  "_output_all_columns": false,
  "_split": null
 }
--- a/main.py
+++ b/main.py
@ -7,10 +7,10 @@ import torch
 from flask import Flask, render_template, request, jsonify
 from flask_cors import CORS, cross_origin
 import psycopg2
-import spacy
+#import spacy
 import requests
-import spacy_transformers
+#import spacy_transformers
-import torch
+#import torch
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline, AutoModelForCausalLM
 from User import User
 from VRE import VRE
@ -22,9 +22,9 @@ import pandas as pd
 import time
 import threading
 from sentence_transformers import SentenceTransformer
-from huggingface_hub import login
+#from huggingface_hub import login
-login(token="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")
+#login(token="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")
@ -40,7 +40,7 @@ alive = "alive"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 device_flag = torch.cuda.current_device() if torch.cuda.is_available() else -1
-model_id = "/models/google-gemma"
+model_id = "/models/google-gemma" #"google/gemma-2b-it"
 dtype = torch.bfloat16
 #query_rewriter = pipeline("text2text-generation", model="castorini/t5-base-canard")
@ -52,9 +52,10 @@ dtype = torch.bfloat16
 #LLM = pipeline("text2text-generation", model="/models/google-gemma", device=device_flag)
-LLM_tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
+
-LLM_model = AutoModelForCausalLM.from_pretrainedAutoModelForCausalLM.from_pretrained(
+LLM_tokenizer = AutoTokenizer.from_pretrained(model_id)
-    "google/gemma-2b-it",
+LLM_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16
 )
@ -128,13 +129,13 @@ def init_dm():
        token = request.get_json().get("token")
        status = request.get_json().get("stat")
        if status == "start":
-            logging.debug("status=start")
+            logging.info("status=start")
            message = {"stat": "waiting", "err": ""}
        elif status == "set":
-            logging.debug("status=set")
+            logging.info("status=set")
            headers = {"gcube-token": token, "Accept": "application/json"}
            if token not in users:
-                logging.debug("getting user info")
+                logging.info("getting user info")
                url = 'https://api.d4science.org/rest/2/people/profile'
                response = requests.get(url, headers=headers)
                if response.status_code == 200:
@ -175,20 +176,20 @@ def predict():
    message = {}
    try:
        if text == "<HELP_ON_START>":
-            logging.debug("help on start - inactive")
+            logging.info("help on start - inactive")
            state = {'help': True, 'inactive': False, 'modified_query':"", 'intent':""}
            dm.update(state)
            action = dm.next_action()
-            logging.debug("next action:" + action)
+            logging.info("next action:" + action)
            #response = "Hey " + users[token]['name'].split()[0] + "! it's Janet! I am here to help you make use of the datasets and papers in the catalogue of the VRE. I can answer questions whose answers may be inside the papers. I can summarize papers for you. I can also chat with you. So, whichever it is, I am ready to chat!"
            response = rg.gen_response(action, vrename=vre.name, username=users[token]['username'], name=users[token]['name'].split()[0])
            message = {"answer": response}
        elif text == "<RECOMMEND_ON_IDLE>":
-            logging.debug("recommend on idle - inactive")
+            logging.info("recommend on idle - inactive")
            state = {'help': False, 'inactive': True, 'modified_query':"recommed: ", 'intent':""}
            dm.update(state)
            action = dm.next_action()
-            logging.debug("next action:" + action)
+            logging.info("next action:" + action)
            #response = "Hey " + users[token]['name'].split()[0] + "! it's Janet! I am here to help you make use of the datasets and papers in the catalogue of the VRE. I can answer questions whose answers may be inside the papers. I can summarize papers for you. I can also chat with you. So, whichever it is, I am ready to chat!"
            response = rg.gen_response(action, username=users[token]['username'],name=users[token]['name'].split()[0], vrename=vre.name)
@ -213,7 +214,7 @@ user: {text}""")
            #    rec.generate_recommendations(users[token]['username'], new_user_interests, new_vre_material)
            dm.update(state)
            action = dm.next_action()
-            logging.debug("Next action: " + action)
+            logging.info("Next action: " + action)
            #response = rg.gen_response(action=action, utterance=state['modified_query'], state=dm.get_recent_state(), consec_history=dm.get_consec_history(), chitchat_history=dm.get_chitchat_history(), vrename=vre.name, username=users[token]['username'], name=users[token]['name'].split()[0])
            #message = {"answer": response, "query": text, "cand": "candidate", "history": dm.get_consec_history(), "modQuery": state['modified_query']}
            message = {"answer": state['modified_query'], "query": text, "cand": "candidate", "history": dm.get_history(), "modQuery": state['modified_query']}
--- a/requirements_main.txt
+++ b/requirements_main.txt
@ -1,40 +1,31 @@
 faiss-gpu==1.7.2
-Flask==1.1.4
+jinja2==3.0.0
 Flask
 flask-cors==3.0.10
 protobuf==3.20.0
 matplotlib==3.5.3
-nltk==3.7
+scikit-learn
-numpy==1.22.4
+sklearn-pandas
 pandas==1.3.5
 PyPDF2==3.0.1
 pdfquery
 html2text
 nltk
 numpy
 pandas
 regex==2022.6.2
 requests==2.25.1
-scikit-learn==1.0.2
+torch
 scipy==1.7.3
 sentencepiece==0.1.97
 sklearn-pandas==1.8.0
 spacy==3.4.4
 spacy-alignments==0.9.0
 spacy-legacy==3.0.12
 spacy-loggers==1.0.4
 spacy-transformers==1.1.9
 spacy-experimental==0.6.2
 torch @ https://download.pytorch.org/whl/cu116/torch-1.13.1%2Bcu116-cp38-cp38-linux_x86_64.whl
 torchaudio @ https://download.pytorch.org/whl/cu116/torchaudio-0.13.1%2Bcu116-cp38-cp38-linux_x86_64.whl
 torchsummary==1.5.1
 torchtext==0.14.1
 sentence-transformers
 torchvision @ https://download.pytorch.org/whl/cu116/torchvision-0.14.1%2Bcu116-cp38-cp38-linux_x86_64.whl
 tqdm==4.64.1
 transformers
 markupsafe==2.0.1
 psycopg2==2.9.5
 en-coreference-web-trf @ https://github.com/explosion/spacy-experimental/releases/download/v0.6.1/en_coreference_web_trf-3.4.0a2-py3-none-any.whl
 datasets
 itsdangerous==2.0.1
 huggingface_hub
 Werkzeug==1.0.1
		`@ -0,0 +1 @@`
							{"id":{"3":3,"2":2,"1":1},"type":{"3":"Dataset","2":"Dataset","1":"Dataset"},"resources":{"3":[{"name":"intent classification dataset","url":"https:\/\/data.d4science.net\/899P","description":""}],"2":[{"name":"offensive language dataset","url":"https:\/\/data.d4science.net\/jZME","description":""}],"1":[{"name":"validation set","url":"https:\/\/data.d4science.net\/dTLm","description":""},{"name":"training set","url":"https:\/\/data.d4science.net\/6MXr","description":""}]},"tags":{"3":["intent classification"],"2":["hate speech detection","offensive language detection"],"1":["entity extraction"]},"title":{"3":"intent classification dataset","2":"offensive language dataset","1":"custom entity extraction dataset"},"author":{"3":"ibrahim ahmed salah tawfik","2":"ibrahim ahmed salah tawfik","1":"ibrahim ahmed salah tawfik"},"notes":{"3":"this is a dataset of possible inputs and their intents. it has been \r\ndeveloped for the purposes of developing a conversational agent for the \r\nvres. the supported intents are chitchat, findpaper, finddataset, qa and \r\nsummarizepapaer.","2":"this dataset consists of input texts and their labels in terms of being \r\noffensive, hateful or neither. it can be used for developing a model for \r\ndetecting offensive language.","1":"this dataset is supposed to be used with the spacy library to develop an \r\nentity extraction model. the supported entites are topic, author, date, \r\nqualifier and resource type."},"metadata_created":{"3":1676239892.8178350925,"2":1676240043.0586650372,"1":1676240176.4970309734},"url":{"3":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/intent_classification_dataset","2":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/offensive_language_dataset","1":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/custom_entity_extraction_dataset"}}
		`@ -0,0 +1 @@`
							{"id":{"7":7,"6":6,"5":5,"4":4,"3":3,"2":2,"1":1},"type":{"7":"Paper","6":"Paper","5":"Paper","4":"Paper","3":"Paper","2":"Paper","1":"Paper"},"resources":{"7":[{"name":"reinforcement learning","url":"https:\/\/data.d4science.net\/QDPK","description":""}],"6":[{"name":"retrieval-augmented generation","url":"https:\/\/data.d4science.net\/x3Yy","description":""}],"5":[{"name":"neural approaches to conversational information retrieval","url":"https:\/\/data.d4science.net\/Fr32","description":""}],"4":[{"name":"paper","url":"https:\/\/data.d4science.org\/shub\/E_cERSSERldlBFak1pOTZ4eXJRajM3ekl4a3l0L0JBZmpENE01TGRvNEE3TnB4UEhUTENTQ1RzbnJWQVFPKzRacg==","description":"paper about chatbots"}],"3":[{"name":"learning to summarize from human feedback","url":"https:\/\/data.d4science.org\/shub\/E_cnlTU2xJMTVXbXpSTHVJcDZPQkl0eThOUGRGR3ZqaFZUZGdWUmtHb25wN2pPbW9RUDVINFdQUXl1T1dwTXY5Vw==","description":"paper about developing models for machine summarization using human \r\nfeedback"}],"2":[{"name":"deep reinforcement learning from human preferences","url":"https:\/\/data.d4science.org\/shub\/E_NUMzdFB1Q0xiRGl4S2hFa3VEcU11NExrMVppb29hT0RvdEkwWDdOdTAyMWFLeTBleGx1V2Z5Z28rVVpBSlBYbQ==","description":""}],"1":[{"name":"candela et al. 
- 2013 - virtual research environments an overview and a r","url":"https:\/\/data.d4science.org\/shub\/E_VjR6dHFhcU1ycDVaSWpCbTYyWnNudkpOV1FiWGpKdFdaREVuRWE1OWk3RE1yS0lzRTdOTEh4Szdlb1lTREF5aw==","description":""}]},"tags":{"7":["reinforcement learning"],"6":["qa","retrieval","answer generation","transformers"],"5":["cir","ir","conversational agents","conversational information retrieval","neural information retrieval"],"4":["chatbot","conversational agent"],"3":["deep learning","language generation","reinforcement learning","reinforcement learning from human feedback","rlhf"],"2":["reinforcement learning","reinforcement learning from human feedback","rlhf"],"1":["vre","virtual research environment"]},"title":{"7":"reinforcement learning","6":"retrieval-augmented language generation","5":"neural conversational information retrieval","4":"survey about chatbots","3":"summarizing from human feedback","2":"deep reinforcement learning from human preferences","1":"virtual research environments: an overview and a research agenda"},"author":{"7":"ibrahim ahmed salah tawfik","6":"ibrahim ahmed salah tawfik","5":"ibrahim ahmed salah tawfik","4":"ibrahim ahmed salah tawfik","3":"ibrahim ahmed salah tawfik","2":"ibrahim ahmed salah tawfik","1":"candela leonardo"},"notes":{"7":"this paper explains the main concepts and algorithms of reinforcement \r\nlearning.","6":"this paper describes an algorithm to generate answers based on paragraphs \r\npotentially containing the answer.","5":"this paper is a survey about the different applications in which neural \r\nconversational information retrieval can be used.","4":"this paper presents a survey about chatbots and conversational agents.","3":"this paper explains a method for generating machine translations from human \r\nfeedback.","2":"this paper explains how to use human preferences to generate rewards for a \r\nreinforcement learning algorithm.","1":"virtual research environments are innovative, web-based, \r\ncommunity-oriented, 
comprehensive, flexible, and secure working \r\nenvironments conceived to serve the needs of modern science. we overview \r\nthe existing initiatives developing these environments by highlighting the \r\nmajor distinguishing features. we envisage a future where regardless of \r\ngeographical location, scientists will be able to use their web browsers to \r\nseamlessly access data, software, and processing resources that are managed \r\nby diverse systems in separate administration domains via virtual research \r\nenvironments. we identify and discuss the major challenges that should be \r\nresolved to fully achieve the proposed vision, i.e., large-scale \r\nintegration and interoperability, sustainability, and adoption."},"metadata_created":{"7":1676130193.0102539062,"6":1676130537.5889539719,"5":1676130738.7923879623,"4":1681208191.8971168995,"3":1681446416.8742809296,"2":1681446678.2581589222,"1":1681916621.1160180569},"url":{"7":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/reinforcement_learning","6":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/retrieval-augmented_language_generation","5":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/neural_conversational_information_retrieval","4":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/survey_about_chatbots","3":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/summarizing_from_human_feedback","2":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/deep_reinforcement_learning_from_human_preferences","1":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/virtual_research_environments_an_overview_and_a_research_agenda"}}