working locally

This commit is contained in:
Ahmed Salah Tawfik Ibrahim 2024-05-31 01:32:42 +02:00
parent 1efd0ac18d
commit 337da3f730
26 changed files with 493 additions and 40 deletions

6
NLU.py
View File

@ -1,5 +1,5 @@
import spacy #import spacy
import spacy_transformers #import spacy_transformers
import torch import torch
import logging import logging
@ -111,7 +111,7 @@ class NLU:
outputs = self.model.generate(input_ids=inputs, max_new_tokens=150) outputs = self.model.generate(input_ids=inputs, max_new_tokens=150)
goal = self.tokenizer.decode(outputs[0]) goal = self.tokenizer.decode(outputs[0])
logging.debug("User's goal is:" + goal) logging.info("User's goal is:" + goal)
#return goal.split("<start_of_turn>model\n")[-1].split("<eos>")[0] #return goal.split("<start_of_turn>model\n")[-1].split("<eos>")[0]
return {"modified_query": goal.split("<start_of_turn>model\n")[-1].split("<eos>")[0], return {"modified_query": goal.split("<start_of_turn>model\n")[-1].split("<eos>")[0],

1
assistedlab_content.json Normal file

File diff suppressed because one or more lines are too long

1
assistedlab_dataset.json Normal file
View File

@ -0,0 +1 @@
{"id":{"3":3,"2":2,"1":1},"type":{"3":"Dataset","2":"Dataset","1":"Dataset"},"resources":{"3":[{"name":"intent classification dataset","url":"https:\/\/data.d4science.net\/899P","description":""}],"2":[{"name":"offensive language dataset","url":"https:\/\/data.d4science.net\/jZME","description":""}],"1":[{"name":"validation set","url":"https:\/\/data.d4science.net\/dTLm","description":""},{"name":"training set","url":"https:\/\/data.d4science.net\/6MXr","description":""}]},"tags":{"3":["intent classification"],"2":["hate speech detection","offensive language detection"],"1":["entity extraction"]},"title":{"3":"intent classification dataset","2":"offensive language dataset","1":"custom entity extraction dataset"},"author":{"3":"ibrahim ahmed salah tawfik","2":"ibrahim ahmed salah tawfik","1":"ibrahim ahmed salah tawfik"},"notes":{"3":"this is a dataset of possible inputs and their intents. it has been \r\ndeveloped for the purposes of developing a conversational agent for the \r\nvres. the supported intents are chitchat, findpaper, finddataset, qa and \r\nsummarizepapaer.","2":"this dataset consists of input texts and their labels in terms of being \r\noffensive, hateful or neither. it can be used for developing a model for \r\ndetecting offensive language.","1":"this dataset is supposed to be used with the spacy library to develop an \r\nentity extraction model. the supported entites are topic, author, date, \r\nqualifier and resource type."},"metadata_created":{"3":1676239892.8178350925,"2":1676240043.0586650372,"1":1676240176.4970309734},"url":{"3":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/intent_classification_dataset","2":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/offensive_language_dataset","1":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/custom_entity_extraction_dataset"}}

1
assistedlab_paper.json Normal file
View File

@ -0,0 +1 @@
{"id":{"7":7,"6":6,"5":5,"4":4,"3":3,"2":2,"1":1},"type":{"7":"Paper","6":"Paper","5":"Paper","4":"Paper","3":"Paper","2":"Paper","1":"Paper"},"resources":{"7":[{"name":"reinforcement learning","url":"https:\/\/data.d4science.net\/QDPK","description":""}],"6":[{"name":"retrieval-augmented generation","url":"https:\/\/data.d4science.net\/x3Yy","description":""}],"5":[{"name":"neural approaches to conversational information retrieval","url":"https:\/\/data.d4science.net\/Fr32","description":""}],"4":[{"name":"paper","url":"https:\/\/data.d4science.org\/shub\/E_cERSSERldlBFak1pOTZ4eXJRajM3ekl4a3l0L0JBZmpENE01TGRvNEE3TnB4UEhUTENTQ1RzbnJWQVFPKzRacg==","description":"paper about chatbots"}],"3":[{"name":"learning to summarize from human feedback","url":"https:\/\/data.d4science.org\/shub\/E_cnlTU2xJMTVXbXpSTHVJcDZPQkl0eThOUGRGR3ZqaFZUZGdWUmtHb25wN2pPbW9RUDVINFdQUXl1T1dwTXY5Vw==","description":"paper about developing models for machine summarization using human \r\nfeedback"}],"2":[{"name":"deep reinforcement learning from human preferences","url":"https:\/\/data.d4science.org\/shub\/E_NUMzdFB1Q0xiRGl4S2hFa3VEcU11NExrMVppb29hT0RvdEkwWDdOdTAyMWFLeTBleGx1V2Z5Z28rVVpBSlBYbQ==","description":""}],"1":[{"name":"candela et al. - 2013 - virtual research environments an overview and a r","url":"https:\/\/data.d4science.org\/shub\/E_VjR6dHFhcU1ycDVaSWpCbTYyWnNudkpOV1FiWGpKdFdaREVuRWE1OWk3RE1yS0lzRTdOTEh4Szdlb1lTREF5aw==","description":""}]},"tags":{"7":["reinforcement learning"],"6":["qa","retrieval","answer generation","transformers"],"5":["cir","ir","conversational agents","conversational information retrieval","neural information retrieval"],"4":["chatbot","conversational agent"],"3":["deep learning","language generation","reinforcement learning","reinforcement learning from human feedback","rlhf"],"2":["reinforcement learning","reinforcement learning from human feedback","rlhf"],"1":["vre","virtual research environment"]},"title":{"7":"reinforcement learning","6":"retrieval-augmented language generation","5":"neural conversational information retrieval","4":"survey about chatbots","3":"summarizing from human feedback","2":"deep reinforcement learning from human preferences","1":"virtual research environments: an overview and a research agenda"},"author":{"7":"ibrahim ahmed salah tawfik","6":"ibrahim ahmed salah tawfik","5":"ibrahim ahmed salah tawfik","4":"ibrahim ahmed salah tawfik","3":"ibrahim ahmed salah tawfik","2":"ibrahim ahmed salah tawfik","1":"candela leonardo"},"notes":{"7":"this paper explains the main concepts and algorithms of reinforcement \r\nlearning.","6":"this paper describes an algorithm to generate answers based on paragraphs \r\npotentially containing the answer.","5":"this paper is a survey about the different applications in which neural \r\nconversational information retrieval can be used.","4":"this paper presents a survey about chatbots and conversational agents.","3":"this paper explains a method for generating machine translations from human \r\nfeedback.","2":"this paper explains how to use human preferences to generate rewards for a \r\nreinforcement learning algorithm.","1":"virtual research environments are innovative, web-based, \r\ncommunity-oriented, comprehensive, flexible, and secure working \r\nenvironments conceived to serve the needs of modern science. we overview \r\nthe existing initiatives developing these environments by highlighting the \r\nmajor distinguishing features. we envisage a future where regardless of \r\ngeographical location, scientists will be able to use their web browsers to \r\nseamlessly access data, software, and processing resources that are managed \r\nby diverse systems in separate administration domains via virtual research \r\nenvironments. we identify and discuss the major challenges that should be \r\nresolved to fully achieve the proposed vision, i.e., large-scale \r\nintegration and interoperability, sustainability, and adoption."},"metadata_created":{"7":1676130193.0102539062,"6":1676130537.5889539719,"5":1676130738.7923879623,"4":1681208191.8971168995,"3":1681446416.8742809296,"2":1681446678.2581589222,"1":1681916621.1160180569},"url":{"7":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/reinforcement_learning","6":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/retrieval-augmented_language_generation","5":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/neural_conversational_information_retrieval","4":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/survey_about_chatbots","3":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/summarizing_from_human_feedback","2":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/deep_reinforcement_learning_from_human_preferences","1":"https:\/\/data.d4science.org\/ctlg\/AssistedLab\/virtual_research_environments_an_overview_and_a_research_agenda"}}

1
assistedlab_post.json Normal file

File diff suppressed because one or more lines are too long

View File

@ -1 +1,27 @@
The assistedlab VRE is an environment conceived to deploy and test Jant, the conversational assistant of D4Science. It contains a catalogue which has selected papers about machine learning topics in general and conversational agents development in particular. It also contains some datasets related to these topics. The assistedlab VRE is an environment conceived to deploy and test Jant, the conversational assistant of D4Science. It contains a catalogue which has selected papers about machine learning topics in general and conversational agents development in particular. It also contains some datasets related to these topics.
#nltk==3.7
#numpy==1.22.4
#pandas==1.3.5
#scikit-learn==1.0.2
#scipy==1.7.3
#sentencepiece==0.1.97
#sklearn-pandas==1.8.0
#spacy==3.4.4
#spacy-alignments==0.9.0
#spacy-legacy==3.0.12
#spacy-loggers==1.0.4
#spacy-transformers==1.1.9
#spacy-experimental==0.6.2
torch @ https://download.pytorch.org/whl/cu116/torch-1.13.1%2Bcu116-cp38-cp38-linux_x86_64.whl
torchaudio @ https://download.pytorch.org/whl/cu116/torchaudio-0.13.1%2Bcu116-cp38-cp38-linux_x86_64.whl
torchsummary==1.5.1
torchtext==0.14.1
torchvision @ https://download.pytorch.org/whl/cu116/torchvision-0.14.1%2Bcu116-cp38-cp38-linux_x86_64.whl
en-coreference-web-trf @ https://github.com/explosion/spacy-experimental/releases/download/v0.6.1/en_coreference_web_trf-3.4.0a2-py3-none-any.whl
Flask==1.1.4

Binary file not shown.

View File

@ -0,0 +1,31 @@
{
"citation": "",
"description": "",
"features": {
"id": {
"dtype": "int64",
"_type": "Value"
},
"paperid": {
"dtype": "int64",
"_type": "Value"
},
"content": {
"dtype": "string",
"_type": "Value"
},
"__index_level_0__": {
"dtype": "string",
"_type": "Value"
},
"embeddings": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"_type": "Sequence"
}
},
"homepage": "",
"license": ""
}

View File

@ -0,0 +1,13 @@
{
"_data_files": [
{
"filename": "data-00000-of-00001.arrow"
}
],
"_fingerprint": "99e5d23916952a82",
"_format_columns": null,
"_format_kwargs": {},
"_format_type": null,
"_output_all_columns": false,
"_split": null
}

Binary file not shown.

View File

@ -0,0 +1,70 @@
{
"citation": "",
"description": "",
"features": {
"id": {
"dtype": "int64",
"_type": "Value"
},
"type": {
"dtype": "string",
"_type": "Value"
},
"resources": [
{
"description": {
"dtype": "string",
"_type": "Value"
},
"name": {
"dtype": "string",
"_type": "Value"
},
"url": {
"dtype": "string",
"_type": "Value"
}
}
],
"tags": {
"feature": {
"dtype": "string",
"_type": "Value"
},
"_type": "Sequence"
},
"title": {
"dtype": "string",
"_type": "Value"
},
"author": {
"dtype": "string",
"_type": "Value"
},
"notes": {
"dtype": "string",
"_type": "Value"
},
"metadata_created": {
"dtype": "float64",
"_type": "Value"
},
"url": {
"dtype": "string",
"_type": "Value"
},
"__index_level_0__": {
"dtype": "string",
"_type": "Value"
},
"embeddings": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"_type": "Sequence"
}
},
"homepage": "",
"license": ""
}

View File

@ -0,0 +1,13 @@
{
"_data_files": [
{
"filename": "data-00000-of-00001.arrow"
}
],
"_fingerprint": "d15ca66770ecc202",
"_format_columns": null,
"_format_kwargs": {},
"_format_type": null,
"_output_all_columns": false,
"_split": null
}

Binary file not shown.

View File

@ -0,0 +1,70 @@
{
"citation": "",
"description": "",
"features": {
"id": {
"dtype": "int64",
"_type": "Value"
},
"type": {
"dtype": "string",
"_type": "Value"
},
"resources": [
{
"description": {
"dtype": "string",
"_type": "Value"
},
"name": {
"dtype": "string",
"_type": "Value"
},
"url": {
"dtype": "string",
"_type": "Value"
}
}
],
"tags": {
"feature": {
"dtype": "string",
"_type": "Value"
},
"_type": "Sequence"
},
"title": {
"dtype": "string",
"_type": "Value"
},
"author": {
"dtype": "string",
"_type": "Value"
},
"notes": {
"dtype": "string",
"_type": "Value"
},
"metadata_created": {
"dtype": "float64",
"_type": "Value"
},
"url": {
"dtype": "string",
"_type": "Value"
},
"__index_level_0__": {
"dtype": "string",
"_type": "Value"
},
"embeddings": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"_type": "Sequence"
}
},
"homepage": "",
"license": ""
}

View File

@ -0,0 +1,13 @@
{
"_data_files": [
{
"filename": "data-00000-of-00001.arrow"
}
],
"_fingerprint": "178faebb2e165622",
"_format_columns": null,
"_format_kwargs": {},
"_format_type": null,
"_output_all_columns": false,
"_split": null
}

Binary file not shown.

View File

@ -0,0 +1,70 @@
{
"citation": "",
"description": "",
"features": {
"id": {
"dtype": "int64",
"_type": "Value"
},
"type": {
"dtype": "string",
"_type": "Value"
},
"resources": [
{
"description": {
"dtype": "string",
"_type": "Value"
},
"name": {
"dtype": "string",
"_type": "Value"
},
"url": {
"dtype": "string",
"_type": "Value"
}
}
],
"tags": {
"feature": {
"dtype": "string",
"_type": "Value"
},
"_type": "Sequence"
},
"title": {
"dtype": "string",
"_type": "Value"
},
"author": {
"dtype": "string",
"_type": "Value"
},
"notes": {
"dtype": "string",
"_type": "Value"
},
"metadata_created": {
"dtype": "float64",
"_type": "Value"
},
"url": {
"dtype": "string",
"_type": "Value"
},
"__index_level_0__": {
"dtype": "string",
"_type": "Value"
},
"embeddings": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"_type": "Sequence"
}
},
"homepage": "",
"license": ""
}

View File

@ -0,0 +1,13 @@
{
"_data_files": [
{
"filename": "data-00000-of-00001.arrow"
}
],
"_fingerprint": "b949183484bc0637",
"_format_columns": null,
"_format_kwargs": {},
"_format_type": null,
"_output_all_columns": false,
"_split": null
}

Binary file not shown.

View File

@ -0,0 +1,70 @@
{
"citation": "",
"description": "",
"features": {
"id": {
"dtype": "int64",
"_type": "Value"
},
"type": {
"dtype": "string",
"_type": "Value"
},
"resources": [
{
"description": {
"dtype": "string",
"_type": "Value"
},
"name": {
"dtype": "string",
"_type": "Value"
},
"url": {
"dtype": "string",
"_type": "Value"
}
}
],
"tags": {
"feature": {
"dtype": "string",
"_type": "Value"
},
"_type": "Sequence"
},
"title": {
"dtype": "string",
"_type": "Value"
},
"author": {
"dtype": "string",
"_type": "Value"
},
"notes": {
"dtype": "string",
"_type": "Value"
},
"metadata_created": {
"dtype": "float64",
"_type": "Value"
},
"url": {
"dtype": "string",
"_type": "Value"
},
"__index_level_0__": {
"dtype": "string",
"_type": "Value"
},
"embeddings": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"_type": "Sequence"
}
},
"homepage": "",
"license": ""
}

View File

@ -0,0 +1,13 @@
{
"_data_files": [
{
"filename": "data-00000-of-00001.arrow"
}
],
"_fingerprint": "3866b6141213754b",
"_format_columns": null,
"_format_kwargs": {},
"_format_type": null,
"_output_all_columns": false,
"_split": null
}

Binary file not shown.

View File

@ -0,0 +1,42 @@
{
"citation": "",
"description": "",
"features": {
"id": {
"dtype": "int64",
"_type": "Value"
},
"author": {
"dtype": "string",
"_type": "Value"
},
"content": {
"dtype": "string",
"_type": "Value"
},
"time": {
"dtype": "int64",
"_type": "Value"
},
"tags": {
"feature": {
"dtype": "string",
"_type": "Value"
},
"_type": "Sequence"
},
"__index_level_0__": {
"dtype": "string",
"_type": "Value"
},
"embeddings": {
"feature": {
"dtype": "float32",
"_type": "Value"
},
"_type": "Sequence"
}
},
"homepage": "",
"license": ""
}

View File

@ -0,0 +1,13 @@
{
"_data_files": [
{
"filename": "data-00000-of-00001.arrow"
}
],
"_fingerprint": "4e5c812a622a33bc",
"_format_columns": null,
"_format_kwargs": {},
"_format_type": null,
"_output_all_columns": false,
"_split": null
}

35
main.py
View File

@ -7,10 +7,10 @@ import torch
from flask import Flask, render_template, request, jsonify from flask import Flask, render_template, request, jsonify
from flask_cors import CORS, cross_origin from flask_cors import CORS, cross_origin
import psycopg2 import psycopg2
import spacy #import spacy
import requests import requests
import spacy_transformers #import spacy_transformers
import torch #import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline, AutoModelForCausalLM from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline, AutoModelForCausalLM
from User import User from User import User
from VRE import VRE from VRE import VRE
@ -22,9 +22,9 @@ import pandas as pd
import time import time
import threading import threading
from sentence_transformers import SentenceTransformer from sentence_transformers import SentenceTransformer
from huggingface_hub import login #from huggingface_hub import login
login(token="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") #login(token="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")
@ -40,7 +40,7 @@ alive = "alive"
device = "cuda" if torch.cuda.is_available() else "cpu" device = "cuda" if torch.cuda.is_available() else "cpu"
device_flag = torch.cuda.current_device() if torch.cuda.is_available() else -1 device_flag = torch.cuda.current_device() if torch.cuda.is_available() else -1
model_id = "/models/google-gemma" model_id = "/models/google-gemma" #"google/gemma-2b-it"
dtype = torch.bfloat16 dtype = torch.bfloat16
#query_rewriter = pipeline("text2text-generation", model="castorini/t5-base-canard") #query_rewriter = pipeline("text2text-generation", model="castorini/t5-base-canard")
@ -52,9 +52,10 @@ dtype = torch.bfloat16
#LLM = pipeline("text2text-generation", model="/models/google-gemma", device=device_flag) #LLM = pipeline("text2text-generation", model="/models/google-gemma", device=device_flag)
LLM_tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
LLM_model = AutoModelForCausalLM.from_pretrainedAutoModelForCausalLM.from_pretrained( LLM_tokenizer = AutoTokenizer.from_pretrained(model_id)
"google/gemma-2b-it", LLM_model = AutoModelForCausalLM.from_pretrained(
model_id,
torch_dtype=torch.bfloat16 torch_dtype=torch.bfloat16
) )
@ -128,13 +129,13 @@ def init_dm():
token = request.get_json().get("token") token = request.get_json().get("token")
status = request.get_json().get("stat") status = request.get_json().get("stat")
if status == "start": if status == "start":
logging.debug("status=start") logging.info("status=start")
message = {"stat": "waiting", "err": ""} message = {"stat": "waiting", "err": ""}
elif status == "set": elif status == "set":
logging.debug("status=set") logging.info("status=set")
headers = {"gcube-token": token, "Accept": "application/json"} headers = {"gcube-token": token, "Accept": "application/json"}
if token not in users: if token not in users:
logging.debug("getting user info") logging.info("getting user info")
url = 'https://api.d4science.org/rest/2/people/profile' url = 'https://api.d4science.org/rest/2/people/profile'
response = requests.get(url, headers=headers) response = requests.get(url, headers=headers)
if response.status_code == 200: if response.status_code == 200:
@ -175,20 +176,20 @@ def predict():
message = {} message = {}
try: try:
if text == "<HELP_ON_START>": if text == "<HELP_ON_START>":
logging.debug("help on start - inactive") logging.info("help on start - inactive")
state = {'help': True, 'inactive': False, 'modified_query':"", 'intent':""} state = {'help': True, 'inactive': False, 'modified_query':"", 'intent':""}
dm.update(state) dm.update(state)
action = dm.next_action() action = dm.next_action()
logging.debug("next action:" + action) logging.info("next action:" + action)
#response = "Hey " + users[token]['name'].split()[0] + "! it's Janet! I am here to help you make use of the datasets and papers in the catalogue of the VRE. I can answer questions whose answers may be inside the papers. I can summarize papers for you. I can also chat with you. So, whichever it is, I am ready to chat!" #response = "Hey " + users[token]['name'].split()[0] + "! it's Janet! I am here to help you make use of the datasets and papers in the catalogue of the VRE. I can answer questions whose answers may be inside the papers. I can summarize papers for you. I can also chat with you. So, whichever it is, I am ready to chat!"
response = rg.gen_response(action, vrename=vre.name, username=users[token]['username'], name=users[token]['name'].split()[0]) response = rg.gen_response(action, vrename=vre.name, username=users[token]['username'], name=users[token]['name'].split()[0])
message = {"answer": response} message = {"answer": response}
elif text == "<RECOMMEND_ON_IDLE>": elif text == "<RECOMMEND_ON_IDLE>":
logging.debug("recommend on idle - inactive") logging.info("recommend on idle - inactive")
state = {'help': False, 'inactive': True, 'modified_query':"recommed: ", 'intent':""} state = {'help': False, 'inactive': True, 'modified_query':"recommed: ", 'intent':""}
dm.update(state) dm.update(state)
action = dm.next_action() action = dm.next_action()
logging.debug("next action:" + action) logging.info("next action:" + action)
#response = "Hey " + users[token]['name'].split()[0] + "! it's Janet! I am here to help you make use of the datasets and papers in the catalogue of the VRE. I can answer questions whose answers may be inside the papers. I can summarize papers for you. I can also chat with you. So, whichever it is, I am ready to chat!" #response = "Hey " + users[token]['name'].split()[0] + "! it's Janet! I am here to help you make use of the datasets and papers in the catalogue of the VRE. I can answer questions whose answers may be inside the papers. I can summarize papers for you. I can also chat with you. So, whichever it is, I am ready to chat!"
response = rg.gen_response(action, username=users[token]['username'],name=users[token]['name'].split()[0], vrename=vre.name) response = rg.gen_response(action, username=users[token]['username'],name=users[token]['name'].split()[0], vrename=vre.name)
@ -213,7 +214,7 @@ user: {text}""")
# rec.generate_recommendations(users[token]['username'], new_user_interests, new_vre_material) # rec.generate_recommendations(users[token]['username'], new_user_interests, new_vre_material)
dm.update(state) dm.update(state)
action = dm.next_action() action = dm.next_action()
logging.debug("Next action: " + action) logging.info("Next action: " + action)
#response = rg.gen_response(action=action, utterance=state['modified_query'], state=dm.get_recent_state(), consec_history=dm.get_consec_history(), chitchat_history=dm.get_chitchat_history(), vrename=vre.name, username=users[token]['username'], name=users[token]['name'].split()[0]) #response = rg.gen_response(action=action, utterance=state['modified_query'], state=dm.get_recent_state(), consec_history=dm.get_consec_history(), chitchat_history=dm.get_chitchat_history(), vrename=vre.name, username=users[token]['username'], name=users[token]['name'].split()[0])
#message = {"answer": response, "query": text, "cand": "candidate", "history": dm.get_consec_history(), "modQuery": state['modified_query']} #message = {"answer": response, "query": text, "cand": "candidate", "history": dm.get_consec_history(), "modQuery": state['modified_query']}
message = {"answer": state['modified_query'], "query": text, "cand": "candidate", "history": dm.get_history(), "modQuery": state['modified_query']} message = {"answer": state['modified_query'], "query": text, "cand": "candidate", "history": dm.get_history(), "modQuery": state['modified_query']}

View File

@ -1,40 +1,31 @@
faiss-gpu==1.7.2 faiss-gpu==1.7.2
Flask==1.1.4 jinja2==3.0.0
Flask
flask-cors==3.0.10 flask-cors==3.0.10
protobuf==3.20.0 protobuf==3.20.0
matplotlib==3.5.3 matplotlib==3.5.3
nltk==3.7 scikit-learn
numpy==1.22.4 sklearn-pandas
pandas==1.3.5
PyPDF2==3.0.1 PyPDF2==3.0.1
pdfquery pdfquery
html2text html2text
nltk
numpy
pandas
regex==2022.6.2 regex==2022.6.2
requests==2.25.1 requests==2.25.1
scikit-learn==1.0.2 torch
scipy==1.7.3
sentencepiece==0.1.97
sklearn-pandas==1.8.0
spacy==3.4.4
spacy-alignments==0.9.0
spacy-legacy==3.0.12
spacy-loggers==1.0.4
spacy-transformers==1.1.9
spacy-experimental==0.6.2
torch @ https://download.pytorch.org/whl/cu116/torch-1.13.1%2Bcu116-cp38-cp38-linux_x86_64.whl
torchaudio @ https://download.pytorch.org/whl/cu116/torchaudio-0.13.1%2Bcu116-cp38-cp38-linux_x86_64.whl
torchsummary==1.5.1
torchtext==0.14.1
sentence-transformers sentence-transformers
torchvision @ https://download.pytorch.org/whl/cu116/torchvision-0.14.1%2Bcu116-cp38-cp38-linux_x86_64.whl
tqdm==4.64.1 tqdm==4.64.1
transformers transformers
markupsafe==2.0.1 markupsafe==2.0.1
psycopg2==2.9.5 psycopg2==2.9.5
en-coreference-web-trf @ https://github.com/explosion/spacy-experimental/releases/download/v0.6.1/en_coreference_web_trf-3.4.0a2-py3-none-any.whl
datasets datasets
itsdangerous==2.0.1
huggingface_hub huggingface_hub
Werkzeug==1.0.1 Werkzeug==1.0.1