new_workdir

This commit is contained in:
ahmed531998 2023-04-08 22:51:44 +02:00
parent 6dde6841d6
commit 195433c2ec
5 changed files with 54 additions and 24 deletions

View File

@@ -8,4 +8,4 @@ RUN pip install -r requirements.txt
COPY . .
ENTRYPOINT ["python", "main_simple.py"]
ENTRYPOINT ["python", "main.py"]

View File

@@ -3,7 +3,7 @@ import os
class User:
def __init__(self, username, token, num_interests=3, directory='./', interests_file='interests.json'):
def __init__(self, username, token, num_interests=3, directory='/app/', interests_file='interests.json'):
self.username = username
self.token = token
self.num_interests = num_interests

26
VRE.py
View File

@@ -12,7 +12,7 @@ import time
import threading
class VRE:
def __init__(self, name, token, retriever, directory='./'):
def __init__(self, name, token, retriever, directory='/app/'):
self.name = name
self.token = token
self.catalogue_url = 'https://api.d4science.org/catalogue/items/'
@@ -26,11 +26,11 @@ class VRE:
self.db = {'paper_db': pd.read_json(self.directory + self.name + '_paper.json') if os.path.isfile(self.directory + self.name + '_paper.json') else pd.DataFrame(columns=['id', 'type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created']),
'dataset_db': pd.read_json(self.directory + self.name + '_dataset.json') if os.path.isfile(self.directory + self.name + '_dataset.json') else pd.DataFrame(columns=['id', 'type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created']),
'content_db': pd.read_json(self.directory + self.name + '_content.json') if os.path.isfile(self.directory + self.name + '_content.json') else pd.DataFrame(columns=['id', 'paperid', 'content'])}
self.index = {'dataset_titles_index': None if not os.path.isfile(self.directory + 'janet_dataset_titles_index') else faiss.read_index('janet_dataset_titles_index'),
'paper_titles_index': None if not os.path.isfile(self.directory + 'janet_paper_titles_index') else faiss.read_index('janet_paper_titles_index'),
'dataset_desc_index': None if not os.path.isfile(self.directory + 'janet_dataset_desc_index') else faiss.read_index('janet_dataset_desc_index'),
'paper_desc_index': None if not os.path.isfile(self.directory + 'janet_paper_desc_index') else faiss.read_index('janet_paper_desc_index'),
'content_index': None if not os.path.isfile(self.directory + 'janet_content_index') else faiss.read_index('janet_content_index')}
self.index = {'dataset_titles_index': None if not os.path.isfile(self.directory + 'janet_dataset_titles_index') else faiss.read_index(self.directory + 'janet_dataset_titles_index'),
'paper_titles_index': None if not os.path.isfile(self.directory + 'janet_paper_titles_index') else faiss.read_index(self.directory + 'janet_paper_titles_index'),
'dataset_desc_index': None if not os.path.isfile(self.directory + 'janet_dataset_desc_index') else faiss.read_index(self.directory + 'janet_dataset_desc_index'),
'paper_desc_index': None if not os.path.isfile(self.directory + 'janet_paper_desc_index') else faiss.read_index(self.directory + 'janet_paper_desc_index'),
'content_index': None if not os.path.isfile(self.directory + 'janet_content_index') else faiss.read_index(self.directory + 'janet_content_index')}
self.new_income = False
def init(self):
@@ -76,6 +76,7 @@ class VRE:
self.new_income = False
def create_index(self, db_type, attribute, index_type, filename):
filename = self.directory + filename
to_index = self.db[db_type][attribute]
for i, info in enumerate(to_index):
if i == 0:
@@ -97,6 +98,7 @@
faiss.write_index(self.index[index_type], filename)
def populate_index(self, db_type, attribute, index_type, filename):
filename = self.directory + filename
to_index = self.db[db_type][attribute]
for info in to_index:
sentence_embedding = np.array(self.retriever.encode([info]))
@@ -148,10 +150,10 @@ class VRE:
self.db['dataset_db'] = dataset_df.sort_values(by='metadata_created', ascending=True)
self.db['content_db'] = content_df
self.db['paper_db'].to_json(self.name + '_paper.json')
self.db['dataset_db'].to_json(self.name + '_dataset.json')
self.db['paper_db'].to_json(self.directory + self.name + '_paper.json')
self.db['dataset_db'].to_json(self.directory + self.name + '_dataset.json')
self.db['content_db'].to_json(self.name + '_content.json')
self.db['content_db'].to_json(self.directory + self.name + '_content.json')
# modify query
def get_vre_update(self):
@@ -201,10 +203,10 @@ class VRE:
self.db['paper_db'] = pd.concat([self.db['paper_db'], paper_df.sort_values(by='metadata_created', ascending=True)])
self.db['dataset_db'] = pd.concat([self.db['dataset_db'], dataset_df.sort_values(by='metadata_created', ascending=True)])
self.db['paper_db'].to_json(self.name + '_paper.json')
self.db['dataset_db'].to_json(self.name + '_dataset.json')
self.db['paper_db'].to_json(self.directory + self.name + '_paper.json')
self.db['dataset_db'].to_json(self.directory + self.name + '_dataset.json')
self.db['content_db'] = pd.concat([self.db['content_db'], content_df])
self.db['content_db'].to_json(self.name + '_content.json')
self.db['content_db'].to_json(self.directory + self.name + '_content.json')
if not paper_df.empty or not dataset_df.empty or not content_df.empty:
self.new_income = True

18
main.py
View File

@@ -28,23 +28,23 @@ cors = CORS(app, resources={r"/api/predict": {"origins": url},
r"/api/feedback": {"origins": url},
r"/health": {"origins": "*"}
})
"""
conn = psycopg2.connect(
host="janet-pg",
database=os.getenv("POSTGRES_DB"),
user=os.getenv("POSTGRES_USER"),
password=os.getenv("POSTGRES_PASSWORD"))
"""
"""
conn = psycopg2.connect(host="https://janet-app-db.d4science.org",
database="janet",
user="janet_user",
password="2fb5e81fec5a2d906a04")
"""
"""
cur = conn.cursor()
"""
def vre_fetch():
while True:
@@ -62,7 +62,7 @@ def user_interest_decay():
user.decay_interests()
@app.route("/health", methods=['GET'])
def check_health():
def health():
return "Success", 200
@app.route("/api/predict", methods=['POST'])
@@ -111,7 +111,7 @@ def predict():
def feedback():
data = request.get_json().get("feedback")
print(data)
"""
cur.execute('INSERT INTO feedback_trial (query, history, janet_modified_query, is_modified_query_correct, user_modified_query, response, preferred_response, response_length_feedback, response_fluency_feedback, response_truth_feedback, response_useful_feedback, response_time_feedback, response_intent) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)',
(data['query'], data['history'], data['modQuery'],
data['queryModCorrect'], data['correctQuery'],
@@ -120,7 +120,7 @@ def feedback():
data['speed'], data['intent'])
)
conn.commit()
"""
reply = jsonify({"status": "done"})
return reply
@@ -167,7 +167,7 @@ if __name__ == "__main__":
rg = ResponseGenerator(index,db, rec, generators, retriever)
"""
cur.execute('CREATE TABLE IF NOT EXISTS feedback_trial (id serial PRIMARY KEY,'
'query text NOT NULL,'
'history text NOT NULL,'
@@ -184,5 +184,5 @@ if __name__ == "__main__":
'response_intent text NOT NULL);'
)
conn.commit()
"""
app.run(host='0.0.0.0', port=4000)

View File

@@ -1,7 +1,35 @@
pandas==1.3.5
faiss-gpu==1.7.2
Flask==1.1.4
flask-cors==3.0.10
protobuf==3.20.0
matplotlib==3.5.3
nltk==3.7
numpy==1.22.4
pandas==1.3.5
PyPDF2==3.0.1
regex==2022.6.2
requests==2.25.1
scikit-learn==1.0.2
scipy==1.7.3
sentencepiece==0.1.97
sklearn-pandas==1.8.0
spacy==3.4.4
spacy-alignments==0.9.0
spacy-legacy==3.0.12
spacy-loggers==1.0.4
spacy-transformers==1.1.9
spacy-experimental==0.6.2
torch @ https://download.pytorch.org/whl/cu116/torch-1.13.1%2Bcu116-cp38-cp38-linux_x86_64.whl
torchaudio @ https://download.pytorch.org/whl/cu116/torchaudio-0.13.1%2Bcu116-cp38-cp38-linux_x86_64.whl
torchsummary==1.5.1
torchtext==0.14.1
sentence-transformers
torchvision @ https://download.pytorch.org/whl/cu116/torchvision-0.14.1%2Bcu116-cp38-cp38-linux_x86_64.whl
tqdm==4.64.1
transformers
markupsafe==2.0.1
psycopg2==2.9.5
en-coreference-web-trf @ https://github.com/explosion/spacy-experimental/releases/download/v0.6.1/en_coreference_web_trf-3.4.0a2-py3-none-any.whl
Werkzeug==1.0.1