new_workdir
This commit is contained in:
parent
6dde6841d6
commit
195433c2ec
|
@ -8,4 +8,4 @@ RUN pip install -r requirements.txt
|
|||
|
||||
COPY . .
|
||||
|
||||
ENTRYPOINT ["python", "main_simple.py"]
|
||||
ENTRYPOINT ["python", "main.py"]
|
||||
|
|
2
User.py
2
User.py
|
@ -3,7 +3,7 @@ import os
|
|||
|
||||
|
||||
class User:
|
||||
def __init__(self, username, token, num_interests=3, directory='./', interests_file='interests.json'):
|
||||
def __init__(self, username, token, num_interests=3, directory='/app/', interests_file='interests.json'):
|
||||
self.username = username
|
||||
self.token = token
|
||||
self.num_interests = num_interests
|
||||
|
|
26
VRE.py
26
VRE.py
|
@ -12,7 +12,7 @@ import time
|
|||
import threading
|
||||
|
||||
class VRE:
|
||||
def __init__(self, name, token, retriever, directory='./'):
|
||||
def __init__(self, name, token, retriever, directory='/app/'):
|
||||
self.name = name
|
||||
self.token = token
|
||||
self.catalogue_url = 'https://api.d4science.org/catalogue/items/'
|
||||
|
@ -26,11 +26,11 @@ class VRE:
|
|||
self.db = {'paper_db': pd.read_json(self.directory + self.name + '_paper.json') if os.path.isfile(self.directory + self.name + '_paper.json') else pd.DataFrame(columns=['id', 'type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created']),
|
||||
'dataset_db': pd.read_json(self.directory + self.name + '_dataset.json') if os.path.isfile(self.directory + self.name + '_dataset.json') else pd.DataFrame(columns=['id', 'type', 'resources', 'tags', 'title', 'author', 'notes', 'metadata_created']),
|
||||
'content_db': pd.read_json(self.directory + self.name + '_content.json') if os.path.isfile(self.directory + self.name + '_content.json') else pd.DataFrame(columns=['id', 'paperid', 'content'])}
|
||||
self.index = {'dataset_titles_index': None if not os.path.isfile(self.directory + 'janet_dataset_titles_index') else faiss.read_index('janet_dataset_titles_index'),
|
||||
'paper_titles_index': None if not os.path.isfile(self.directory + 'janet_paper_titles_index') else faiss.read_index('janet_paper_titles_index'),
|
||||
'dataset_desc_index': None if not os.path.isfile(self.directory + 'janet_dataset_desc_index') else faiss.read_index('janet_dataset_desc_index'),
|
||||
'paper_desc_index': None if not os.path.isfile(self.directory + 'janet_paper_desc_index') else faiss.read_index('janet_paper_desc_index'),
|
||||
'content_index': None if not os.path.isfile(self.directory + 'janet_content_index') else faiss.read_index('janet_content_index')}
|
||||
self.index = {'dataset_titles_index': None if not os.path.isfile(self.directory + 'janet_dataset_titles_index') else faiss.read_index(self.directory + 'janet_dataset_titles_index'),
|
||||
'paper_titles_index': None if not os.path.isfile(self.directory + 'janet_paper_titles_index') else faiss.read_index(self.directory + 'janet_paper_titles_index'),
|
||||
'dataset_desc_index': None if not os.path.isfile(self.directory + 'janet_dataset_desc_index') else faiss.read_index(self.directory + 'janet_dataset_desc_index'),
|
||||
'paper_desc_index': None if not os.path.isfile(self.directory + 'janet_paper_desc_index') else faiss.read_index(self.directory + 'janet_paper_desc_index'),
|
||||
'content_index': None if not os.path.isfile(self.directory + 'janet_content_index') else faiss.read_index(self.directory + 'janet_content_index')}
|
||||
self.new_income = False
|
||||
|
||||
def init(self):
|
||||
|
@ -76,6 +76,7 @@ class VRE:
|
|||
self.new_income = False
|
||||
|
||||
def create_index(self, db_type, attribute, index_type, filename):
|
||||
filename = self.directory + filename
|
||||
to_index = self.db[db_type][attribute]
|
||||
for i, info in enumerate(to_index):
|
||||
if i == 0:
|
||||
|
@ -97,6 +98,7 @@ class VRE:
|
|||
faiss.write_index(self.index[index_type], filename)
|
||||
|
||||
def populate_index(self, db_type, attribute, index_type, filename):
|
||||
filename = self.directory + filename
|
||||
to_index = self.db[db_type][attribute]
|
||||
for info in to_index:
|
||||
sentence_embedding = np.array(self.retriever.encode([info]))
|
||||
|
@ -148,10 +150,10 @@ class VRE:
|
|||
self.db['dataset_db'] = dataset_df.sort_values(by='metadata_created', ascending=True)
|
||||
self.db['content_db'] = content_df
|
||||
|
||||
self.db['paper_db'].to_json(self.name + '_paper.json')
|
||||
self.db['dataset_db'].to_json(self.name + '_dataset.json')
|
||||
self.db['paper_db'].to_json(self.directory + self.name + '_paper.json')
|
||||
self.db['dataset_db'].to_json(self.directory + self.name + '_dataset.json')
|
||||
|
||||
self.db['content_db'].to_json(self.name + '_content.json')
|
||||
self.db['content_db'].to_json(self.directory + self.name + '_content.json')
|
||||
|
||||
# modify query
|
||||
def get_vre_update(self):
|
||||
|
@ -201,10 +203,10 @@ class VRE:
|
|||
self.db['paper_db'] = pd.concat([self.db['paper_db'], paper_df.sort_values(by='metadata_created', ascending=True)])
|
||||
self.db['dataset_db'] = pd.concat([self.db['dataset_db'], dataset_df.sort_values(by='metadata_created', ascending=True)])
|
||||
|
||||
self.db['paper_db'].to_json(self.name + '_paper.json')
|
||||
self.db['dataset_db'].to_json(self.name + '_dataset.json')
|
||||
self.db['paper_db'].to_json(self.directory + self.name + '_paper.json')
|
||||
self.db['dataset_db'].to_json(self.directory + self.name + '_dataset.json')
|
||||
self.db['content_db'] = pd.concat([self.db['content_db'], content_df])
|
||||
self.db['content_db'].to_json(self.name + '_content.json')
|
||||
self.db['content_db'].to_json(self.directory + self.name + '_content.json')
|
||||
if not paper_df.empty or not dataset_df.empty or not content_df.empty:
|
||||
self.new_income = True
|
||||
|
||||
|
|
18
main.py
18
main.py
|
@ -28,23 +28,23 @@ cors = CORS(app, resources={r"/api/predict": {"origins": url},
|
|||
r"/api/feedback": {"origins": url},
|
||||
r"/health": {"origins": "*"}
|
||||
})
|
||||
"""
|
||||
|
||||
conn = psycopg2.connect(
|
||||
host="janet-pg",
|
||||
database=os.getenv("POSTGRES_DB"),
|
||||
user=os.getenv("POSTGRES_USER"),
|
||||
password=os.getenv("POSTGRES_PASSWORD"))
|
||||
|
||||
"""
|
||||
|
||||
"""
|
||||
conn = psycopg2.connect(host="https://janet-app-db.d4science.org",
|
||||
database="janet",
|
||||
user="janet_user",
|
||||
password="2fb5e81fec5a2d906a04")
|
||||
"""
|
||||
"""
|
||||
|
||||
cur = conn.cursor()
|
||||
"""
|
||||
|
||||
|
||||
def vre_fetch():
|
||||
while True:
|
||||
|
@ -62,7 +62,7 @@ def user_interest_decay():
|
|||
user.decay_interests()
|
||||
|
||||
@app.route("/health", methods=['GET'])
|
||||
def check_health():
|
||||
def health():
|
||||
return "Success", 200
|
||||
|
||||
@app.route("/api/predict", methods=['POST'])
|
||||
|
@ -111,7 +111,7 @@ def predict():
|
|||
def feedback():
|
||||
data = request.get_json().get("feedback")
|
||||
print(data)
|
||||
"""
|
||||
|
||||
cur.execute('INSERT INTO feedback_trial (query, history, janet_modified_query, is_modified_query_correct, user_modified_query, response, preferred_response, response_length_feedback, response_fluency_feedback, response_truth_feedback, response_useful_feedback, response_time_feedback, response_intent) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)',
|
||||
(data['query'], data['history'], data['modQuery'],
|
||||
data['queryModCorrect'], data['correctQuery'],
|
||||
|
@ -120,7 +120,7 @@ def feedback():
|
|||
data['speed'], data['intent'])
|
||||
)
|
||||
conn.commit()
|
||||
"""
|
||||
|
||||
reply = jsonify({"status": "done"})
|
||||
return reply
|
||||
|
||||
|
@ -167,7 +167,7 @@ if __name__ == "__main__":
|
|||
|
||||
rg = ResponseGenerator(index,db, rec, generators, retriever)
|
||||
|
||||
"""
|
||||
|
||||
cur.execute('CREATE TABLE IF NOT EXISTS feedback_trial (id serial PRIMARY KEY,'
|
||||
'query text NOT NULL,'
|
||||
'history text NOT NULL,'
|
||||
|
@ -184,5 +184,5 @@ if __name__ == "__main__":
|
|||
'response_intent text NOT NULL);'
|
||||
)
|
||||
conn.commit()
|
||||
"""
|
||||
|
||||
app.run(host='0.0.0.0', port=4000)
|
||||
|
|
|
@ -1,7 +1,35 @@
|
|||
pandas==1.3.5
|
||||
faiss-gpu==1.7.2
|
||||
Flask==1.1.4
|
||||
flask-cors==3.0.10
|
||||
protobuf==3.20.0
|
||||
matplotlib==3.5.3
|
||||
nltk==3.7
|
||||
numpy==1.22.4
|
||||
pandas==1.3.5
|
||||
PyPDF2==3.0.1
|
||||
regex==2022.6.2
|
||||
requests==2.25.1
|
||||
scikit-learn==1.0.2
|
||||
scipy==1.7.3
|
||||
sentencepiece==0.1.97
|
||||
sklearn-pandas==1.8.0
|
||||
spacy==3.4.4
|
||||
spacy-alignments==0.9.0
|
||||
spacy-legacy==3.0.12
|
||||
spacy-loggers==1.0.4
|
||||
spacy-transformers==1.1.9
|
||||
spacy-experimental==0.6.2
|
||||
torch @ https://download.pytorch.org/whl/cu116/torch-1.13.1%2Bcu116-cp38-cp38-linux_x86_64.whl
|
||||
torchaudio @ https://download.pytorch.org/whl/cu116/torchaudio-0.13.1%2Bcu116-cp38-cp38-linux_x86_64.whl
|
||||
torchsummary==1.5.1
|
||||
torchtext==0.14.1
|
||||
sentence-transformers
|
||||
torchvision @ https://download.pytorch.org/whl/cu116/torchvision-0.14.1%2Bcu116-cp38-cp38-linux_x86_64.whl
|
||||
tqdm==4.64.1
|
||||
transformers
|
||||
markupsafe==2.0.1
|
||||
psycopg2==2.9.5
|
||||
en-coreference-web-trf @ https://github.com/explosion/spacy-experimental/releases/download/v0.6.1/en_coreference_web_trf-3.4.0a2-py3-none-any.whl
|
||||
Werkzeug==1.0.1
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue