diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..b5af800
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,2 @@
+.git
+__pycache__
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8e7cd5b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+janet.pdf
+__pycache__
+ahmed.ibrahim39699_interests.json
diff --git a/Dockerfile b/Dockerfile
index 79b1816..994fdd0 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,12 +2,12 @@ FROM python:3.8
 
 WORKDIR /backend_janet
 
-COPY requirements_main.txt .
+COPY requirements_simple.txt .
 
-RUN pip install -r requirements_main.txt
+RUN pip install -r requirements_simple.txt
 RUN rm -fr /root/.cache/*
 
 COPY . .
 
-ENTRYPOINT ["python", "main.py"]
+ENTRYPOINT ["python", "main_simple.py"]
diff --git a/Jenkinsfile b/Jenkinsfile
deleted file mode 100644
index e64d481..0000000
--- a/Jenkinsfile
+++ /dev/null
@@ -1,67 +0,0 @@
-pipeline {
-    agent {
-        label 'docker'
-    }
-    environment {
-        imagename = "d4science/janetbackend" //TO FILL WITH THE RIGHT VALUE (RepositoryName) e.g. d4science/RepositoryName
-        registryCredential = 'e348bfab-5580-4db6-b0e0-d854966bde08'
-        dockerImage = ''
-        git_url='https://code-repo.d4science.org/ahmed.ibrahim39699/JanetBackEnd.git' // SET HERE THE URL OF YOUR NEW GIT PROJECT
-    }
-    stages {
-        stage('Cloning Git') {
-            steps {
-                git([url: git_url, branch: 'main', credentialsId: '88b54962-1c0e-49cb-8155-22276860f346'])
-
-            }
-        }
-        stage('Building image') {
-            steps{
-                script {
-                    dockerImage = docker.build imagename
-                }
-            }
-        }
-        stage('Deploy Image') {
-            steps{
-                script {
-                    docker.withRegistry( '', registryCredential ) {
-                        dockerImage.push("$BUILD_NUMBER")
-                        dockerImage.push('latest')
-
-                    }
-                }
-            }
-            post {
-                success {
-                    sh 'echo triggering the portainer service webhook'
-                    sh 'curl -X POST https://portainer.d4science.org/api/webhooks/7912a372-ce48-4931-b85e-28694c93b1e8'
-                }
-            }
-        }
-        stage('Remove Unused docker image') {
-            steps{
-                sh "docker rmi $imagename:$BUILD_NUMBER"
-                sh "docker rmi $imagename:latest"
-
-            }
-        }
-    }
-    // post-build actions
-    post {
-        success {
-            echo 'The docker pipeline worked!'
-            emailext to: 'jenkinsbuilds@d4science.org',
-                subject: "[Jenkins DockerPipeline D4S] build ${currentBuild.fullDisplayName} worked",
-                body: "Build time: ${currentBuild.durationString}. See ${env.BUILD_URL}"
-        }
-        failure {
-            echo 'The docker pipeline has failed'
-            emailext attachLog: true,
-                to: 'jenkinsbuilds@d4science.org',
-                subject: "[Jenkins DockerPipeline D4S] build ${currentBuild.fullDisplayName} failed for image ${imagename}",
-                body: "Something is wrong with ${env.BUILD_URL}"
-        }
-    }
-}
-
diff --git a/__pycache__/DM.cpython-38.pyc b/__pycache__/DM.cpython-38.pyc
new file mode 100644
index 0000000..5b45496
Binary files /dev/null and b/__pycache__/DM.cpython-38.pyc differ
diff --git a/__pycache__/NLU.cpython-38.pyc b/__pycache__/NLU.cpython-38.pyc
new file mode 100644
index 0000000..4b4bb42
Binary files /dev/null and b/__pycache__/NLU.cpython-38.pyc differ
diff --git a/__pycache__/Recommender.cpython-38.pyc b/__pycache__/Recommender.cpython-38.pyc
new file mode 100644
index 0000000..d75c860
Binary files /dev/null and b/__pycache__/Recommender.cpython-38.pyc differ
diff --git a/__pycache__/ResponseGenerator.cpython-38.pyc b/__pycache__/ResponseGenerator.cpython-38.pyc
new file mode 100644
index 0000000..b8b3626
Binary files /dev/null and b/__pycache__/ResponseGenerator.cpython-38.pyc differ
diff --git a/__pycache__/User.cpython-38.pyc b/__pycache__/User.cpython-38.pyc
new file mode 100644
index 0000000..dea9713
Binary files /dev/null and b/__pycache__/User.cpython-38.pyc differ
diff --git a/__pycache__/VRE.cpython-38.pyc b/__pycache__/VRE.cpython-38.pyc
new file mode 100644
index 0000000..42f31e9
Binary files /dev/null and b/__pycache__/VRE.cpython-38.pyc differ
diff --git a/ahmed.ibrahim39699_interests.json b/ahmed.ibrahim39699_interests.json
new file mode 100644
index 0000000..db1d9f8
--- /dev/null
+++ b/ahmed.ibrahim39699_interests.json
@@ -0,0 +1 @@
+{"interest":{"0":"chatbots?","1":"list commands","2":"chatbots"},"frequency":{"0":2,"1":1,"2":1}}
\ No newline at end of file
diff --git a/janet.pdf b/janet.pdf
new file mode 100644
index 0000000..dca6d4c
Binary files /dev/null and b/janet.pdf differ
diff --git a/main.py b/main.py
index 3282cab..e8c7871 100644
--- a/main.py
+++ b/main.py
@@ -34,19 +34,43 @@ cors = CORS(app, resources={r"/api/predict": {"origins": url},
 users = {}
 alive = "alive"
 
-def vre_fetch():
+device = "cuda" if torch.cuda.is_available() else "cpu"
+device_flag = torch.cuda.current_device() if torch.cuda.is_available() else -1
+
+query_rewriter = pipeline("text2text-generation", model="castorini/t5-base-canard")
+intent_classifier = pipeline("sentiment-analysis", model='/models/intent_classifier', device=device_flag)
+entity_extractor = spacy.load("/models/entity_extractor")
+offensive_classifier = pipeline("sentiment-analysis", model='/models/offensive_classifier', device=device_flag)
+ambig_classifier = pipeline("sentiment-analysis", model='/models/ambig_classifier', device=device_flag)
+coref_resolver = spacy.load("en_coreference_web_trf")
+
+nlu = NLU(query_rewriter, coref_resolver, intent_classifier, offensive_classifier, entity_extractor, ambig_classifier)
+
+#load retriever and generator
+retriever = SentenceTransformer('/models/retriever/').to(device)
+qa_generator = pipeline("text2text-generation", model="/models/train_qa", device=device_flag)
+summ_generator = pipeline("text2text-generation", model="/models/train_summ", device=device_flag)
+chat_generator = pipeline("text2text-generation", model="/models/train_chat", device=device_flag)
+amb_generator = pipeline("text2text-generation", model="/models/train_amb_gen", device=device_flag)
+generators = {'qa': qa_generator,
+              'chat': chat_generator,
+              'amb': amb_generator,
+              'summ': summ_generator}
+rec = Recommender(retriever)
+
+def vre_fetch(token):
+    global alive  # assigned in the except branch; must be global to reach the module-level health flag
     while True:
         try:
             time.sleep(1000)
             print('getting new material')
-            #users[token]['args']['vre'].get_vre_update()
-            #users[token]['args']['vre'].index_periodic_update()
-            #users[token]['args']['rg'].update_index(vre.get_index())
-            #users[token]['args']['rg'].update_db(vre.get_db())
-            vre.get_vre_update()
-            vre.index_periodic_update()
-            rg.update_index(vre.get_index())
-            rg.update_db(vre.get_db())
+            users[token]['vre'].get_vre_update()
+            users[token]['vre'].index_periodic_update()
+            # read from the per-user handle: no module-level vre exists in this version
+            users[token]['rg'].update_index(users[token]['vre'].get_index())
+            users[token]['rg'].update_db(users[token]['vre'].get_db())
+            #vre.get_vre_update()
+            #vre.index_periodic_update()
+            #rg.update_index(vre.get_index())
+            #rg.update_db(vre.get_db())
         except Exception as e:
             alive = "dead_vre_fetch"
@@ -89,7 +113,7 @@ def init_dm():
         token = request.get_json().get("token")
         status = request.get_json().get("stat")
         if status == "start":
-            message = {"stat": "waiting"}
+            message = {"stat": "waiting", "err": ""}
         elif status == "set":
             headers = {"gcube-token": token, "Accept": "application/json"}
             if token not in users:
@@ -98,19 +122,26 @@
             if response.status_code == 200:
                 username = response.json()['result']['username']
                 name = response.json()['result']['fullname']
+
+                vre = VRE("assistedlab", token, retriever)
+                vre.init()
+                index = vre.get_index()
+                db = vre.get_db()
+
+                rg = ResponseGenerator(index,db, rec, generators, retriever)
 
-                users[token] = {'username': username, 'name': name, 'dm': DM(), 'activity': 0, 'user': User(username, token)}
+                users[token] = {'username': username, 'name': name, 'dm': DM(), 'activity': 0, 'user': User(username, token), 'vre': vre, 'rg': rg}
                 threading.Thread(target=user_interest_decay, args=(token,), name='decayinterest_'+users[token]['username']).start()
-
-                message = {"stat": "done"}
+                threading.Thread(target=vre_fetch, name='updatevre'+users[token]['username'], args=(token,)).start()
+                message = {"stat": "done", "err": ""}
             else:
-                message = {"stat": "rejected"}
+                message = {"stat": "rejected", "err": ""}
         else:
-            message = {"stat": "done"}
+            message = {"stat": "done", "err": ""}
         return message
     except Exception as e:
-        message = {"stat": "init_dm_error"}
+        message = {"stat": "init_dm_error", "err": str(e)}
         return message
@@ -120,8 +151,8 @@ def predict():
     token = request.get_json().get("token")
     dm = users[token]['dm']
     user = users[token]['user']
-    #rg = users[token]['args']['rg']
-    #vre = users[token]['args']['vre']
+    rg = users[token]['rg']
+    vre = users[token]['vre']
     message = {}
     try:
         if text == "":
@@ -167,8 +198,8 @@ def predict():
         users[token]['dm'] = dm
         users[token]['user'] = user
         users[token]['activity'] = 0
-        #users[token]['args']['vre'] = vre
-        #users[token]['args']['rg'] = rg
+        users[token]['vre'] = vre
+        users[token]['rg'] = rg
         return reply
     except Exception as e:
         message = {"answer": str(e), "query": "", "cand": "candidate", "history": "", "modQuery": ""}
@@ -200,47 +231,6 @@ def feedback():
 
 if __name__ == "__main__":
     warnings.filterwarnings("ignore")
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    device_flag = torch.cuda.current_device() if torch.cuda.is_available() else -1
-
-    query_rewriter = pipeline("text2text-generation", model="castorini/t5-base-canard")
-    intent_classifier = pipeline("sentiment-analysis", model='/models/intent_classifier', device=device_flag)
-    entity_extractor = spacy.load("/models/entity_extractor")
-    offensive_classifier = pipeline("sentiment-analysis", model='/models/offensive_classifier', device=device_flag)
-    ambig_classifier = pipeline("sentiment-analysis", model='/models/ambig_classifier', device=device_flag)
-    coref_resolver = spacy.load("en_coreference_web_trf")
-
-    nlu = NLU(query_rewriter, coref_resolver, intent_classifier, offensive_classifier, entity_extractor, ambig_classifier)
-
-    #load retriever and generator
-    retriever = SentenceTransformer('/models/retriever/').to(device)
-    qa_generator = pipeline("text2text-generation", model="/models/train_qa", device=device_flag)
-    summ_generator = pipeline("text2text-generation", model="/models/train_summ", device=device_flag)
-    chat_generator = pipeline("text2text-generation", model="/models/train_chat", device=device_flag)
-    amb_generator = pipeline("text2text-generation", model="/models/train_amb_gen", device=device_flag)
-    generators = {'qa': qa_generator,
-                  'chat': chat_generator,
-                  'amb': amb_generator,
-                  'summ': summ_generator}
-    rec = Recommender(retriever)
-    vre = VRE("assistedlab", '2c1e8f88-461c-42c0-8cc1-b7660771c9a3-843339462', retriever)
-    vre.init()
-    index = vre.get_index()
-    db = vre.get_db()
-    rg = ResponseGenerator(index,db, rec, generators, retriever)
-    del retriever
-    del generators
-    del qa_generator
-    del chat_generator
-    del summ_generator
-    del amb_generator
-    del query_rewriter
-    del intent_classifier
-    del entity_extractor
-    del offensive_classifier
-    del ambig_classifier
-    del coref_resolver
-    threading.Thread(target=vre_fetch, name='updatevre').start()
     threading.Thread(target=clear_inactive, name='clear').start()
     """
     conn = psycopg2.connect(host="janet-pg", database=os.getenv("POSTGRES_DB"), user=os.getenv("POSTGRES_USER"), password=os.getenv("POSTGRES_PASSWORD"))
diff --git a/main_simple.py b/main_simple.py
index ce7157a..d2c87e0 100644
--- a/main_simple.py
+++ b/main_simple.py
@@ -5,6 +5,9 @@ import shutil
 import re
 import requests
 import time
+from User import User
+from DM import DM
+import threading
 
 app = Flask(__name__)
 url = os.getenv("FRONTEND_URL_WITH_PORT")
@@ -13,31 +16,54 @@ cors = CORS(app, resources={r"/api/predict": {"origins": url},
                             r"/health": {"origins": "*"}
                             })
 users = {}
-
+alive = "alive"
+
+def user_interest_decay(token):
+    global alive  # assigned in the except branch; must be global to reach the module-level health flag
+    while True:
+        try:
+            if token in users:
+                print("decaying interests after 3 minutes for " + users[token]['username'])
+                time.sleep(180)
+                users[token]['user'].decay_interests()
+            else:
+                break
+        except Exception as e:
+            alive = "dead_interest_decay"
 
 @app.route("/health", methods=['GET'])
 def health():
-    return "Success", 200
+    if alive == "alive":
+        return "Success", 200
+    else:
+        return alive, 500
 
 @app.route("/api/dm", methods=['POST'])
 def init_dm():
-    token = request.get_json().get("token")
-    status = request.get_json().get("stat")
-    if status == "start":
-        message = {"stat": "waiting"}
-    elif status == "set":
-        headers = {"gcube-token": token, "Accept": "application/json"}
-        if token not in users:
-            url = 'https://api.d4science.org/rest/2/people/profile'
-            response = requests.get(url, headers=headers)
-            if response.status_code == 200:
-                username = response.json()['result']['username']
-                name = response.json()['result']['fullname']
-                message = {"stat": "done"}
+    try:
+        token = request.get_json().get("token")
+        status = request.get_json().get("stat")
+        if status == "start":
+            message = {"stat": "waiting", "err": ""}
+        elif status == "set":
+            headers = {"gcube-token": token, "Accept": "application/json"}
+            if token not in users:
+                url = 'https://api.d4science.org/rest/2/people/profile'
+                response = requests.get(url, headers=headers)
+                if response.status_code == 200:
+                    username = response.json()['result']['username']
+                    name = response.json()['result']['fullname']
+
+                    users[token] = {'username': username, 'name': name, 'dm': DM(), 'activity': 0, 'user': User(username, token)}
+
+                    threading.Thread(target=user_interest_decay, args=(token,), name='decayinterest_'+users[token]['username']).start()
+
+                    message = {"stat": "done", "err": ""}
+                else:
+                    message = {"stat": "rejected", "err": ""}
             else:
-                message = {"stat": "rejected"}
-            else:
-                message = {"stat": "done"}
-    return message
+                message = {"stat": "done", "err": ""}
+        return message
+    except Exception as e:
+        message = {"stat": "init_dm_error", "err": str(e)}
+        return message
 
 @app.route("/api/predict", methods=['POST'])
 def predict():
     time.sleep(10)
@@ -54,7 +80,7 @@ def feedback():
     return reply
 
 if __name__ == "__main__":
-
+    """
     folder = '/app'
     for filename in os.listdir(folder):
         file_path = os.path.join(folder, filename)
@@ -65,4 +91,5 @@ if __name__ == "__main__":
             shutil.rmtree(file_path)
         except Exception as e:
             print('Failed to delete %s. Reason: %s' % (file_path, e))
+    """
     app.run(host='0.0.0.0')
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index fb97dd9..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-faiss-gpu==1.7.2
-Flask==1.1.4
-flask-cors==3.0.10
-protobuf==3.20.0
-matplotlib==3.5.3
-nltk==3.7
-numpy==1.22.4
-pandas==1.3.5
-PyPDF2==3.0.1
-pdfquery
-html2text
-regex==2022.6.2
-requests==2.25.1
-scikit-learn==1.0.2
-scipy==1.7.3
-sentencepiece==0.1.97
-sklearn-pandas==1.8.0
-spacy==3.4.4
-spacy-alignments==0.9.0
-spacy-legacy==3.0.12
-spacy-loggers==1.0.4
-spacy-transformers==1.1.9
-spacy-experimental==0.6.2
-torch @ https://download.pytorch.org/whl/cu116/torch-1.13.1%2Bcu116-cp38-cp38-linux_x86_64.whl
-torchaudio @ https://download.pytorch.org/whl/cu116/torchaudio-0.13.1%2Bcu116-cp38-cp38-linux_x86_64.whl
-torchsummary==1.5.1
-torchtext==0.14.1
-sentence-transformers
-torchvision @ https://download.pytorch.org/whl/cu116/torchvision-0.14.1%2Bcu116-cp38-cp38-linux_x86_64.whl
-tqdm==4.64.1
-transformers
-markupsafe==2.0.1
-psycopg2==2.9.5
-en-coreference-web-trf @ https://github.com/explosion/spacy-experimental/releases/download/v0.6.1/en_coreference_web_trf-3.4.0a2-py3-none-any.whl
-Werkzeug==1.0.1
-
-
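
For reference, the /api/dm handshake that both main.py and main_simple.py now implement can be exercised with the short client-side sketch below. The snippet is illustrative and not part of the commit: the base URL assumes a local deployment on Flask's default port 5000 (app.run(host='0.0.0.0') above), and the token value is a placeholder for a real gcube-token.

    import requests

    BASE = "http://localhost:5000"  # assumption: local run, Flask default port
    TOKEN = "<your-gcube-token>"    # placeholder, not a real credential

    # Step 1: "start" -- the backend answers that it is waiting for a token.
    r = requests.post(BASE + "/api/dm", json={"token": TOKEN, "stat": "start"})
    print(r.json())  # {"stat": "waiting", "err": ""}

    # Step 2: "set" -- the backend resolves the token against the D4Science
    # people/profile API, builds per-user state (DM, User; in main.py also a
    # VRE and ResponseGenerator) and starts the background threads.
    r = requests.post(BASE + "/api/dm", json={"token": TOKEN, "stat": "set"})
    print(r.json())  # {"stat": "done", "err": ""} on success,
                     # {"stat": "rejected", "err": ""} for an invalid token,
                     # {"stat": "init_dm_error", "err": "..."} if setup raised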