From 1ee1a94eff82bf4c34517f252fb74d88b4f97ab6 Mon Sep 17 00:00:00 2001 From: LSmyrnaios Date: Wed, 26 Jul 2023 16:41:44 +0300 Subject: [PATCH] - Handle the case where the "gatherNumberOfPayloadsPerDatasource()"-query-method is executed while some tables of the DB are in a "merge" state. In this case, the queries fail and the App retries up to 10 times. - Handle the case when the aforementioned query-method fails with some unhandleable error. In this case the app retries 1 time and if it fails again, then if this has happened during initialization, the app shuts down, else the incident is ignored and the app will retry after 6 hours when the method is scheduled to run again. - Restore the thread's interrupt-status when the 2-minutes wait between the retries gets interrupted and report the real number of performed retries in the error-log. --- .../components/SchedulingTasks.java | 2 +- .../services/StatsService.java | 2 +- .../services/StatsServiceImpl.java | 34 +++++++++++++++++-- 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/src/main/java/eu/openaire/pdf_aggregation_statistics/components/SchedulingTasks.java b/src/main/java/eu/openaire/pdf_aggregation_statistics/components/SchedulingTasks.java index cf594ed..7901d53 100644 --- a/src/main/java/eu/openaire/pdf_aggregation_statistics/components/SchedulingTasks.java +++ b/src/main/java/eu/openaire/pdf_aggregation_statistics/components/SchedulingTasks.java @@ -30,7 +30,7 @@ public class SchedulingTasks { // When the user requests the numOfPayloads for a given datasourceI, the app will return the result immediately. // It will be a quick O(1) GET operation in the ConcurrentHashMap. - if ( ! statsService.gatherNumberOfPayloadsPerDatasource() + if ( ! 
statsService.gatherNumberOfPayloadsPerDatasource(0) && runningFirstTime ) PdfAggregationStatisticsApplication.gentleAppShutdown(); diff --git a/src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsService.java b/src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsService.java index a7c405d..d3fb2de 100644 --- a/src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsService.java +++ b/src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsService.java @@ -3,6 +3,6 @@ package eu.openaire.pdf_aggregation_statistics.services; public interface StatsService { - boolean gatherNumberOfPayloadsPerDatasource(); + boolean gatherNumberOfPayloadsPerDatasource(int retryCount); } diff --git a/src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsServiceImpl.java b/src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsServiceImpl.java index 9c198eb..e41a9e7 100644 --- a/src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsServiceImpl.java +++ b/src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsServiceImpl.java @@ -28,8 +28,13 @@ public class StatsServiceImpl implements StatsService { public static final ConcurrentHashMap datasourcesWithNumOfPayloads = new ConcurrentHashMap<>(105_000); // The number of datasources is around 10_000. 
- public boolean gatherNumberOfPayloadsPerDatasource() + public boolean gatherNumberOfPayloadsPerDatasource(int retryCount) { + if ( retryCount > 10 ) { + logger.error("Could not find the requested payload-type table in a non-merging state, after " + (retryCount - 1) + " retries!"); + return false; + } + final String getNumberOfPayloadsPerDatasourceQuery = "select d.id, count(p.id) as payload_count from " + databaseName + ".datasource d\n" + " join " + databaseName + ".publication pu on pu.datasourceid=d.id\n" + @@ -54,12 +59,35 @@ public class StatsServiceImpl implements StatsService { logger.error("The number of payloads per datasource could not be retrieved from the database \"" + databaseName + "\" using the getNumberOfPayloadsPerDatasourceQuery: " + getNumberOfPayloadsPerDatasourceQuery); return false; } catch (Exception e) { - logger.error("Problem when executing \"getNumberOfPayloadsPerDatasourceQuery\": " + getNumberOfPayloadsPerDatasourceQuery, e); - return false; + String exMsg = e.getMessage(); + if ( (exMsg != null) && (exMsg.contains("Could not resolve table reference") || exMsg.contains("Failed to open HDFS file")) ) { + sleep2mins(); // The tables may be under merging at the moment, so sleep a bit and try again. + return gatherNumberOfPayloadsPerDatasource(++retryCount); + } else { + // If such an unknown error appears during initialization, it is fatal but not something that is so remarkable to completely avoid deploying the app to save time.. + // We allow for 1 retry, 2 Minutes later. If the error appears again then the app will shutdown. + logger.error("Problem when executing \"getNumberOfPayloadsPerDatasourceQuery\": " + getNumberOfPayloadsPerDatasourceQuery, e); + if ( retryCount == 0 ) { + sleep2mins(); // The DB may have some failure + return gatherNumberOfPayloadsPerDatasource(++retryCount); + } else // At least 1 retry has already happened (the "retryCount" is shared with the "merging"-retries), so no further retry is granted for the unknown error. 
+ return false; // If the 1st retry for the unknown error failed, then do not try again. + // When this method returns, the app will either shut down if it is during initialization or it will ignore it and retry in 6 hours. + } } } // To get the human-friendly timestamp format from the BigInt in the database: // select from_timestamp(CAST(CAST(`date` as decimal(30,0))/1000 AS timestamp), "yyyy-MM-dd HH:mm:ss.SSS") from payload + + private void sleep2mins() { + try { + Thread.sleep(120_000); // Sleep for 2 mins. + } catch (InterruptedException ie) { + logger.warn("Sleeping was interrupted!"); + Thread.currentThread().interrupt(); // Restore the interrupt status, so that it is not lost. Any following "sleep2mins()" call will then return immediately, thus the remaining retries will not block a shutdown. + } + } + }