Retry the payloads-per-datasource statistics query instead of giving up on its first failure.

 * SchedulingTasks passes 0 as the initial retry-count.
 * StatsService.gatherNumberOfPayloadsPerDatasource() gains the "retryCount" parameter.
 * StatsServiceImpl waits 2 minutes and retries the failed query; an interrupted wait ends the retries.

NOTE(review): the indentation and the blank lines of this patch were restored by hand, and the lines added to
StatsServiceImpl.java were amended afterwards, so its post-image blob id "e41a9e7" no longer matches. Verify before applying.

diff --git a/src/main/java/eu/openaire/pdf_aggregation_statistics/components/SchedulingTasks.java b/src/main/java/eu/openaire/pdf_aggregation_statistics/components/SchedulingTasks.java
index cf594ed..7901d53 100644
--- a/src/main/java/eu/openaire/pdf_aggregation_statistics/components/SchedulingTasks.java
+++ b/src/main/java/eu/openaire/pdf_aggregation_statistics/components/SchedulingTasks.java
@@ -30,7 +30,7 @@ public class SchedulingTasks {
         // When the user requests the numOfPayloads for a given datasourceI, the app will return the result immediately.
         // It will be a quick O(1) GET operation in the ConcurrentHashMap.
 
-        if ( ! statsService.gatherNumberOfPayloadsPerDatasource()
+        if ( ! statsService.gatherNumberOfPayloadsPerDatasource(0)
                 && runningFirstTime )
             PdfAggregationStatisticsApplication.gentleAppShutdown();
 
diff --git a/src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsService.java b/src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsService.java
index a7c405d..d3fb2de 100644
--- a/src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsService.java
+++ b/src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsService.java
@@ -3,6 +3,6 @@ package eu.openaire.pdf_aggregation_statistics.services;
 
 public interface StatsService {
 
-    boolean gatherNumberOfPayloadsPerDatasource();
+    boolean gatherNumberOfPayloadsPerDatasource(int retryCount);
 
 }
diff --git a/src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsServiceImpl.java b/src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsServiceImpl.java
index 9c198eb..e41a9e7 100644
--- a/src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsServiceImpl.java
+++ b/src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsServiceImpl.java
@@ -28,8 +28,21 @@ public class StatsServiceImpl implements StatsService {
 
     public static final ConcurrentHashMap datasourcesWithNumOfPayloads = new ConcurrentHashMap<>(105_000); // The number of datasources is around 10_000.
 
-    public boolean gatherNumberOfPayloadsPerDatasource()
+    /**
+     * Executes the "getNumberOfPayloadsPerDatasourceQuery", retrying it when it fails.
+     * If the error-message indicates that the tables may be under merging, the query is retried every 2 minutes, for up to 10 retries.
+     * For any other error, a retry is made 2 minutes later, only if no retry has happened yet.
+     *
+     * @param retryCount the number of retries made so far; outside callers pass 0
+     * @return {@code false} if the query kept failing, the retries were exhausted or the waiting was interrupted
+     */
+    public boolean gatherNumberOfPayloadsPerDatasource(int retryCount)
     {
+        if ( retryCount > 10 ) {
+            logger.error("Could not find the requested payload-type table in an non-merging state, after " + retryCount + " retries!");
+            return false;
+        }
+
         final String getNumberOfPayloadsPerDatasourceQuery =
                 "select d.id, count(p.id) as payload_count from " + databaseName + ".datasource d\n" +
                 " join " + databaseName + ".publication pu on pu.datasourceid=d.id\n" +
@@ -54,12 +67,42 @@ public class StatsServiceImpl implements StatsService {
             logger.error("The number of payloads per datasource could not be retrieved from the database \"" + databaseName + "\" using the getNumberOfPayloadsPerDatasourceQuery: " + getNumberOfPayloadsPerDatasourceQuery);
             return false;
         } catch (Exception e) {
-            logger.error("Problem when executing \"getNumberOfPayloadsPerDatasourceQuery\": " + getNumberOfPayloadsPerDatasourceQuery, e);
-            return false;
+            String exMsg = e.getMessage();
+            if ( (exMsg != null) && (exMsg.contains("Could not resolve table reference") || exMsg.contains("Failed to open HDFS file")) ) {
+                // The tables may be under merging at the moment, so sleep a bit and try again.
+                return (sleep2mins() && gatherNumberOfPayloadsPerDatasource(retryCount + 1)); // Give up right away, if the sleep was interrupted.
+            } else {
+                // If such an unknown error appears during initialization, it is fatal but not something that is so remarkable to completely avoid deploying the app to save time..
+                // We allow for 1 retry, 2 Minutes later. If the error appears again then the app will shutdown.
+                logger.error("Problem when executing \"getNumberOfPayloadsPerDatasourceQuery\": " + getNumberOfPayloadsPerDatasourceQuery, e);
+                if ( retryCount == 0 ) {
+                    // The DB may have some failure, so wait a bit before the retry.
+                    return (sleep2mins() && gatherNumberOfPayloadsPerDatasource(retryCount + 1)); // Give up right away, if the sleep was interrupted.
+                } else // Already 1 retry happened and failed for the unknown error.
+                    return false; // If the 1st retry for the unknown error failed, then do not try again.
+                // When this method returns, the app will either shut down if it is during initialization or it will ignore it and retry in 6 hours.
+            }
         }
     }
 
 
     // To get the human-friendly timestamp format from the BigInt in the database:
     // select from_timestamp(CAST(CAST(`date` as decimal(30,0))/1000 AS timestamp), "yyyy-MM-dd HH:mm:ss.SSS") from payload
+
+    /**
+     * Sleeps for 2 minutes, before a retry takes place.
+     *
+     * @return {@code true} if the whole sleep was completed, {@code false} if it was interrupted
+     */
+    private boolean sleep2mins() {
+        try {
+            Thread.sleep(120_000); // Sleep for 2 mins.
+            return true;
+        } catch (InterruptedException ie) {
+            logger.warn("Sleeping was interrupted!");
+            Thread.currentThread().interrupt(); // Restore the interrupt-flag, since catching the exception has cleared it.
+            return false;
+        }
+    }
+
 }