- Handle the case where the "gatherNumberOfPayloadsPerDatasource()" query method is executed while some tables of the DB are in a "merge" state. In this case, the queries fail and the app retries up to 10 times.
- Handle the case where the aforementioned query method fails with an unhandleable error. In this case, the app retries once; if it fails again during initialization, the app shuts down; otherwise, the incident is ignored and the app retries after 6 hours, when the method is scheduled to run again.
This commit is contained in:
parent
8746d301d5
commit
1ee1a94eff
|
@ -30,7 +30,7 @@ public class SchedulingTasks {
|
|||
// When the user requests the numOfPayloads for a given datasourceId, the app will return the result immediately.
|
||||
// It will be a quick O(1) GET operation in the ConcurrentHashMap.
|
||||
|
||||
if ( ! statsService.gatherNumberOfPayloadsPerDatasource()
|
||||
if ( ! statsService.gatherNumberOfPayloadsPerDatasource(0)
|
||||
&& runningFirstTime )
|
||||
PdfAggregationStatisticsApplication.gentleAppShutdown();
|
||||
|
||||
|
|
|
@ -3,6 +3,6 @@ package eu.openaire.pdf_aggregation_statistics.services;
|
|||
|
||||
public interface StatsService {
|
||||
|
||||
boolean gatherNumberOfPayloadsPerDatasource();
|
||||
boolean gatherNumberOfPayloadsPerDatasource(int retryCount);
|
||||
|
||||
}
|
||||
|
|
|
@ -28,8 +28,13 @@ public class StatsServiceImpl implements StatsService {
|
|||
public static final ConcurrentHashMap<String, Integer> datasourcesWithNumOfPayloads = new ConcurrentHashMap<>(105_000); // The number of datasources is around 10_000.
|
||||
|
||||
|
||||
public boolean gatherNumberOfPayloadsPerDatasource()
|
||||
public boolean gatherNumberOfPayloadsPerDatasource(int retryCount)
|
||||
{
|
||||
if ( retryCount > 10 ) {
|
||||
logger.error("Could not find the requested payload-type table in an non-merging state, after " + retryCount + " retries!");
|
||||
return false;
|
||||
}
|
||||
|
||||
final String getNumberOfPayloadsPerDatasourceQuery =
|
||||
"select d.id, count(p.id) as payload_count from " + databaseName + ".datasource d\n" +
|
||||
" join " + databaseName + ".publication pu on pu.datasourceid=d.id\n" +
|
||||
|
@ -54,12 +59,34 @@ public class StatsServiceImpl implements StatsService {
|
|||
logger.error("The number of payloads per datasource could not be retrieved from the database \"" + databaseName + "\" using the getNumberOfPayloadsPerDatasourceQuery: " + getNumberOfPayloadsPerDatasourceQuery);
|
||||
return false;
|
||||
} catch (Exception e) {
|
||||
logger.error("Problem when executing \"getNumberOfPayloadsPerDatasourceQuery\": " + getNumberOfPayloadsPerDatasourceQuery, e);
|
||||
return false;
|
||||
String exMsg = e.getMessage();
|
||||
if ( (exMsg != null) && (exMsg.contains("Could not resolve table reference") || exMsg.contains("Failed to open HDFS file")) ) {
|
||||
sleep2mins(); // The tables may be under merging at the moment, so sleep a bit and try again.
|
||||
return gatherNumberOfPayloadsPerDatasource(++retryCount);
|
||||
} else {
|
||||
// If such an unknown error appears during initialization, it is fatal, but not so severe that we should give up on deploying the app immediately just to save time.
|
||||
// We allow for 1 retry, 2 Minutes later. If the error appears again then the app will shutdown.
|
||||
logger.error("Problem when executing \"getNumberOfPayloadsPerDatasourceQuery\": " + getNumberOfPayloadsPerDatasourceQuery, e);
|
||||
if ( retryCount == 0 ) {
|
||||
sleep2mins(); // The DB may have some failure
|
||||
return gatherNumberOfPayloadsPerDatasource(++retryCount);
|
||||
} else // Already 1 retry happened and failed for the unknown error.
|
||||
return false; // If the 1st retry for the unknown error failed, then do not try again.
|
||||
// When this method returns, the app will either shut down if it is during initialization or it will ignore it and retry in 6 hours.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// To get the human-friendly timestamp format from the BigInt in the database:
|
||||
// select from_timestamp(CAST(CAST(`date` as decimal(30,0))/1000 AS timestamp), "yyyy-MM-dd HH:mm:ss.SSS") from payload
|
||||
|
||||
|
||||
private void sleep2mins() {
|
||||
try {
|
||||
Thread.sleep(120_000); // Sleep for 2 mins.
|
||||
} catch (InterruptedException ie) {
|
||||
logger.warn("Sleeping was interrupted!");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue