From e4540e7f3cf9f81e34e2d0454a26165a31cb16e9 Mon Sep 17 00:00:00 2001 From: LSmyrnaios Date: Tue, 12 Mar 2024 14:25:00 +0200 Subject: [PATCH] Handle the case when a urlReports-sublist does not have any payloads inside. --- .../urls_controller/util/ParquetFileUtils.java | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/main/java/eu/openaire/urls_controller/util/ParquetFileUtils.java b/src/main/java/eu/openaire/urls_controller/util/ParquetFileUtils.java index 7f10fb5..617fd5b 100644 --- a/src/main/java/eu/openaire/urls_controller/util/ParquetFileUtils.java +++ b/src/main/java/eu/openaire/urls_controller/util/ParquetFileUtils.java @@ -195,6 +195,7 @@ public class ParquetFileUtils { List> finalSubLists = Lists.partition(urlReports, sizeOfEachSubList); // This needs the "sizeOfEachSubList" to be above < 0 >. int numSubListsPayload = finalSubLists.size(); // We will run tasks for the payloads. + // Since the payloads are not evenly distributed in the urlReports, there may be some sub-lists of urlReports without any payloads inside. for ( int i = 0; i < numSubListsPayload; ++i ) { int finalI = i; callableTasks.add(() -> { // Handle inserts to the "payload" table. Around 20% of the total amount. @@ -269,7 +270,7 @@ public class ParquetFileUtils { int recordsSize = recordList.size(); if ( recordsSize == 0 ) { - logger.error("No attempts are available to be inserted to the database!"); // This should have been caught earlier. + logger.error("No attempt-parquet-records could be created in order to be inserted to the database!"); return false; } @@ -295,6 +296,7 @@ public class ParquetFileUtils { { List recordList = new ArrayList<>((int) (urlReports.size() * 0.2)); GenericData.Record record; + int numPayloadsInsideUrlReports = 0; for ( UrlReport urlReport : urlReports ) { @@ -308,6 +310,8 @@ public class ParquetFileUtils { if ( fileLocation == null ) // We want only the records with uploaded full-texts in the "payload" table. 
continue; + numPayloadsInsideUrlReports ++; + Timestamp timestamp = payload.getTimestamp_acquired(); record = getPayloadParquetRecord(payload.getId(), payload.getOriginal_url(), payload.getActual_url(), (timestamp != null) ? timestamp.getTime() : System.currentTimeMillis(), @@ -317,9 +321,12 @@ public class ParquetFileUtils { recordList.add(record); } + if ( numPayloadsInsideUrlReports == 0 ) + return true; // This urlReports-sublist does not have any payloads inside to use. That's fine. + int recordsSize = recordList.size(); if ( recordsSize == 0 ) { - logger.error("No payloads are available to be inserted to the database!"); // This should have been caught earlier. + logger.error("No payload-parquet-records could be created in order to be inserted to the database!"); return false; }