Handle the case when a urlReports-sublist does not have any payloads inside.

This commit is contained in:
Lampros Smyrnaios 2024-03-12 14:25:00 +02:00
parent e20c5d2146
commit e4540e7f3c
1 changed file with 9 additions and 2 deletions

View File

@ -195,6 +195,7 @@ public class ParquetFileUtils {
List<List<UrlReport>> finalSubLists = Lists.partition(urlReports, sizeOfEachSubList); // This needs the "sizeOfEachSubList" to be above < 0 >.
int numSubListsPayload = finalSubLists.size();
// We will run <numSubListsPayload> tasks for the payloads.
// Since the payloads are not evenly distributed in the urlReports, there may be some sub-lists of urlReports without any payloads inside.
for ( int i = 0; i < numSubListsPayload; ++i ) {
int finalI = i;
callableTasks.add(() -> { // Handle inserts to the "payload" table. Around 20% of the total amount.
@ -269,7 +270,7 @@ public class ParquetFileUtils {
int recordsSize = recordList.size();
if ( recordsSize == 0 ) {
logger.error("No attempts are available to be inserted to the database!"); // This should have been caught earlier.
logger.error("No attempt-parquet-records could be created in order to be inserted to the database!");
return false;
}
@ -295,6 +296,7 @@ public class ParquetFileUtils {
{
List<GenericData.Record> recordList = new ArrayList<>((int) (urlReports.size() * 0.2));
GenericData.Record record;
int numPayloadsInsideUrlReports = 0;
for ( UrlReport urlReport : urlReports )
{
@ -308,6 +310,8 @@ public class ParquetFileUtils {
if ( fileLocation == null ) // We want only the records with uploaded full-texts in the "payload" table.
continue;
numPayloadsInsideUrlReports ++;
Timestamp timestamp = payload.getTimestamp_acquired();
record = getPayloadParquetRecord(payload.getId(), payload.getOriginal_url(), payload.getActual_url(),
(timestamp != null) ? timestamp.getTime() : System.currentTimeMillis(),
@ -317,9 +321,12 @@ public class ParquetFileUtils {
recordList.add(record);
}
if ( numPayloadsInsideUrlReports == 0 )
return true; // This urlReports-sublist does not have any payloads inside to use. That's fine.
int recordsSize = recordList.size();
if ( recordsSize == 0 ) {
logger.error("No payloads are available to be inserted to the database!"); // This should have been caught earlier.
logger.error("No payload-parquet-records could be created in order to be inserted to the database!");
return false;
}