Handle the case when a urlReports-sublist does not have any payloads inside.
This commit is contained in:
parent
e20c5d2146
commit
e4540e7f3c
|
@ -195,6 +195,7 @@ public class ParquetFileUtils {
|
||||||
List<List<UrlReport>> finalSubLists = Lists.partition(urlReports, sizeOfEachSubList); // This needs the "sizeOfEachSubList" to be above < 0 >.
|
List<List<UrlReport>> finalSubLists = Lists.partition(urlReports, sizeOfEachSubList); // This needs the "sizeOfEachSubList" to be above < 0 >.
|
||||||
int numSubListsPayload = finalSubLists.size();
|
int numSubListsPayload = finalSubLists.size();
|
||||||
// We will run <numSubListsPayload> tasks for the payloads.
|
// We will run <numSubListsPayload> tasks for the payloads.
|
||||||
|
// Since the payloads are not evenly distributed in the urlReports, there may be some sub-lists of urlReports without any payloads inside.
|
||||||
for ( int i = 0; i < numSubListsPayload; ++i ) {
|
for ( int i = 0; i < numSubListsPayload; ++i ) {
|
||||||
int finalI = i;
|
int finalI = i;
|
||||||
callableTasks.add(() -> { // Handle inserts to the "payload" table. Around 20% of the total amount.
|
callableTasks.add(() -> { // Handle inserts to the "payload" table. Around 20% of the total amount.
|
||||||
|
@ -269,7 +270,7 @@ public class ParquetFileUtils {
|
||||||
|
|
||||||
int recordsSize = recordList.size();
|
int recordsSize = recordList.size();
|
||||||
if ( recordsSize == 0 ) {
|
if ( recordsSize == 0 ) {
|
||||||
logger.error("No attempts are available to be inserted to the database!"); // This should have been caught earlier.
|
logger.error("No attempt-parquet-records could be created in order to be inserted to the database!");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -295,6 +296,7 @@ public class ParquetFileUtils {
|
||||||
{
|
{
|
||||||
List<GenericData.Record> recordList = new ArrayList<>((int) (urlReports.size() * 0.2));
|
List<GenericData.Record> recordList = new ArrayList<>((int) (urlReports.size() * 0.2));
|
||||||
GenericData.Record record;
|
GenericData.Record record;
|
||||||
|
int numPayloadsInsideUrlReports = 0;
|
||||||
|
|
||||||
for ( UrlReport urlReport : urlReports )
|
for ( UrlReport urlReport : urlReports )
|
||||||
{
|
{
|
||||||
|
@ -308,6 +310,8 @@ public class ParquetFileUtils {
|
||||||
if ( fileLocation == null ) // We want only the records with uploaded full-texts in the "payload" table.
|
if ( fileLocation == null ) // We want only the records with uploaded full-texts in the "payload" table.
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
numPayloadsInsideUrlReports ++;
|
||||||
|
|
||||||
Timestamp timestamp = payload.getTimestamp_acquired();
|
Timestamp timestamp = payload.getTimestamp_acquired();
|
||||||
record = getPayloadParquetRecord(payload.getId(), payload.getOriginal_url(), payload.getActual_url(),
|
record = getPayloadParquetRecord(payload.getId(), payload.getOriginal_url(), payload.getActual_url(),
|
||||||
(timestamp != null) ? timestamp.getTime() : System.currentTimeMillis(),
|
(timestamp != null) ? timestamp.getTime() : System.currentTimeMillis(),
|
||||||
|
@ -317,9 +321,12 @@ public class ParquetFileUtils {
|
||||||
recordList.add(record);
|
recordList.add(record);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( numPayloadsInsideUrlReports == 0 )
|
||||||
|
return true; // This urlReports-sublist does not have any payloads inside to use. That's fine.
|
||||||
|
|
||||||
int recordsSize = recordList.size();
|
int recordsSize = recordList.size();
|
||||||
if ( recordsSize == 0 ) {
|
if ( recordsSize == 0 ) {
|
||||||
logger.error("No payloads are available to be inserted to the database!"); // This should have been caught earlier.
|
logger.error("No payload-parquet-records could be created in order to be inserted to the database!");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue