- Improve response-time to "getAssignments"-requests, by avoiding merging the parquet files of the "assignment" table, right after acquiring the assignments from the DB. They are already getting merged, when each assignments-batch is deleted after a workerReport has been processed.

- Optimize code-positioning for unlocking the DB when done executing queries.
This commit is contained in:
Lampros Smyrnaios 2023-09-13 17:03:11 +03:00
parent 8fdb8e9137
commit 02bae38885
1 changed files with 12 additions and 18 deletions

View File

@ -222,29 +222,22 @@ public class UrlsServiceImpl implements UrlsService {
try {
jdbcTemplate.execute(insertAssignmentsQuery);
} catch (Exception e) {
errorMsg = DatabaseConnector.handleQueryException("insertAssignmentsQuery", insertAssignmentsQuery, e);
String tmpErrMsg = dropCurrentAssignmentTable();
DatabaseConnector.databaseLock.unlock();
errorMsg = DatabaseConnector.handleQueryException("insertAssignmentsQuery", insertAssignmentsQuery, e);
if ( tmpErrMsg != null )
errorMsg += "\n" + tmpErrMsg;
DatabaseConnector.databaseLock.unlock();
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg);
}
errorMsg = dropCurrentAssignmentTable();
if ( errorMsg != null ) {
DatabaseConnector.databaseLock.unlock();
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg);
}
logger.debug("Finished inserting " + assignmentsSize + " assignments into the \"assignment\"-table. Going to merge the parquet files for this table.");
String mergeErrorMsg = fileUtils.mergeParquetFiles("assignment", "", null);
if ( mergeErrorMsg != null ) {
DatabaseConnector.databaseLock.unlock();
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(mergeErrorMsg);
}
DatabaseConnector.databaseLock.unlock();
if ( errorMsg != null )
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg);
// We do not need to "merge" the parquet files for the "assignment" table here, since this happens every time we delete the assignments of a specific batch.
logger.debug("Finished inserting " + assignmentsSize + " assignments into the \"assignment\"-table.");
// Due to the fact that one publication with an id-url pair can be connected with multiple datasources, the results returned from the query may be duplicates.
// So, we apply a post-processing step where we collect only one instance of each id-url pair and send it to the Worker.
@ -504,9 +497,10 @@ public class UrlsServiceImpl implements UrlsService {
public String deleteAssignmentsBatch(long givenAssignmentsBatchCounter)
{
// This will delete the rows of the "assignment" table which refer to the "curWorkerId". As we have non-KUDU Impala tables, the Delete operation can only succeed through a "merge" operation of the rest of the data.
// Only the rows referring to OTHER workerIDs get stored in a temp-table, while the "assignment" table gets deleted. Then, the temp_table becomes the "assignment" table.
// We don't need to keep the assignment-info anymore, the "findAssignmentsQuery" checks the "payload_aggregated" table for previously handled tasks.
// This will delete the rows of the "assignment" table which refer to the "givenAssignmentsBatchCounter".
// As we have non-KUDU Impala tables, the Delete operation can only succeed through a "merge" operation of the rest of the data.
// Only the rows referring to OTHER "givenAssignmentsBatchCounter" get stored in a temp-table, while the "assignment" table gets deleted. Then, the temp_table becomes the "assignment" table.
// We don't need to keep the assignment-info anymore, the "findAssignmentsQuery" checks the "payload" table for previously handled tasks.
return fileUtils.mergeParquetFiles("assignment", " WHERE assignments_batch_counter != ", givenAssignmentsBatchCounter);
}