forked from lsmyrnaios/UrlsController
- Improve the response time of "getAssignments" requests by no longer merging the parquet files of the "assignment" table right after acquiring the assignments from the DB. They are already merged whenever an assignments-batch is deleted after its workerReport has been processed (a short sketch of that deletion path follows, just before the diff).
- Move the DB-unlock calls so the lock is released as soon as the queries have finished executing.
parent 8fdb8e9137
commit 02bae38885
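Why the post-insert merge is redundant, and where the lock now gets released: every processed workerReport ends with the deletion of its assignments-batch, and that deletion already compacts the "assignment" parquet files via deleteAssignmentsBatch() (second hunk of this diff). The following is only an illustrative sketch of that caller side, not code from this commit; the method name onWorkerReportProcessed() is hypothetical, while deleteAssignmentsBatch(), fileUtils.mergeParquetFiles() and the databaseLock come from the code below.

	// Illustrative sketch only (a method inside UrlsServiceImpl); onWorkerReportProcessed() is a hypothetical name.
	public void onWorkerReportProcessed(long assignmentsBatchCounter) {
		DatabaseConnector.databaseLock.lock();
		// deleteAssignmentsBatch() rewrites the "assignment" table without this batch, through
		// fileUtils.mergeParquetFiles(..), so the parquet files get merged/compacted here anyway.
		String errorMsg = deleteAssignmentsBatch(assignmentsBatchCounter);
		DatabaseConnector.databaseLock.unlock();	// Release the lock as soon as the DB queries are done,
								// before any further (non-DB) processing of the report.
		if ( errorMsg != null )
			logger.error(errorMsg);
	}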
@@ -222,29 +222,22 @@ public class UrlsServiceImpl implements UrlsService {
 		try {
 			jdbcTemplate.execute(insertAssignmentsQuery);
 		} catch (Exception e) {
-			errorMsg = DatabaseConnector.handleQueryException("insertAssignmentsQuery", insertAssignmentsQuery, e);
 			String tmpErrMsg = dropCurrentAssignmentTable();
+			DatabaseConnector.databaseLock.unlock();
+			errorMsg = DatabaseConnector.handleQueryException("insertAssignmentsQuery", insertAssignmentsQuery, e);
 			if ( tmpErrMsg != null )
 				errorMsg += "\n" + tmpErrMsg;
-			DatabaseConnector.databaseLock.unlock();
 			return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg);
 		}
 
 		errorMsg = dropCurrentAssignmentTable();
-		if ( errorMsg != null ) {
-			DatabaseConnector.databaseLock.unlock();
-			return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg);
-		}
-
-		logger.debug("Finished inserting " + assignmentsSize + " assignments into the \"assignment\"-table. Going to merge the parquet files for this table.");
-
-		String mergeErrorMsg = fileUtils.mergeParquetFiles("assignment", "", null);
-		if ( mergeErrorMsg != null ) {
-			DatabaseConnector.databaseLock.unlock();
-			return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(mergeErrorMsg);
-		}
-
 		DatabaseConnector.databaseLock.unlock();
+		if ( errorMsg != null )
+			return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg);
+
+		// We do not need to "merge" the parquet files for the "assignment" table here, since this happens every time we delete the assignments of a specific batch.
+
+		logger.debug("Finished inserting " + assignmentsSize + " assignments into the \"assignment\"-table.");
 
 		// Due to the fact that one publication with an id-url pair can be connected with multiple datasources, the results returned from the query may be duplicates.
 		// So, we apply a post-processing step where we collect only one instance of each id-url pair and send it to the Worker.
@@ -504,9 +497,10 @@ public class UrlsServiceImpl implements UrlsService {
 
 	public String deleteAssignmentsBatch(long givenAssignmentsBatchCounter)
 	{
-		// This will delete the rows of the "assignment" table which refer to the "curWorkerId". As we have non-KUDU Impala tables, the Delete operation can only succeed through a "merge" operation of the rest of the data.
-		// Only the rows referring to OTHER workerIDs get stored in a temp-table, while the "assignment" table gets deleted. Then, the temp_table becomes the "assignment" table.
-		// We don't need to keep the assignment-info anymore, the "findAssignmentsQuery" checks the "payload_aggregated" table for previously handled tasks.
+		// This will delete the rows of the "assignment" table which refer to the "givenAssignmentsBatchCounter".
+		// As we have non-KUDU Impala tables, the Delete operation can only succeed through a "merge" operation of the rest of the data.
+		// Only the rows referring to OTHER "givenAssignmentsBatchCounter" get stored in a temp-table, while the "assignment" table gets deleted. Then, the temp_table becomes the "assignment" table.
+		// We don't need to keep the assignment-info anymore, the "findAssignmentsQuery" checks the "payload" table for previously handled tasks.
 		return fileUtils.mergeParquetFiles("assignment", " WHERE assignments_batch_counter != ", givenAssignmentsBatchCounter);
 	}
 
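For context on the "merge" operation referenced in the comments of deleteAssignmentsBatch(): FileUtils.mergeParquetFiles() itself is not part of this diff, so the following is only a rough sketch of the temp-table swap those comments describe, with assumed SQL. It copies the rows to keep into a new parquet table, drops the old one, and renames the new table into place.

	// Rough sketch of the temp-table "merge" described above; the real FileUtils.mergeParquetFiles()
	// is not shown in this diff, so the exact SQL statements here are assumptions.
	public String mergeParquetFiles(String tableName, String whereClause, Object parameter) {
		String param = (parameter != null) ? parameter.toString() : "";
		try {
			// 1) Rewrite only the rows we want to KEEP into a fresh table; Impala writes them as a small number of parquet files.
			jdbcTemplate.execute("CREATE TABLE " + tableName + "_tmp STORED AS PARQUET AS SELECT * FROM " + tableName + whereClause + param);
			// 2) Drop the old table (no in-place DELETE on non-KUDU tables) and promote the temp table to its name.
			jdbcTemplate.execute("DROP TABLE " + tableName + " PURGE");
			jdbcTemplate.execute("ALTER TABLE " + tableName + "_tmp RENAME TO " + tableName);
			jdbcTemplate.execute("COMPUTE STATS " + tableName);
			return null;	// A null result means success; the callers in the diff treat a non-null String as an error message.
		} catch (Exception e) {
			return "Error when merging the parquet files of table \"" + tableName + "\": " + e.getMessage();
		}
	}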