From ab18ac5ff813fab3bb10ef87a56c83be29d01f93 Mon Sep 17 00:00:00 2001 From: LSmyrnaios Date: Fri, 14 Jun 2024 17:27:52 +0300 Subject: [PATCH] Add new prometheus metrics: - averageFulltextsTransferSizeOfWorkerReports - averageSuccessPercentageOfWorkerReports --- README.md | 2 ++ .../components/ScheduledTasks.java | 20 +++++++++++++++++++ .../urls_controller/util/FileUtils.java | 4 ++++ .../urls_controller/util/FilesHandler.java | 5 ++++- 4 files changed, 30 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4639b49..b7e12f6 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,8 @@ Note: The Shutdown Service API is accessible by the Controller's host machine. - "**numOfPayloadsAggregatedByService**" - "**numOfLegacyPayloads**" - "**numOfRecordsInspectedByServiceThroughCrawling**" +- "**averageFulltextsTransferSizeOfWorkerReports**" +- "**averageSuccessPercentageOfWorkerReports**" - "**getAssignments_time_seconds_max**": Time taken to return the assignments. - "**addWorkerReport_time_seconds**": Time taken to add the WorkerReport.
diff --git a/src/main/java/eu/openaire/urls_controller/components/ScheduledTasks.java b/src/main/java/eu/openaire/urls_controller/components/ScheduledTasks.java index f5901f5..0b16654 100644 --- a/src/main/java/eu/openaire/urls_controller/components/ScheduledTasks.java +++ b/src/main/java/eu/openaire/urls_controller/components/ScheduledTasks.java @@ -1,5 +1,6 @@ package eu.openaire.urls_controller.components; +import com.google.common.util.concurrent.AtomicDouble; import com.google.gson.Gson; import com.google.gson.JsonSyntaxException; import eu.openaire.urls_controller.UrlsControllerApplication; @@ -31,6 +32,7 @@ import java.util.concurrent.CancellationException; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -68,6 +70,12 @@ public class ScheduledTasks { public static final AtomicInteger numOfRecordsInspectedByServiceThroughCrawling = new AtomicInteger(0); + public static final AtomicLong totalFulltextWorkerReportSize = new AtomicLong(0); // This is only usefull to get the "averageSize", since upon restart, this value will be reset to ZERO. (and we do not care about getting the SUM of sizes of each fulltext, from the DB) + public static final AtomicLong averageFulltextsTransferSizeOfWorkerReports = new AtomicLong(0); + + public static final AtomicDouble totalSuccessPercentagesOfWorkerReports = new AtomicDouble(0); + public static final AtomicDouble averageSuccessPercentageOfWorkerReports = new AtomicDouble(0); + public ScheduledTasks(@Value("${services.pdfaggregation.controller.workerReportsDirPath}") String workerReportsDirPath, StatsController statsController, UrlsController urlsController, MeterRegistry registry) { @@ -84,6 +92,8 @@ public class ScheduledTasks { registry.gauge("numOfPayloadsAggregatedByService", numOfPayloadsAggregatedByService); registry.gauge("numOfLegacyPayloads", numOfLegacyPayloads); registry.gauge("numOfRecordsInspectedByServiceThroughCrawling", numOfRecordsInspectedByServiceThroughCrawling); + registry.gauge("averageFulltextsTransferSizeOfWorkerReports", averageFulltextsTransferSizeOfWorkerReports); + registry.gauge("averageSuccessPercentageOfWorkerReports", averageSuccessPercentageOfWorkerReports); } @@ -300,6 +310,16 @@ public class ScheduledTasks { numOfRecordsInspectedByServiceThroughCrawling.set(Integer.parseInt(responseEntity.getBody().toString())); // (any other cast method fails) } // Any error is already logged. + // Record the batch size to know if the zstd files transferred are getting larger or smaller over time. + long numWorkerReportsInt = UrlsServiceImpl.numOfWorkerReportsProcessed.get(); + if ( numWorkerReportsInt > 0 ) { + averageFulltextsTransferSizeOfWorkerReports.set(totalFulltextWorkerReportSize.get() / numWorkerReportsInt); + averageSuccessPercentageOfWorkerReports.set(totalSuccessPercentagesOfWorkerReports.get()/ numWorkerReportsInt); // The result will still be a percentage <= 100. + } + logger.info("averageFulltextsTransferSizeOfWorkerReports = " + (averageFulltextsTransferSizeOfWorkerReports.get() / 1048576) + " MB"); // Divide by (1024 * 1024) to get the value in MB. + logger.info("averageSuccessPercentageOfWorkerReports = " + df.format(averageSuccessPercentageOfWorkerReports.get()) + " %"); + // The above two metrics are not very accurate, for small number of workerReports, since at the time of assignments, the "numWorkerReportsInt" may include 1-2 workerReports which have not reached the stage of calculating their percentage or getting their fulltexts from the workers. + // Nevertheless, in time, after processing 10s of workerReports, they show a near-accurate picture of the averages. // TODO - Export more complex data; , , // , ..., , ... diff --git a/src/main/java/eu/openaire/urls_controller/util/FileUtils.java b/src/main/java/eu/openaire/urls_controller/util/FileUtils.java index edc9811..faf85da 100644 --- a/src/main/java/eu/openaire/urls_controller/util/FileUtils.java +++ b/src/main/java/eu/openaire/urls_controller/util/FileUtils.java @@ -2,6 +2,7 @@ package eu.openaire.urls_controller.util; import com.google.common.collect.HashMultimap; import com.google.common.collect.SetMultimap; +import eu.openaire.urls_controller.components.ScheduledTasks; import eu.openaire.urls_controller.configuration.DatabaseConnector; import eu.openaire.urls_controller.models.Error; import eu.openaire.urls_controller.models.Payload; @@ -301,13 +302,16 @@ public class FileUtils { public boolean saveArchive(InputStream inputStream, File zstdFile) { + long numBytesOfZstdFile = 0; try ( BufferedInputStream inStream = new BufferedInputStream(inputStream, tenMb); BufferedOutputStream outStream = new BufferedOutputStream(Files.newOutputStream(zstdFile.toPath()), tenMb) ) { int readBytes; while ( (readBytes = inStream.read()) != -1 ) { outStream.write(readBytes); + numBytesOfZstdFile ++; } + ScheduledTasks.totalFulltextWorkerReportSize.addAndGet(numBytesOfZstdFile); // Add this zstd size in order to get the "average", for workerReports, later. return true; } catch (Exception e) { logger.error("Could not save the zstd file \"" + zstdFile.getName() + "\": " + e.getMessage(), e); diff --git a/src/main/java/eu/openaire/urls_controller/util/FilesHandler.java b/src/main/java/eu/openaire/urls_controller/util/FilesHandler.java index 0f4b5f9..0af96cb 100644 --- a/src/main/java/eu/openaire/urls_controller/util/FilesHandler.java +++ b/src/main/java/eu/openaire/urls_controller/util/FilesHandler.java @@ -1,6 +1,7 @@ package eu.openaire.urls_controller.util; import com.google.common.collect.HashMultimap; +import eu.openaire.urls_controller.components.ScheduledTasks; import eu.openaire.urls_controller.controllers.UrlsController; import eu.openaire.urls_controller.models.Payload; import eu.openaire.urls_controller.models.UrlReport; @@ -199,7 +200,9 @@ public class FilesHandler { return FileUtils.UploadFullTextsResponse.successful_without_fulltexts; // It was handled, no error. } - logger.info("NumFullTextsFound by assignments_" + assignmentsBatchCounter + " = " + numValidFullTextsFound + " (out of " + sizeOfUrlReports + " | about " + df.format(numValidFullTextsFound * 100.0 / sizeOfUrlReports) + "%)."); + double successPercentage = (numValidFullTextsFound * 100.0 / sizeOfUrlReports); + ScheduledTasks.totalSuccessPercentagesOfWorkerReports.addAndGet(successPercentage); + logger.info("NumFullTextsFound by assignments_" + assignmentsBatchCounter + " = " + numValidFullTextsFound + " (out of " + sizeOfUrlReports + " | about " + df.format(successPercentage) + "%)."); // TODO - Have a prometheus GAUGE to hold the value of the above percentage, so that we can track the success-rates over time..