From 42b93e9429232ab66838ce37332ec45b7bcfe285 Mon Sep 17 00:00:00 2001 From: LSmyrnaios Date: Thu, 4 May 2023 15:48:49 +0300 Subject: [PATCH] - Add the "getNumberOfAllDistinctFullTexts" stats-endpoint. - Add TODOs for more stats endpoints. - Code polishing. --- .../controllers/StatsController.java | 38 ++++++++++++++++--- .../services/StatsServiceImpl.java | 14 ++++--- 2 files changed, 41 insertions(+), 11 deletions(-) diff --git a/src/main/java/eu/openaire/urls_controller/controllers/StatsController.java b/src/main/java/eu/openaire/urls_controller/controllers/StatsController.java index c029c78..d549c5e 100644 --- a/src/main/java/eu/openaire/urls_controller/controllers/StatsController.java +++ b/src/main/java/eu/openaire/urls_controller/controllers/StatsController.java @@ -33,7 +33,7 @@ public class StatsController { public ResponseEntity getNumberOfAllPayloads() { logger.info("Received a \"getNumberOfAllPayloads\" request."); final String getPayloadsNumberQuery = "select count(id) from " + ImpalaConnector.databaseName + ".payload"; - return statsService.getNumberOfPayloads(getPayloadsNumberQuery, ""); + return statsService.getNumberOfPayloads(getPayloadsNumberQuery, "payloads"); } @@ -45,7 +45,7 @@ public class StatsController { public ResponseEntity getNumberOfPayloadsAggregatedByService() { logger.info("Received a \"getNumberOfPayloadsAggregatedByService\" request."); String getPayloadsAggregatedQuery = "select count(id) from " + ImpalaConnector.databaseName + ".payload where `date` >= cast(cast('2021-01-01' as timestamp) as bigint)"; - return statsService.getNumberOfPayloads(getPayloadsAggregatedQuery, "retrieved by the PDF Aggregation Service"); + return statsService.getNumberOfPayloads(getPayloadsAggregatedQuery, "payloads retrieved by the PDF Aggregation Service"); } @@ -55,14 +55,42 @@ public class StatsController { @GetMapping("getNumberOfPayloadsForDatasource") public ResponseEntity getNumberOfPayloadsForDatasource(@RequestParam String datasourceId) { 
logger.info("Received a \"getNumberOfPayloadsForDatasource\" request."); - final String getPayloadsNumberForDatasourceQuery = "select count(p.id) from " + ImpalaConnector.databaseName + ".payload p\n" + " join " + ImpalaConnector.databaseName + ".publication pu on pu.id=p.id and pu.datasourceid=\"" + datasourceId + "\""; - logger.trace("getPayloadsNumberForDatasourceQuery:\n" + getPayloadsNumberForDatasourceQuery); + return statsService.getNumberOfPayloads(getPayloadsNumberForDatasourceQuery, "payloads related to datasourceId \"" + datasourceId + "\""); + } - return statsService.getNumberOfPayloads(getPayloadsNumberForDatasourceQuery, "related to datasourceId \"" + datasourceId + "\""); + + + // TODO - Add an endpoint to get the publication year as a param and return the number of payloads for the publications of that year. + // select count(p.id) from payload p + // join publication pu on pu.id=p.id and pu.pub_year= + + + + // TODO - Add an endpoint to return the info of all datasources in the database with the count of their payloads (including 0). + // Maybe have a param "numTopDatasources" which will work as a "limit" in the following query. + // In case the "numTopDatasources" param is not given or is less than or equal to 0, then no limit will be added to the query. + /* + select d.id, d.name, d.type, d.allow_harvest, count(p.id) as payload_count from datasource d + join publication pu on pu.datasourceid=d.id + left join payload p on p.id=pu.id -- We want the datasources with 0 payloads too. + group by d.id, d.name, d.type, d.allow_harvest + order by payload_count desc + */ + + + + /** + * This endpoint returns the total number of distinct full-text files existing in the database. 
+ * */ + @GetMapping("getNumberOfAllDistinctFullTexts") + public ResponseEntity getNumberOfAllDistinctFullTexts() { + logger.info("Received a \"getNumberOfAllDistinctFullTexts\" request."); + final String getPayloadsNumberQuery = "select count(distinct `hash`) from " + ImpalaConnector.databaseName + ".payload"; + return statsService.getNumberOfPayloads(getPayloadsNumberQuery, "distinct full-text files"); } diff --git a/src/main/java/eu/openaire/urls_controller/services/StatsServiceImpl.java b/src/main/java/eu/openaire/urls_controller/services/StatsServiceImpl.java index c8c82b6..85ec6ed 100644 --- a/src/main/java/eu/openaire/urls_controller/services/StatsServiceImpl.java +++ b/src/main/java/eu/openaire/urls_controller/services/StatsServiceImpl.java @@ -18,20 +18,22 @@ public class StatsServiceImpl implements StatsService { @Autowired private JdbcTemplate jdbcTemplate; + // No DB-lock is required for these READ-operations. - public ResponseEntity getNumberOfPayloads(String getPayloadsNumberQuery, String extraMsg) { + + public ResponseEntity getNumberOfPayloads(String getNumberQuery, String message) { try { - Object result = jdbcTemplate.queryForObject(getPayloadsNumberQuery, Integer.class); + Object result = jdbcTemplate.queryForObject(getNumberQuery, Integer.class); if ( result != null ) { int numOfPayloads = (int) result; - logger.info("Number of payloads " + extraMsg + " in the database \"" + ImpalaConnector.databaseName + "\" is " + numOfPayloads); + logger.info("The number of " + message + " in the database \"" + ImpalaConnector.databaseName + "\" is " + numOfPayloads); return new ResponseEntity<>(numOfPayloads, HttpStatus.OK); } else - return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body("The payloads' number could not be retrieved from the database \"" + ImpalaConnector.databaseName + "\" using the getPayloadsNumberQuery: " + getPayloadsNumberQuery); + return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body("The number of " + 
message + " could not be retrieved from the database \"" + ImpalaConnector.databaseName + "\" using the getNumberQuery: " + getNumberQuery); } catch (EmptyResultDataAccessException erdae) { - return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body("The payloads' number could not be retrieved from the database \"" + ImpalaConnector.databaseName + "\" using the getPayloadsNumberQuery: " + getPayloadsNumberQuery); + return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body("The number of " + message + " could not be retrieved from the database \"" + ImpalaConnector.databaseName + "\" using the getNumberQuery: " + getNumberQuery); } catch (Exception e) { - String errorMsg = "Problem when executing \"getPayloadsNumberQuery\": " + getPayloadsNumberQuery; + String errorMsg = "Problem when executing \"getNumberQuery\": " + getNumberQuery; logger.error(errorMsg, e); return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg); // We may get a "Class Cast Exception", in case the Impala returns a non-integer value.