package eu.openaire.urls_controller.controllers;

import eu.openaire.urls_controller.configuration.DatabaseConnector;
import eu.openaire.urls_controller.services.StatsService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.MissingServletRequestParameterException;
import org.springframework.web.bind.annotation.*;

import java.util.regex.Pattern;


/**
 * This controller returns statistics for the database.
 * <p>
 * Every endpoint builds a "count" query and delegates its execution to the {@link StatsService}.
 * Several endpoints accept an {@code isCalledFromScheduler} flag; when it is {@code true}, the "Received a ... request" info-log is skipped.
 * <p>
 * NOTE(review): {@code isCalledFromScheduler} is an unannotated simple-typed handler argument, so Spring can presumably also bind it
 * from an HTTP request parameter of the same name (defaulting to {@code false}) - confirm that this is acceptable.
 * NOTE(review): the trailing {@code 0} argument given to the service methods looks like an initial retry-counter - confirm against {@link StatsService}.
 */
@RestController
@RequestMapping("/stats")
public class StatsController {

    private static final Logger logger = LoggerFactory.getLogger(StatsController.class);

    /**
     * Allow-list for the characters of a datasourceId. The value ends up inside a double-quoted SQL string-literal,
     * so anything able to terminate or escape that literal (quotes, backslashes, whitespace, control characters) must be refused.
     * NOTE(review): assumes that datasource IDs consist only of letters, digits and the characters {@code _ : | . -} - widen the set if valid IDs get rejected.
     */
    private static final Pattern VALID_DATASOURCE_ID = Pattern.compile("[A-Za-z0-9_:|.\\-]+");

    @Autowired
    private StatsService statsService;


    /**
     * Reports which required request-parameter is missing, when a request to the stats API lacks one.
     * Spring Boot does not show any specific messages to the user (like stacktraces), for security reasons.
     *
     * @param ex the exception raised by Spring for the missing parameter
     * @return a "400 - Bad Request" response, naming the missing parameter
     */
    @ExceptionHandler(MissingServletRequestParameterException.class)
    public ResponseEntity<String> handleMissingParams(MissingServletRequestParameterException ex) {
        return ResponseEntity.badRequest().body(String.format("Missing parameter: %s\n", ex.getParameterName()));
    }


    /**
     * This endpoint returns the total number of payloads existing in the database, independently of the way they were aggregated.
     * This includes the payloads created by other pieces of software, before the PDF-Aggregation-Service was created.
     *
     * @param isCalledFromScheduler when {@code true}, the "request received" log-message is skipped
     * @return the response produced by the {@link StatsService}
     */
    @GetMapping(value = "getNumberOfAllPayloads", produces = MediaType.TEXT_PLAIN_VALUE)
    public ResponseEntity getNumberOfAllPayloads(boolean isCalledFromScheduler) {
        if ( !isCalledFromScheduler ) {
            logger.info("Received a \"getNumberOfAllPayloads\" request.");
        }

        final String getAllPayloadsNumberQuery = "select count(id) from " + DatabaseConnector.databaseName + ".payload";
        return statsService.getNumberOfPayloads(getAllPayloadsNumberQuery, "all payloads", 0);
    }


    /**
     * This endpoint returns the number of payloads aggregated by the PDF-Aggregation-Service itself, through crawling.
     *
     * @param isCalledFromScheduler when {@code true}, the "request received" log-message is skipped
     * @return the response produced by the {@link StatsService}
     */
    @GetMapping(value = "getNumberOfPayloadsAggregatedByServiceThroughCrawling", produces = MediaType.TEXT_PLAIN_VALUE)
    public ResponseEntity getNumberOfPayloadsAggregatedByServiceThroughCrawling(boolean isCalledFromScheduler) {
        if ( !isCalledFromScheduler ) {
            logger.info("Received a \"getNumberOfPayloadsAggregatedByServiceThroughCrawling\" request.");
        }

        final String getNumOfPayloadsAggregatedByServiceThroughCrawlingQuery = "select count(id) from " + DatabaseConnector.databaseName + ".payload_aggregated";
        return statsService.getNumberOfPayloads(getNumOfPayloadsAggregatedByServiceThroughCrawlingQuery, "payloads aggregated by the Service through crawling", 0);
    }


    /**
     * This endpoint returns the number of payloads aggregated by this Service, through BulkImport procedures with compatible datasources.
     *
     * @param isCalledFromScheduler when {@code true}, the "request received" log-message is skipped
     * @return the response produced by the {@link StatsService}
     */
    @GetMapping(value = "getNumberOfPayloadsAggregatedByServiceThroughBulkImport", produces = MediaType.TEXT_PLAIN_VALUE)
    public ResponseEntity getNumberOfPayloadsAggregatedByServiceThroughBulkImport(boolean isCalledFromScheduler) {
        if ( !isCalledFromScheduler ) {
            logger.info("Received a \"getNumberOfPayloadsAggregatedByServiceThroughBulkImport\" request.");
        }

        final String getNumOfPayloadsAggregatedByServiceThroughBulkImportQuery = "select count(id) from " + DatabaseConnector.databaseName + ".payload_bulk_import";
        return statsService.getNumberOfPayloads(getNumOfPayloadsAggregatedByServiceThroughBulkImportQuery, "payloads aggregated by the Service through BulkImport procedures", 0);
    }


    /**
     * This endpoint returns the number of payloads aggregated by the PDF-Aggregation-Service itself, through crawling AND bulk-import procedures.
     * The count is taken over the "union all" of the "payload_aggregated" and the "payload_bulk_import" tables.
     *
     * @param isCalledFromScheduler when {@code true}, the "request received" log-message is skipped
     * @return the response produced by the {@link StatsService}
     */
    @GetMapping(value = "getNumberOfPayloadsAggregatedByService", produces = MediaType.TEXT_PLAIN_VALUE)
    public ResponseEntity getNumberOfPayloadsAggregatedByService(boolean isCalledFromScheduler) {
        if ( !isCalledFromScheduler ) {
            logger.info("Received a \"getNumberOfPayloadsAggregatedByService\" request.");
        }

        final String getNumOfPayloadsAggregatedByServiceQuery = "select count(id) from\n" +
                " (select id from " + DatabaseConnector.databaseName + ".payload_aggregated\n" +
                " union all\n" +
                " select id from " + DatabaseConnector.databaseName + ".payload_bulk_import)\n" +
                " as payloads_from_service";
        return statsService.getNumberOfPayloads(getNumOfPayloadsAggregatedByServiceQuery, "payloads aggregated by the Service, through both crawling and bulk-import procedures", 0);
    }


    /**
     * This endpoint returns the number of legacy payloads, which were aggregated by methods other than the PDF-Aggregation-Service.
     *
     * @param isCalledFromScheduler when {@code true}, the "request received" log-message is skipped
     * @return the response produced by the {@link StatsService}
     */
    @GetMapping(value = "getNumberOfLegacyPayloads", produces = MediaType.TEXT_PLAIN_VALUE)
    public ResponseEntity getNumberOfLegacyPayloads(boolean isCalledFromScheduler) {
        if ( !isCalledFromScheduler ) {
            logger.info("Received a \"getNumberOfLegacyPayloads\" request.");
        }

        final String getNumOfLegacyPayloadsQuery = "select count(id) from " + DatabaseConnector.databaseName + ".payload_legacy";
        return statsService.getNumberOfPayloads(getNumOfLegacyPayloadsQuery, "legacy payloads", 0);
    }


    /**
     * This endpoint returns the number of payloads related to the given datasourceId.
     * <p>
     * The datasourceId is concatenated into the SQL text, since the service accepts only a ready-made query-string.
     * For that reason it is checked against an allow-list first and the request is refused when it does not match,
     * so that a crafted value cannot alter the query (SQL injection) or forge log-lines.
     *
     * @param datasourceId the ID of the datasource to count the payloads for (required request-parameter)
     * @return a "400 - Bad Request" response when the datasourceId contains unexpected characters, otherwise the response produced by the {@link StatsService}
     */
    @GetMapping(value = "getNumberOfPayloadsForDatasource", produces = MediaType.TEXT_PLAIN_VALUE)
    public ResponseEntity getNumberOfPayloadsForDatasource(@RequestParam String datasourceId) {
        logger.info("Received a \"getNumberOfPayloadsForDatasource\" request.");

        if ( (datasourceId == null) || !VALID_DATASOURCE_ID.matcher(datasourceId).matches() ) {
            // The offending value is deliberately not logged, as it is untrusted input.
            logger.warn("A \"getNumberOfPayloadsForDatasource\" request was rejected, because of an invalid \"datasourceId\".");
            return ResponseEntity.badRequest().body("The given \"datasourceId\" is invalid: only letters, digits and the characters '_', ':', '|', '.', '-' are allowed.\n");
        }

        final String getNumOfPayloadsForDatasourceQuery =
                "select count(p.id) from " + DatabaseConnector.databaseName + ".payload p\n" +
                " join " + DatabaseConnector.databaseName + ".publication pu on pu.id=p.id and pu.datasourceid=\"" + datasourceId + "\"";

        logger.trace("getNumOfPayloadsForDatasourceQuery:\n{}", getNumOfPayloadsForDatasourceQuery);

        return statsService.getNumberOfPayloads(getNumOfPayloadsForDatasourceQuery, "payloads related to datasourceId \"" + datasourceId + "\"", 0);
    }


    // TODO - Add an endpoint to get the publication year as a param and return the number of payloads for the publications of that year.
    //  select count(p.id) from payload p
    //  join publication pu on pu.id=p.id and pu.year=<GIVEN_YEAR>


    // TODO - Add an endpoint to return the info of all datasources in the database with the count of their payloads (including 0).
    //  Maybe have a param "numTopDatasources" which will work as a "limit" in the following query.
    //  In case the "numTopDatasources" param is not given or is less than or equal to 0, then no limit will be added to the query.
    /*
        select d.id, d.name, d.type, d.allow_harvest, count(p.id) as payload_count from datasource d
        join publication pu on pu.datasourceid=d.id   -- We want the datasources with at least 1 publication.
        left join payload p on p.id=pu.id   -- We want the datasources with 0 payloads too, so we use "left join".
        group by d.id, d.name, d.type, d.allow_harvest
        order by payload_count desc
    */


    // TODO - Add an endpoint to return the number of payloads found for each publication-year, in descending order.
    //  For example the number of payloads for publications published in 2016 is <number>
    //  --//-- the number for 2017 is <number>
    //  Add a "limit" parameter for the user to specify that wants only the last 5 years (2019-2023).


    /**
     * This endpoint returns the total number of distinct full-text files existing in the database.
     * The files are told apart by the "hash" column of the "payload" table.
     *
     * @return the response produced by the {@link StatsService}
     */
    @GetMapping(value = "getNumberOfAllDistinctFullTexts", produces = MediaType.TEXT_PLAIN_VALUE)
    public ResponseEntity getNumberOfAllDistinctFullTexts() {
        logger.info("Received a \"getNumberOfAllDistinctFullTexts\" request.");

        final String getPayloadsNumberQuery = "select count(distinct `hash`) from " + DatabaseConnector.databaseName + ".payload";
        return statsService.getNumberOfPayloads(getPayloadsNumberQuery, "distinct full-text files", 0);
    }


    /**
     * This endpoint returns the number of records inspected by the PDF-Aggregation-Service, through crawling.
     *
     * @param isCalledFromScheduler when {@code true}, the "request received" log-message is skipped
     * @return the response produced by the {@link StatsService}
     */
    @GetMapping(value = "getNumberOfRecordsInspectedByServiceThroughCrawling", produces = MediaType.TEXT_PLAIN_VALUE)
    public ResponseEntity getNumberOfRecordsInspectedByServiceThroughCrawling(boolean isCalledFromScheduler) {
        if ( !isCalledFromScheduler ) {
            logger.info("Received a \"getNumberOfRecordsInspectedByServiceThroughCrawling\" request.");
        }

        return statsService.getNumberOfRecordsInspectedByServiceThroughCrawling(0);
    }

}