package eu.openaire.urls_controller.controllers; import eu.openaire.urls_controller.configuration.ImpalaConnector; import eu.openaire.urls_controller.services.StatsService; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.bind.annotation.RestController; /** * This controller returns statistics for the database. */ @RestController @RequestMapping("/stats") public class StatsController { private static final Logger logger = LoggerFactory.getLogger(StatsController.class); @Autowired private StatsService statsService; /** * This endpoint returns the total number of payloads existing in the database, independently of the way they were aggregated. * This includes the payloads created by other pieces of software, before the PDF-Aggregation-Service was created. * */ @GetMapping("getNumberOfAllPayloads") public ResponseEntity getNumberOfAllPayloads() { logger.info("Received a \"getNumberOfAllPayloads\" request."); final String getPayloadsNumberQuery = "select count(id) from " + ImpalaConnector.databaseName + ".payload"; return statsService.getNumberOfPayloads(getPayloadsNumberQuery, "payloads"); } /** * This endpoint returns the number of payloads aggregated by the PDF-Aggregated-Service itself. * It excludes the payloads aggregated by other methods, by applying a Date-filter for the records created in 2021 or later. * */ @GetMapping("getNumberOfPayloadsAggregatedByService") public ResponseEntity getNumberOfPayloadsAggregatedByService() { logger.info("Received a \"getNumberOfPayloadsAggregatedByService\" request."); String getPayloadsAggregatedQuery = "select count(id) from " + ImpalaConnector.databaseName + ".payload where `date` >= cast(cast('2021-01-01' as timestamp) as bigint)"; return statsService.getNumberOfPayloads(getPayloadsAggregatedQuery, "payloads retrieved by the PDF Aggregation Service"); } /** * This endpoint returns the number of payloads related to the given datasourceID. * */ @GetMapping("getNumberOfPayloadsForDatasource") public ResponseEntity getNumberOfPayloadsForDatasource(@RequestParam String datasourceId) { logger.info("Received a \"getNumberOfPayloadsForDatasource\" request."); final String getPayloadsNumberForDatasourceQuery = "select count(p.id) from " + ImpalaConnector.databaseName + ".payload p\n" + " join " + ImpalaConnector.databaseName + ".publication pu on pu.id=p.id and pu.datasourceid=\"" + datasourceId + "\""; if ( logger.isTraceEnabled() ) logger.trace("getPayloadsNumberForDatasourceQuery:\n" + getPayloadsNumberForDatasourceQuery); return statsService.getNumberOfPayloads(getPayloadsNumberForDatasourceQuery, "payloads related to datasourceId \"" + datasourceId + "\""); } // TODO - Add an endpoint to get the publication year as a param and return the number of payloads for the publications of that year. // select count(p.id) from payload p // join publication pu on pu.id=p.id and pu.year= // TODO - Add an endpoint to return the info of all datasources in the database with the count of their payloads (including 0). // Maybe have a param "numTopDatasources" which will work a a "limit" in the following query. // In case the "numTopDatasources" param is not given or is less or equal to 0, then no limit will be added to the query. /* select d.id, d.name, d.type, d.allow_harvest, count(p.id) as payload_count from datasource d join publication pu on pu.datasourceid=d.id left join payload p on p.id=pu.id -- We want the datasources with 0 payloads too, so we use "left join". group by d.id, d.name, d.type, d.allow_harvest order by payload_count desc */ /** * This endpoint returns the total number of distinct full-text files existing in the database. * */ @GetMapping("getNumberOfAllDistinctFullTexts") public ResponseEntity getNumberOfAllDistinctFullTexts() { logger.info("Received a \"getNumberOfAllDistinctFullTexts\" request."); final String getPayloadsNumberQuery = "select count(distinct `hash`) from " + ImpalaConnector.databaseName + ".payload"; return statsService.getNumberOfPayloads(getPayloadsNumberQuery, "distinct full-text files"); } /** * This endpoint returns the number of records inspected by the PDF-Aggregation-Service. * */ @GetMapping("getNumberOfRecordsInspected") public ResponseEntity getNumberOfRecordsInspected() { return statsService.getNumberOfRecordsInspected(); } }