UrlsController/src/main/java/eu/openaire/urls_controller/controllers/StatsController.java

110 lines
4.9 KiB
Java

package eu.openaire.urls_controller.controllers;
import eu.openaire.urls_controller.configuration.ImpalaConnector;
import eu.openaire.urls_controller.services.StatsService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
/**
 * This controller returns statistics for the database.
 * All of its endpoints are mapped under the "/stats" path and delegate the execution of their queries to the {@code StatsService}.
 */
@RestController
@RequestMapping("/stats")
public class StatsController {

    private static final Logger logger = LoggerFactory.getLogger(StatsController.class);

    @Autowired
    private StatsService statsService;


    /**
     * This endpoint returns the total number of payloads existing in the database, independently of the way they were aggregated.
     * This includes the payloads created by other pieces of software, before the PDF-Aggregation-Service was created.
     *
     * @return the response produced by {@code statsService.getNumberOfPayloads(..)} for the "count all payloads" query.
     * */
    @GetMapping("getNumberOfAllPayloads")
    public ResponseEntity<?> getNumberOfAllPayloads() {
        logger.info("Received a \"getNumberOfAllPayloads\" request.");
        final String getPayloadsNumberQuery = "select count(id) from " + ImpalaConnector.databaseName + ".payload";
        return statsService.getNumberOfPayloads(getPayloadsNumberQuery, "payloads");
    }


    /**
     * This endpoint returns the number of payloads aggregated by the PDF-Aggregated-Service itself.
     * It excludes the payloads aggregated by other methods, by applying a Date-filter for the records created in 2021 or later.
     *
     * @return the response produced by {@code statsService.getNumberOfPayloads(..)} for the date-filtered query.
     * */
    @GetMapping("getNumberOfPayloadsAggregatedByService")
    public ResponseEntity<?> getNumberOfPayloadsAggregatedByService() {
        logger.info("Received a \"getNumberOfPayloadsAggregatedByService\" request.");
        final String getPayloadsAggregatedQuery = "select count(id) from " + ImpalaConnector.databaseName + ".payload where `date` >= cast(cast('2021-01-01' as timestamp) as bigint)";
        return statsService.getNumberOfPayloads(getPayloadsAggregatedQuery, "payloads retrieved by the PDF Aggregation Service");
    }


    /**
     * This endpoint returns the number of payloads related to the given datasourceID.
     *
     * @param datasourceId the ID of the datasource, as given in the HTTP request. It gets embedded in the query as a double-quoted
     *                     string-literal, so a value which is blank or contains double-quotes, backslashes or control-characters is rejected.
     * @return a "400 Bad Request" response when the given ID is rejected, otherwise the response produced by
     *         {@code statsService.getNumberOfPayloads(..)} for the datasource-specific query.
     * */
    @GetMapping("getNumberOfPayloadsForDatasource")
    public ResponseEntity<?> getNumberOfPayloadsForDatasource(@RequestParam String datasourceId) {
        logger.info("Received a \"getNumberOfPayloadsForDatasource\" request.");

        // The "datasourceId" is untrusted input and the service only accepts a ready-made query-string (no bind-parameters),
        // so the value has to be validated here, before it is concatenated into the query. Otherwise, it is an SQL-injection vector.
        if ( !isSafeForDoubleQuotedSqlLiteral(datasourceId) ) {
            final String errorMsg = "The given \"datasourceId\" is empty or contains illegal characters (double-quotes, backslashes or control-characters)!";
            logger.error(errorMsg); // The raw value is intentionally not logged, as it may contain control-characters.
            return ResponseEntity.badRequest().body(errorMsg);
        }

        final String getPayloadsNumberForDatasourceQuery =
                "select count(p.id) from " + ImpalaConnector.databaseName + ".payload p\n" +
                " join " + ImpalaConnector.databaseName + ".publication pu on pu.id=p.id and pu.datasourceid=\"" + datasourceId + "\"";

        if ( logger.isTraceEnabled() ) {
            logger.trace("getPayloadsNumberForDatasourceQuery:\n{}", getPayloadsNumberForDatasourceQuery);
        }

        return statsService.getNumberOfPayloads(getPayloadsNumberForDatasourceQuery, "payloads related to datasourceId \"" + datasourceId + "\"");
    }


    /**
     * Checks whether the given value can be placed between double-quotes in an SQL string-literal, without being able to terminate that literal.
     * A double-quote would close the literal and a backslash could escape the closing quote, so both are refused, along with control-characters.
     *
     * @param value the untrusted value to check.
     * @return {@code true} if the value is non-blank and free of double-quotes, backslashes and ISO control-characters, otherwise {@code false}.
     */
    private static boolean isSafeForDoubleQuotedSqlLiteral(String value) {
        if ( (value == null) || value.trim().isEmpty() ) {
            return false;
        }
        for ( int i = 0; i < value.length(); ++i ) {
            final char currentChar = value.charAt(i);
            if ( (currentChar == '"') || (currentChar == '\\') || Character.isISOControl(currentChar) ) {
                return false;
            }
        }
        return true;
    }


    // TODO - Add an endpoint to get the publication year as a param and return the number of payloads for the publications of that year.
    // select count(p.id) from payload p
    // join publication pu on pu.id=p.id and pu.year=<GIVEN_YEAR>

    // TODO - Add an endpoint to return the info of all datasources in the database with the count of their payloads (including 0).
    // Maybe have a param "numTopDatasources" which will work as a "limit" in the following query.
    // In case the "numTopDatasources" param is not given or is less or equal to 0, then no limit will be added to the query.
    /*
    select d.id, d.name, d.type, d.allow_harvest, count(p.id) as payload_count from datasource d
    join publication pu on pu.datasourceid=d.id
    left join payload p on p.id=pu.id -- We want the datasources with 0 payloads too, so we use "left join".
    group by d.id, d.name, d.type, d.allow_harvest
    order by payload_count desc
    */


    /**
     * This endpoint returns the total number of distinct full-text files existing in the database.
     * The distinction is made on the `hash` column of the "payload" table.
     *
     * @return the response produced by {@code statsService.getNumberOfPayloads(..)} for the "count distinct hashes" query.
     * */
    @GetMapping("getNumberOfAllDistinctFullTexts")
    public ResponseEntity<?> getNumberOfAllDistinctFullTexts() {
        logger.info("Received a \"getNumberOfAllDistinctFullTexts\" request.");
        final String getPayloadsNumberQuery = "select count(distinct `hash`) from " + ImpalaConnector.databaseName + ".payload";
        return statsService.getNumberOfPayloads(getPayloadsNumberQuery, "distinct full-text files");
    }


    /**
     * This endpoint returns the number of records inspected by the PDF-Aggregation-Service.
     *
     * @return the response produced by {@code statsService.getNumberOfRecordsInspected()}.
     * */
    @GetMapping("getNumberOfRecordsInspected")
    public ResponseEntity<?> getNumberOfRecordsInspected() {
        // Log the incoming request, like the rest of the endpoints of this controller do.
        logger.info("Received a \"getNumberOfRecordsInspected\" request.");
        return statsService.getNumberOfRecordsInspected();
    }
}