178 lines
9.0 KiB
Java
178 lines
9.0 KiB
Java
package eu.openaire.urls_controller.controllers;
|
|
|
|
|
|
import eu.openaire.urls_controller.configuration.DatabaseConnector;
|
|
import eu.openaire.urls_controller.services.StatsService;
|
|
import org.slf4j.Logger;
|
|
import org.slf4j.LoggerFactory;
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
import org.springframework.http.MediaType;
|
|
import org.springframework.http.ResponseEntity;
|
|
import org.springframework.web.bind.MissingServletRequestParameterException;
|
|
import org.springframework.web.bind.annotation.*;
|
|
|
|
|
|
/**
|
|
* This controller returns statistics for the database.
|
|
*/
|
|
@RestController
|
|
@RequestMapping("/stats")
|
|
public class StatsController {
|
|
|
|
private static final Logger logger = LoggerFactory.getLogger(StatsController.class);
|
|
|
|
@Autowired
|
|
private StatsService statsService;
|
|
|
|
|
|
// This method shows the parameters which are missing when dealing with the bulk-import API.
|
|
// Spring Boot does not show any specific messages to the user (like stacktraces), for security reasons.
|
|
@ExceptionHandler(MissingServletRequestParameterException.class)
|
|
public ResponseEntity<?> handleMissingParams(MissingServletRequestParameterException ex) {
|
|
return ResponseEntity.badRequest().body(String.format("Missing parameter: %s\n", ex.getParameterName()));
|
|
}
|
|
|
|
|
|
/**
|
|
* This endpoint returns the total number of payloads existing in the database, independently of the way they were aggregated.
|
|
* This includes the payloads created by other pieces of software, before the PDF-Aggregation-Service was created.
|
|
* */
|
|
@GetMapping(value = "getNumberOfAllPayloads", produces = MediaType.TEXT_PLAIN_VALUE)
|
|
public ResponseEntity<?> getNumberOfAllPayloads(boolean isCalledFromScheduler)
|
|
{
|
|
if ( ! isCalledFromScheduler )
|
|
logger.info("Received a \"getNumberOfAllPayloads\" request.");
|
|
|
|
final String getAllPayloadsNumberQuery = "select count(id) from " + DatabaseConnector.databaseName + ".payload";
|
|
return statsService.getNumberOfPayloads(getAllPayloadsNumberQuery, "all payloads", 0);
|
|
}
|
|
|
|
|
|
/**
|
|
* This endpoint returns the number of payloads aggregated by the PDF-Aggregated-Service itself, through crawling.
|
|
* */
|
|
@GetMapping(value = "getNumberOfPayloadsAggregatedByServiceThroughCrawling", produces = MediaType.TEXT_PLAIN_VALUE)
|
|
public ResponseEntity<?> getNumberOfPayloadsAggregatedByServiceThroughCrawling(boolean isCalledFromScheduler)
|
|
{
|
|
if ( ! isCalledFromScheduler )
|
|
logger.info("Received a \"getNumberOfPayloadsAggregatedByServiceThroughCrawling\" request.");
|
|
|
|
String getNumOfPayloadsAggregatedByServiceThroughCrawlingQuery = "select count(id) from " + DatabaseConnector.databaseName + ".payload_aggregated";
|
|
return statsService.getNumberOfPayloads(getNumOfPayloadsAggregatedByServiceThroughCrawlingQuery, "payloads aggregated by the Service through crawling", 0);
|
|
}
|
|
|
|
|
|
/**
|
|
* This endpoint returns the number of payloads aggregated by this Service, through BulkImport procedures with compatible datasources..
|
|
* */
|
|
@GetMapping(value = "getNumberOfPayloadsAggregatedByServiceThroughBulkImport", produces = MediaType.TEXT_PLAIN_VALUE)
|
|
public ResponseEntity<?> getNumberOfPayloadsAggregatedByServiceThroughBulkImport(boolean isCalledFromScheduler)
|
|
{
|
|
if ( ! isCalledFromScheduler )
|
|
logger.info("Received a \"getNumberOfPayloadsAggregatedByServiceThroughBulkImport\" request.");
|
|
|
|
String getNumOfPayloadsAggregatedByServiceThroughBulkImportQuery = "select count(id) from " + DatabaseConnector.databaseName + ".payload_bulk_import";
|
|
return statsService.getNumberOfPayloads(getNumOfPayloadsAggregatedByServiceThroughBulkImportQuery, "payloads aggregated by the Service through BulkImport procedures", 0);
|
|
}
|
|
|
|
|
|
/**
|
|
* This endpoint returns the number of payloads aggregated by the PDF-Aggregated-Service itself, through crawling AND bulk-import procedures.
|
|
* */
|
|
@GetMapping(value = "getNumberOfPayloadsAggregatedByService", produces = MediaType.TEXT_PLAIN_VALUE)
|
|
public ResponseEntity<?> getNumberOfPayloadsAggregatedByService(boolean isCalledFromScheduler)
|
|
{
|
|
if ( ! isCalledFromScheduler )
|
|
logger.info("Received a \"getNumberOfPayloadsAggregatedByService\" request.");
|
|
|
|
String getNumOfPayloadsAggregatedByServiceQuery = "select count(id) from\n" +
|
|
" (select id from " + DatabaseConnector.databaseName + ".payload_aggregated\n" +
|
|
" union all\n" +
|
|
" select id from " + DatabaseConnector.databaseName + ".payload_bulk_import)\n" +
|
|
" as payloads_from_service";
|
|
return statsService.getNumberOfPayloads(getNumOfPayloadsAggregatedByServiceQuery, "payloads aggregated by the Service, through both crawling and bulk-import procedures", 0);
|
|
}
|
|
|
|
|
|
/**
|
|
* This endpoint returns the number of legacy payloads, which were aggregated by methods other thant the PDF Aggregation Service.
|
|
* */
|
|
@GetMapping(value = "getNumberOfLegacyPayloads", produces = MediaType.TEXT_PLAIN_VALUE)
|
|
public ResponseEntity<?> getNumberOfLegacyPayloads(boolean isCalledFromScheduler)
|
|
{
|
|
if ( ! isCalledFromScheduler )
|
|
logger.info("Received a \"getNumberOfLegacyPayloads\" request.");
|
|
|
|
String getNumOfLegacyPayloadsQuery = "select count(id) from " + DatabaseConnector.databaseName + ".payload_legacy";
|
|
return statsService.getNumberOfPayloads(getNumOfLegacyPayloadsQuery, "legacy payloads", 0);
|
|
}
|
|
|
|
|
|
/**
|
|
* This endpoint returns the number of payloads related to the given datasourceID.
|
|
* */
|
|
@GetMapping(value = "getNumberOfPayloadsForDatasource", produces = MediaType.TEXT_PLAIN_VALUE)
|
|
public ResponseEntity<?> getNumberOfPayloadsForDatasource(@RequestParam String datasourceId) {
|
|
logger.info("Received a \"getNumberOfPayloadsForDatasource\" request.");
|
|
final String getNumOfPayloadsForDatasourceQuery =
|
|
"select count(p.id) from " + DatabaseConnector.databaseName + ".payload p\n" +
|
|
" join " + DatabaseConnector.databaseName + ".publication pu on pu.id=p.id and pu.datasourceid=\"" + datasourceId + "\"";
|
|
|
|
if ( logger.isTraceEnabled() )
|
|
logger.trace("getNumOfPayloadsForDatasourceQuery:\n" + getNumOfPayloadsForDatasourceQuery);
|
|
|
|
return statsService.getNumberOfPayloads(getNumOfPayloadsForDatasourceQuery, "payloads related to datasourceId \"" + datasourceId + "\"", 0);
|
|
}
|
|
|
|
|
|
|
|
// TODO - Add an endpoint to get the publication year as a param and return the number of payloads for the publications of that year.
|
|
// select count(p.id) from payload p
|
|
// join publication pu on pu.id=p.id and pu.year=<GIVEN_YEAR>
|
|
|
|
|
|
|
|
// TODO - Add an endpoint to return the info of all datasources in the database with the count of their payloads (including 0).
|
|
// Maybe have a param "numTopDatasources" which will work a a "limit" in the following query.
|
|
// In case the "numTopDatasources" param is not given or is less or equal to 0, then no limit will be added to the query.
|
|
/*
|
|
select d.id, d.name, d.type, d.allow_harvest, count(p.id) as payload_count from datasource d
|
|
join publication pu on pu.datasourceid=d.id -- We want the datasources with at least 1 publication.
|
|
left join payload p on p.id=pu.id -- We want the datasources with 0 payloads too, so we use "left join".
|
|
group by d.id, d.name, d.type, d.allow_harvest
|
|
order by payload_count desc
|
|
*/
|
|
|
|
|
|
|
|
// TODO - Add an endpoint to return the number of payloads found for each publication-year, in descending order..
|
|
// For example the number of payloads for publications published in 2016 is <number>
|
|
// --//-- the number for 2017 is <number>
|
|
// Add a "limit" parameter for the user to specify that wants only the last 5 years (2019-2023).
|
|
|
|
|
|
/**
|
|
* This endpoint returns the total number of distinct full-text files existing in the database.
|
|
* */
|
|
@GetMapping(value = "getNumberOfAllDistinctFullTexts", produces = MediaType.TEXT_PLAIN_VALUE)
|
|
public ResponseEntity<?> getNumberOfAllDistinctFullTexts() {
|
|
logger.info("Received a \"getNumberOfAllDistinctFullTexts\" request.");
|
|
final String getPayloadsNumberQuery = "select count(distinct `hash`) from " + DatabaseConnector.databaseName + ".payload";
|
|
return statsService.getNumberOfPayloads(getPayloadsNumberQuery, "distinct full-text files", 0);
|
|
}
|
|
|
|
|
|
/**
|
|
* This endpoint returns the number of records inspected by the PDF-Aggregation-Service, through crawling.
|
|
* */
|
|
@GetMapping(value = "getNumberOfRecordsInspectedByServiceThroughCrawling", produces = MediaType.TEXT_PLAIN_VALUE)
|
|
public ResponseEntity<?> getNumberOfRecordsInspectedByServiceThroughCrawling(boolean isCalledFromScheduler)
|
|
{
|
|
if ( ! isCalledFromScheduler )
|
|
logger.info("Received a \"getNumberOfRecordsInspectedByServiceThroughCrawling\" request.");
|
|
|
|
return statsService.getNumberOfRecordsInspectedByServiceThroughCrawling(0);
|
|
}
|
|
|
|
}
|