2022-10-18 14:00:26 +02:00
package eu.openaire.urls_controller.controllers ;
2023-08-23 15:55:23 +02:00
import eu.openaire.urls_controller.configuration.DatabaseConnector ;
2023-02-09 18:25:48 +01:00
import eu.openaire.urls_controller.services.StatsService ;
2022-10-18 14:00:26 +02:00
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;
import org.springframework.beans.factory.annotation.Autowired ;
2023-07-25 11:03:27 +02:00
import org.springframework.http.MediaType ;
2022-10-18 14:00:26 +02:00
import org.springframework.http.ResponseEntity ;
2023-07-25 14:36:54 +02:00
import org.springframework.web.bind.MissingServletRequestParameterException ;
import org.springframework.web.bind.annotation.* ;
2022-10-18 14:00:26 +02:00
2023-06-23 14:22:26 +02:00
2022-10-18 14:00:26 +02:00
/ * *
2023-02-02 16:58:47 +01:00
* This controller returns statistics for the database .
2022-10-18 14:00:26 +02:00
* /
@RestController
@RequestMapping ( " /stats " )
public class StatsController {
private static final Logger logger = LoggerFactory . getLogger ( StatsController . class ) ;
@Autowired
2023-02-09 18:25:48 +01:00
private StatsService statsService ;
2022-10-18 14:00:26 +02:00
2023-07-25 14:36:54 +02:00
// This method shows the parameters which are missing when dealing with the bulk-import API.
// Spring Boot does not show any specific messages to the user (like stacktraces), for security reasons.
@ExceptionHandler ( MissingServletRequestParameterException . class )
public ResponseEntity < ? > handleMissingParams ( MissingServletRequestParameterException ex ) {
return ResponseEntity . badRequest ( ) . body ( String . format ( " Missing parameter: %s \ n " , ex . getParameterName ( ) ) ) ;
}
2023-02-02 16:58:47 +01:00
/ * *
* This endpoint returns the total number of payloads existing in the database , independently of the way they were aggregated .
* This includes the payloads created by other pieces of software , before the PDF - Aggregation - Service was created .
* * /
2023-07-25 11:03:27 +02:00
@GetMapping ( value = " getNumberOfAllPayloads " , produces = MediaType . TEXT_PLAIN_VALUE )
2023-06-19 13:42:00 +02:00
public ResponseEntity < ? > getNumberOfAllPayloads ( boolean isCalledFromScheduler )
{
if ( ! isCalledFromScheduler )
logger . info ( " Received a \" getNumberOfAllPayloads \" request. " ) ;
2023-08-23 15:55:23 +02:00
final String getAllPayloadsNumberQuery = " select count(id) from " + DatabaseConnector . databaseName + " .payload " ;
2023-07-06 17:29:13 +02:00
return statsService . getNumberOfPayloads ( getAllPayloadsNumberQuery , " all payloads " , 0 ) ;
2023-06-23 14:22:26 +02:00
}
/ * *
* This endpoint returns the number of payloads aggregated by the PDF - Aggregated - Service itself , through crawling .
* * /
2023-07-25 11:03:27 +02:00
@GetMapping ( value = " getNumberOfPayloadsAggregatedByServiceThroughCrawling " , produces = MediaType . TEXT_PLAIN_VALUE )
2023-06-23 14:22:26 +02:00
public ResponseEntity < ? > getNumberOfPayloadsAggregatedByServiceThroughCrawling ( boolean isCalledFromScheduler )
{
if ( ! isCalledFromScheduler )
logger . info ( " Received a \" getNumberOfPayloadsAggregatedByServiceThroughCrawling \" request. " ) ;
2023-08-23 15:55:23 +02:00
String getNumOfPayloadsAggregatedByServiceThroughCrawlingQuery = " select count(id) from " + DatabaseConnector . databaseName + " .payload_aggregated " ;
2023-07-06 17:29:13 +02:00
return statsService . getNumberOfPayloads ( getNumOfPayloadsAggregatedByServiceThroughCrawlingQuery , " payloads aggregated by the Service through crawling " , 0 ) ;
2023-02-02 16:58:47 +01:00
}
/ * *
2023-06-23 14:22:26 +02:00
* This endpoint returns the number of payloads aggregated by this Service , through BulkImport procedures with compatible datasources . .
* * /
2023-07-25 11:03:27 +02:00
@GetMapping ( value = " getNumberOfPayloadsAggregatedByServiceThroughBulkImport " , produces = MediaType . TEXT_PLAIN_VALUE )
2023-06-23 14:22:26 +02:00
public ResponseEntity < ? > getNumberOfPayloadsAggregatedByServiceThroughBulkImport ( boolean isCalledFromScheduler )
{
if ( ! isCalledFromScheduler )
logger . info ( " Received a \" getNumberOfPayloadsAggregatedByServiceThroughBulkImport \" request. " ) ;
2023-08-23 15:55:23 +02:00
String getNumOfPayloadsAggregatedByServiceThroughBulkImportQuery = " select count(id) from " + DatabaseConnector . databaseName + " .payload_bulk_import " ;
2023-07-06 17:29:13 +02:00
return statsService . getNumberOfPayloads ( getNumOfPayloadsAggregatedByServiceThroughBulkImportQuery , " payloads aggregated by the Service through BulkImport procedures " , 0 ) ;
2023-06-23 14:22:26 +02:00
}
/ * *
* This endpoint returns the number of payloads aggregated by the PDF - Aggregated - Service itself , through crawling AND bulk - import procedures.
2023-02-02 16:58:47 +01:00
* * /
2023-07-25 11:03:27 +02:00
@GetMapping ( value = " getNumberOfPayloadsAggregatedByService " , produces = MediaType . TEXT_PLAIN_VALUE )
2023-06-23 14:22:26 +02:00
public ResponseEntity < ? > getNumberOfPayloadsAggregatedByService ( boolean isCalledFromScheduler )
{
if ( ! isCalledFromScheduler )
logger . info ( " Received a \" getNumberOfPayloadsAggregatedByService \" request. " ) ;
String getNumOfPayloadsAggregatedByServiceQuery = " select count(id) from \ n " +
2023-08-23 15:55:23 +02:00
" (select id from " + DatabaseConnector . databaseName + " .payload_aggregated \ n " +
2023-06-23 14:22:26 +02:00
" union all \ n " +
2023-08-23 15:55:23 +02:00
" select id from " + DatabaseConnector . databaseName + " .payload_bulk_import) \ n " +
2023-06-23 14:22:26 +02:00
" as payloads_from_service " ;
2023-07-06 17:29:13 +02:00
return statsService . getNumberOfPayloads ( getNumOfPayloadsAggregatedByServiceQuery , " payloads aggregated by the Service, through both crawling and bulk-import procedures " , 0 ) ;
2023-06-23 14:22:26 +02:00
}
/ * *
* This endpoint returns the number of legacy payloads , which were aggregated by methods other thant the PDF Aggregation Service .
* * /
2023-07-25 11:03:27 +02:00
@GetMapping ( value = " getNumberOfLegacyPayloads " , produces = MediaType . TEXT_PLAIN_VALUE )
2023-06-23 14:22:26 +02:00
public ResponseEntity < ? > getNumberOfLegacyPayloads ( boolean isCalledFromScheduler )
{
if ( ! isCalledFromScheduler )
logger . info ( " Received a \" getNumberOfLegacyPayloads \" request. " ) ;
2023-08-23 15:55:23 +02:00
String getNumOfLegacyPayloadsQuery = " select count(id) from " + DatabaseConnector . databaseName + " .payload_legacy " ;
2023-07-06 17:29:13 +02:00
return statsService . getNumberOfPayloads ( getNumOfLegacyPayloadsQuery , " legacy payloads " , 0 ) ;
2022-10-18 14:00:26 +02:00
}
2023-04-24 08:54:35 +02:00
/ * *
* This endpoint returns the number of payloads related to the given datasourceID .
* * /
2023-07-25 11:03:27 +02:00
@GetMapping ( value = " getNumberOfPayloadsForDatasource " , produces = MediaType . TEXT_PLAIN_VALUE )
2023-04-24 08:54:35 +02:00
public ResponseEntity < ? > getNumberOfPayloadsForDatasource ( @RequestParam String datasourceId ) {
logger . info ( " Received a \" getNumberOfPayloadsForDatasource \" request. " ) ;
2023-06-23 14:22:26 +02:00
final String getNumOfPayloadsForDatasourceQuery =
2023-08-23 15:55:23 +02:00
" select count(p.id) from " + DatabaseConnector . databaseName + " .payload p \ n " +
" join " + DatabaseConnector . databaseName + " .publication pu on pu.id=p.id and pu.datasourceid= \" " + datasourceId + " \" " ;
2023-05-15 11:44:16 +02:00
if ( logger . isTraceEnabled ( ) )
2023-06-23 14:22:26 +02:00
logger . trace ( " getNumOfPayloadsForDatasourceQuery: \ n " + getNumOfPayloadsForDatasourceQuery ) ;
2023-05-15 11:44:16 +02:00
2023-07-06 17:29:13 +02:00
return statsService . getNumberOfPayloads ( getNumOfPayloadsForDatasourceQuery , " payloads related to datasourceId \" " + datasourceId + " \" " , 0 ) ;
2023-05-04 14:48:49 +02:00
}
// TODO - Add an endpoint to get the publication year as a param and return the number of payloads for the publications of that year.
// select count(p.id) from payload p
2023-06-15 22:19:36 +02:00
// join publication pu on pu.id=p.id and pu.year=<GIVEN_YEAR>
2023-05-04 14:48:49 +02:00
2023-04-24 08:54:35 +02:00
2023-05-04 14:48:49 +02:00
// TODO - Add an endpoint to return the info of all datasources in the database with the count of their payloads (including 0).
// Maybe have a param "numTopDatasources" which will work a a "limit" in the following query.
// In case the "numTopDatasources" param is not given or is less or equal to 0, then no limit will be added to the query.
/ *
select d . id , d . name , d . type , d . allow_harvest , count ( p . id ) as payload_count from datasource d
2023-07-19 17:31:24 +02:00
join publication pu on pu . datasourceid = d . id - - We want the datasources with at least 1 publication .
2023-06-15 22:19:36 +02:00
left join payload p on p . id = pu . id - - We want the datasources with 0 payloads too , so we use " left join " .
2023-05-04 14:48:49 +02:00
group by d . id , d . name , d . type , d . allow_harvest
order by payload_count desc
* /
2023-06-19 13:42:00 +02:00
// TODO - Add an endpoint to return the number of payloads found for each publication-year, in descending order..
// For example the number of payloads for publications published in 2016 is <number>
// --//-- the number for 2017 is <number>
2023-08-23 15:55:23 +02:00
// Add a "limit" parameter for the user to specify that wants only the last 5 years (2019-2023).
2023-06-19 13:42:00 +02:00
2023-05-04 14:48:49 +02:00
/ * *
* This endpoint returns the total number of distinct full - text files existing in the database .
* * /
2023-07-25 11:03:27 +02:00
@GetMapping ( value = " getNumberOfAllDistinctFullTexts " , produces = MediaType . TEXT_PLAIN_VALUE )
2023-05-04 14:48:49 +02:00
public ResponseEntity < ? > getNumberOfAllDistinctFullTexts ( ) {
logger . info ( " Received a \" getNumberOfAllDistinctFullTexts \" request. " ) ;
2023-08-23 15:55:23 +02:00
final String getPayloadsNumberQuery = " select count(distinct `hash`) from " + DatabaseConnector . databaseName + " .payload " ;
2023-07-06 17:29:13 +02:00
return statsService . getNumberOfPayloads ( getPayloadsNumberQuery , " distinct full-text files " , 0 ) ;
2023-04-24 08:54:35 +02:00
}
2023-02-02 16:58:47 +01:00
/ * *
2023-06-23 14:22:26 +02:00
* This endpoint returns the number of records inspected by the PDF - Aggregation - Service , through crawling .
2023-02-02 16:58:47 +01:00
* * /
2023-07-25 11:03:27 +02:00
@GetMapping ( value = " getNumberOfRecordsInspectedByServiceThroughCrawling " , produces = MediaType . TEXT_PLAIN_VALUE )
2023-06-23 14:22:26 +02:00
public ResponseEntity < ? > getNumberOfRecordsInspectedByServiceThroughCrawling ( boolean isCalledFromScheduler )
2022-10-18 14:00:26 +02:00
{
2023-06-19 13:42:00 +02:00
if ( ! isCalledFromScheduler )
2023-06-23 14:22:26 +02:00
logger . info ( " Received a \" getNumberOfRecordsInspectedByServiceThroughCrawling \" request. " ) ;
2023-06-19 13:42:00 +02:00
2023-07-06 17:29:13 +02:00
return statsService . getNumberOfRecordsInspectedByServiceThroughCrawling ( 0 ) ;
2023-02-02 16:58:47 +01:00
}
2022-10-18 14:00:26 +02:00
}