- Add the "getNumberOfAllDistinctFullTexts" stats-endpoint.

- Add TODOs for more stats endpoints.
- Code polishing.
This commit is contained in:
Lampros Smyrnaios 2023-05-04 15:48:49 +03:00
parent b3196376eb
commit 42b93e9429
2 changed files with 41 additions and 11 deletions

View File

@ -33,7 +33,7 @@ public class StatsController {
public ResponseEntity<?> getNumberOfAllPayloads() {
    // Counts every row of the "payload" table of the configured database.
    logger.info("Received a \"getNumberOfAllPayloads\" request.");
    final String getPayloadsNumberQuery = "select count(id) from " + ImpalaConnector.databaseName + ".payload";
    // The second argument is the noun phrase which the service places after "The number of " in its log and error messages,
    // so it has to name the counted entity itself. Only one return statement may exist here: a second one would be unreachable code.
    return statsService.getNumberOfPayloads(getPayloadsNumberQuery, "payloads");
}
@ -45,7 +45,7 @@ public class StatsController {
public ResponseEntity<?> getNumberOfPayloadsAggregatedByService() {
    logger.info("Received a \"getNumberOfPayloadsAggregatedByService\" request.");
    // Only the payloads whose `date` is on or after 2021-01-01 are counted; the timestamp is cast to bigint to be comparable with the `date` column.
    // NOTE(review): presumably 2021-01-01 marks the start of the PDF Aggregation Service's operation - confirm.
    final String getPayloadsAggregatedQuery = "select count(id) from " + ImpalaConnector.databaseName + ".payload where `date` >= cast(cast('2021-01-01' as timestamp) as bigint)";
    // The descriptor is a full noun phrase, since the service places it after "The number of " in its messages.
    // Only one return statement may exist here: a second one would be unreachable code.
    return statsService.getNumberOfPayloads(getPayloadsAggregatedQuery, "payloads retrieved by the PDF Aggregation Service");
}
@ -55,14 +55,42 @@ public class StatsController {
@GetMapping("getNumberOfPayloadsForDatasource")
public ResponseEntity<?> getNumberOfPayloadsForDatasource(@RequestParam String datasourceId) {
    logger.info("Received a \"getNumberOfPayloadsForDatasource\" request.");

    // The "datasourceId" comes directly from the HTTP request and is embedded below inside a double-quoted SQL string-literal.
    // A double-quote could terminate that literal and a backslash could escape its closing quote, so both are rejected up-front,
    // in order to prevent SQL-injection. The service accepts only a ready-made query-string, so bind-parameters cannot be used from here.
    if ( (datasourceId.indexOf('"') >= 0) || (datasourceId.indexOf('\\') >= 0) ) {
        final String errorMsg = "The given \"datasourceId\" contains illegal characters (a double-quote or a backslash).";
        logger.error(errorMsg);
        return ResponseEntity.badRequest().body(errorMsg);
    }

    // Count the payloads whose publication belongs to the given datasource.
    final String getPayloadsNumberForDatasourceQuery =
            "select count(p.id) from " + ImpalaConnector.databaseName + ".payload p\n" +
            " join " + ImpalaConnector.databaseName + ".publication pu on pu.id=p.id and pu.datasourceid=\"" + datasourceId + "\"";

    logger.trace("getPayloadsNumberForDatasourceQuery:\n" + getPayloadsNumberForDatasourceQuery);

    // The second argument is the noun phrase which the service places after "The number of " in its log and error messages.
    return statsService.getNumberOfPayloads(getPayloadsNumberForDatasourceQuery, "payloads related to datasourceId \"" + datasourceId + "\"");
}
return statsService.getNumberOfPayloads(getPayloadsNumberForDatasourceQuery, "related to datasourceId \"" + datasourceId + "\"");
// TODO - Add an endpoint to get the publication year as a param and return the number of payloads for the publications of that year.
// select count(p.id) from payload p
// join publication pu on pu.id=p.id and pu.pub_year=<GIVEN_YEAR>
// TODO - Add an endpoint to return the info of all datasources in the database with the count of their payloads (including 0).
// Maybe have a param "numTopDatasources" which will work as a "limit" in the following query.
// In case the "numTopDatasources" param is not given or is less than or equal to 0, then no limit will be added to the query.
/*
select d.id, d.name, d.type, d.allow_harvest, count(p.id) as payload_count from datasource d
join publication pu on pu.datasourceid=d.id
left join payload p on p.id=pu.id -- We want the datasources with 0 payloads too.
group by d.id, d.name, d.type, d.allow_harvest
order by payload_count desc
*/
/**
 * Returns the number of distinct full-text files which exist in the database.
 * The distinct values of the `hash` column of the "payload" table are counted,
 * so multiple payload-records sharing the same hash are counted only once.
 * */
@GetMapping("getNumberOfAllDistinctFullTexts")
public ResponseEntity<?> getNumberOfAllDistinctFullTexts() {
    logger.info("Received a \"getNumberOfAllDistinctFullTexts\" request.");
    // The query is built in place; the second argument is the description of the counted entity, which the service uses in its messages.
    return statsService.getNumberOfPayloads(
            "select count(distinct `hash`) from " + ImpalaConnector.databaseName + ".payload",
            "distinct full-text files");
}

View File

@ -18,20 +18,22 @@ public class StatsServiceImpl implements StatsService {
@Autowired
private JdbcTemplate jdbcTemplate;
// No DB-lock is required for these READ-operations.
public ResponseEntity<?> getNumberOfPayloads(String getPayloadsNumberQuery, String extraMsg) {
public ResponseEntity<?> getNumberOfPayloads(String getNumberQuery, String message) {
try {
Object result = jdbcTemplate.queryForObject(getPayloadsNumberQuery, Integer.class);
Object result = jdbcTemplate.queryForObject(getNumberQuery, Integer.class);
if ( result != null ) {
int numOfPayloads = (int) result;
logger.info("Number of payloads " + extraMsg + " in the database \"" + ImpalaConnector.databaseName + "\" is " + numOfPayloads);
logger.info("The number of " + message + " in the database \"" + ImpalaConnector.databaseName + "\" is " + numOfPayloads);
return new ResponseEntity<>(numOfPayloads, HttpStatus.OK);
} else
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body("The payloads' number could not be retrieved from the database \"" + ImpalaConnector.databaseName + "\" using the getPayloadsNumberQuery: " + getPayloadsNumberQuery);
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body("The number of " + message + " could not be retrieved from the database \"" + ImpalaConnector.databaseName + "\" using the getNumberQuery: " + getNumberQuery);
} catch (EmptyResultDataAccessException erdae) {
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body("The payloads' number could not be retrieved from the database \"" + ImpalaConnector.databaseName + "\" using the getPayloadsNumberQuery: " + getPayloadsNumberQuery);
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body("The number of " + message + " could not be retrieved from the database \"" + ImpalaConnector.databaseName + "\" using the getNumberQuery: " + getNumberQuery);
} catch (Exception e) {
String errorMsg = "Problem when executing \"getPayloadsNumberQuery\": " + getPayloadsNumberQuery;
String errorMsg = "Problem when executing \"getNumberQuery\": " + getNumberQuery;
logger.error(errorMsg, e);
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg);
// We may get a "Class Cast Exception", in case the Impala returns a non-integer value.