63 lines
3.1 KiB
Java
63 lines
3.1 KiB
Java
|
package eu.openaire.pdf_aggregation_statistics.services;
|
||
|
|
||
|
import org.slf4j.Logger;
|
||
|
import org.slf4j.LoggerFactory;
|
||
|
import org.springframework.beans.factory.annotation.Autowired;
|
||
|
import org.springframework.beans.factory.annotation.Value;
|
||
|
import org.springframework.dao.EmptyResultDataAccessException;
|
||
|
import org.springframework.jdbc.core.JdbcTemplate;
|
||
|
import org.springframework.stereotype.Service;
|
||
|
|
||
|
import java.sql.SQLException;
|
||
|
import java.util.concurrent.ConcurrentHashMap;
|
||
|
|
||
|
|
||
|
@Service
|
||
|
public class StatsServiceImpl implements StatsService {
|
||
|
|
||
|
private static final Logger logger = LoggerFactory.getLogger(StatsServiceImpl.class);
|
||
|
|
||
|
@Autowired
|
||
|
private JdbcTemplate jdbcTemplate;
|
||
|
|
||
|
@Value("${database-name}")
|
||
|
private String databaseName;
|
||
|
|
||
|
// No DB-lock is required for these READ-operations.
|
||
|
|
||
|
public static final ConcurrentHashMap<String, Integer> datasourcesWithNumOfPayloads = new ConcurrentHashMap<>(105_000); // The number of datasources is around 10_000.
|
||
|
|
||
|
|
||
|
public void gatherNumberOfPayloadsPerDatasource()
|
||
|
{
|
||
|
final String getNumberOfPayloadsPerDatasourceQuery =
|
||
|
"select d.id, count(p.id) as payload_count from " + databaseName + ".datasource d\n" +
|
||
|
" join " + databaseName + ".publication pu on pu.datasourceid=d.id\n" +
|
||
|
" left join " + databaseName + ".payload p on p.id=pu.id\n" + // We want the datasources with 0 payloads too, so we use "left join"
|
||
|
" group by d.id"; // The group-by is needed.
|
||
|
|
||
|
if ( logger.isTraceEnabled() )
|
||
|
logger.trace("getNumberOfPayloadsPerDatasourceQuery:\n" + getNumberOfPayloadsPerDatasourceQuery);
|
||
|
|
||
|
logger.info("Going to populate/update the \"datasourcesWithNumOfPayloads\" map.");
|
||
|
try {
|
||
|
jdbcTemplate.query(getNumberOfPayloadsPerDatasourceQuery, rs -> {
|
||
|
try { // For each of the 4 columns returned. The indexing starts from 1
|
||
|
datasourcesWithNumOfPayloads.put(rs.getString(1), rs.getInt(2)); // Updates then number for an existing datasourceId or adds a new mapping for a new datasourceId.
|
||
|
} catch (SQLException sqle) {
|
||
|
logger.error("No value was able to be retrieved from one of the columns of row_" + rs.getRow(), sqle);
|
||
|
}
|
||
|
});
|
||
|
logger.info("The \"datasourcesWithNumOfPayloads\" map was populated/updated.");
|
||
|
} catch (EmptyResultDataAccessException erdae) {
|
||
|
logger.warn("The number of payloads per datasource could not be retrieved from the database \"" + databaseName + "\" using the getNumberOfPayloadsPerDatasourceQuery: " + getNumberOfPayloadsPerDatasourceQuery);
|
||
|
} catch (Exception e) {
|
||
|
logger.error("Problem when executing \"getNumberOfPayloadsPerDatasourceQuery\": " + getNumberOfPayloadsPerDatasourceQuery, e);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// To get the human-friendly timestamp format from the BigInt in the database:
|
||
|
// select from_timestamp(CAST(CAST(`date` as decimal(30,0))/1000 AS timestamp), "yyyy-MM-dd HH:mm:ss.SSS") from payload
|
||
|
|
||
|
}
|