package eu.openaire.pdf_aggregation_statistics.services;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.dao.EmptyResultDataAccessException;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Service;

import java.sql.SQLException;
import java.util.concurrent.ConcurrentHashMap;


/**
 * Service that keeps an in-memory map of "datasource id -> number of payloads",
 * filled from the database whose name is configured under {@code database-name}.
 */
@Service
public class StatsServiceImpl implements StatsService {

    private static final Logger logger = LoggerFactory.getLogger(StatsServiceImpl.class);

    @Autowired
    private JdbcTemplate jdbcTemplate;

    @Value("${database-name}")
    private String databaseName;

    /**
     * Maps each datasource id (1st column of the query) to its payload count (2nd column).
     * The map is only ever added to / overwritten by {@link #gatherNumberOfPayloadsPerDatasource()};
     * this class never removes entries from it.
     */
    // The number of datasources is around 10_000.
    // NOTE(review): the initial capacity (105_000) is about 10x that estimate -- confirm it is intended.
    public static final ConcurrentHashMap<String, Integer> datasourcesWithNumOfPayloads = new ConcurrentHashMap<>(105_000);


    // No DB-lock is required for these READ-operations.

    /**
     * Queries the database for the number of payloads per datasource and stores every
     * returned row in {@link #datasourcesWithNumOfPayloads}.
     * <p>
     * Existing keys get their value overwritten and new keys are added. The map is not
     * cleared first, so ids which are no longer returned by the query keep their last value.
     * Exceptions raised while running the query are logged and not propagated to the caller.
     */
    public void gatherNumberOfPayloadsPerDatasource()
    {
        final String getNumberOfPayloadsPerDatasourceQuery =
                "select d.id, count(p.id) as payload_count from " + databaseName + ".datasource d\n" +
                " join " + databaseName + ".publication pu on pu.datasourceid=d.id\n" +
                " left join " + databaseName + ".payload p on p.id=pu.id\n" +   // "left join": keep the datasources whose publications have 0 payloads too.
                " group by d.id";   // The group-by is needed, since we count per datasource.

        // Parameterized logging: the message is only assembled when the TRACE level is enabled.
        logger.trace("getNumberOfPayloadsPerDatasourceQuery:\n{}", getNumberOfPayloadsPerDatasourceQuery);

        logger.info("Going to populate/update the \"datasourcesWithNumOfPayloads\" map.");
        try {
            // The lambda returns nothing, so it is used as a per-row callback.
            jdbcTemplate.query(getNumberOfPayloadsPerDatasourceQuery, rs -> {
                try {
                    // Each row has 2 columns: the datasource id and its payload count. The indexing starts from 1.
                    final String datasourceId = rs.getString(1);
                    if ( datasourceId == null ) {
                        // ConcurrentHashMap rejects null keys; skip this row instead of aborting the whole update.
                        logger.warn("A null datasource-id was found in row_{}. This row will be skipped.", rs.getRow());
                    } else {
                        // Updates the number for an existing datasourceId or adds a new mapping for a new datasourceId.
                        datasourcesWithNumOfPayloads.put(datasourceId, rs.getInt(2));
                    }
                } catch (SQLException sqle) {
                    // A failure in one row is logged and the processing continues with the next row.
                    logger.error("No value was able to be retrieved from one of the columns of row_{}", rs.getRow(), sqle);
                }
            });
            logger.info("The \"datasourcesWithNumOfPayloads\" map was populated/updated.");
        } catch (EmptyResultDataAccessException erdae) {
            // NOTE(review): a row-callback query is not expected to throw this for an empty result -- kept as a defensive branch; confirm.
            logger.warn("The number of payloads per datasource could not be retrieved from the database \"{}\" using the getNumberOfPayloadsPerDatasourceQuery: {}",
                    databaseName, getNumberOfPayloadsPerDatasourceQuery);
        } catch (Exception e) {
            // The last argument is the exception, so its stacktrace is logged along with the message.
            logger.error("Problem when executing \"getNumberOfPayloadsPerDatasourceQuery\": {}", getNumberOfPayloadsPerDatasourceQuery, e);
        }
    }


    // To get the human-friendly timestamp format from the BigInt in the database:
    // select from_timestamp(CAST(CAST(`date` as decimal(30,0))/1000 AS timestamp), "yyyy-MM-dd HH:mm:ss.SSS") from payload

}