Fully functional "PDF Aggregation Statistics" Service.
This commit is contained in:
commit
cf0aed5274
|
@ -0,0 +1,37 @@
|
|||
HELP.md
|
||||
.gradle
|
||||
build/
|
||||
!gradle/wrapper/gradle-wrapper.jar
|
||||
!**/src/main/**/build/
|
||||
!**/src/test/**/build/
|
||||
|
||||
### STS ###
|
||||
.apt_generated
|
||||
.classpath
|
||||
.factorypath
|
||||
.project
|
||||
.settings
|
||||
.springBeans
|
||||
.sts4-cache
|
||||
bin/
|
||||
!**/src/main/**/bin/
|
||||
!**/src/test/**/bin/
|
||||
|
||||
### IntelliJ IDEA ###
|
||||
.idea
|
||||
*.iws
|
||||
*.iml
|
||||
*.ipr
|
||||
out/
|
||||
!**/src/main/**/out/
|
||||
!**/src/test/**/out/
|
||||
|
||||
### NetBeans ###
|
||||
/nbproject/private/
|
||||
/nbbuild/
|
||||
/dist/
|
||||
/nbdist/
|
||||
/.nb-gradle/
|
||||
|
||||
### VS Code ###
|
||||
.vscode/
|
|
@ -0,0 +1,7 @@
|
|||
# The jar is built externally by Gradle (see "installAndRun.sh") before "docker build" runs.
FROM openjdk:8-jdk-alpine

# Copy the Spring-Boot fat-jar into the image root under a fixed name.
COPY build/libs/*-SNAPSHOT.jar pdf_aggregation_statistics.jar

# The port the app listens on (matches "server.port" in application.yml).
EXPOSE 1882

# The config is bind-mounted at /mnt/config by docker-compose, so the packaged application.yml is overridden.
ENTRYPOINT ["java","-jar","/pdf_aggregation_statistics.jar", "--spring.config.location=file:///mnt/config/application.yml"]
|
|
@ -0,0 +1,12 @@
|
|||
# PDF Aggregation Statistics
|
||||
|
||||
This is a public API to get specific statistics from the PDF Aggregation Service.
|
||||
|
||||
|
||||
**To install and run the application**:
|
||||
- Run ```git clone``` and then ```cd pdf_aggregation_statistics```.
|
||||
- Set the preferred values inside the [__application.yml__](https://code-repo.d4science.org/lsmyrnaios/pdf_aggregation_statistics/src/branch/master/src/main/resources/application.yml) file.
|
||||
- Execute the ```installAndRun.sh``` script which builds and runs the app.<br>
|
||||
If you want to just run the app, then run the script with the argument "1": ```./installAndRun.sh 1```.<br>
|
||||
If you want to build and run the app on a **Docker Container**, then run the script with the argument "0" followed by the argument "1": ```./installAndRun.sh 0 1```.<br>
|
||||
<br>
|
|
@ -0,0 +1,100 @@
|
|||
plugins {
|
||||
id 'java'
|
||||
id 'org.springframework.boot' version '2.7.12'
|
||||
id 'io.spring.dependency-management' version '1.1.0'
|
||||
}
|
||||
|
||||
group = 'eu.openaire.pdf_aggregation_statistics'
|
||||
version = '0.0.1-SNAPSHOT'
|
||||
sourceCompatibility = '1.8'
|
||||
|
||||
repositories {
|
||||
mavenCentral()
|
||||
maven {
|
||||
name "omtd"
|
||||
url "https://repo.openminted.eu/content/repositories/releases/"
|
||||
}
|
||||
maven {
|
||||
name "pentaho-repo"
|
||||
url "https://public.nexus.pentaho.org/content/groups/omni/"
|
||||
}
|
||||
}
|
||||
|
||||
dependencies {
|
||||
runtimeOnly "org.springframework.boot:spring-boot-devtools"
|
||||
|
||||
implementation "org.springframework.boot:spring-boot-starter-web"
|
||||
implementation("org.springframework.boot:spring-boot-starter-security")
|
||||
implementation("org.springframework.boot:spring-boot-starter-jdbc")
|
||||
|
||||
implementation("org.springframework.boot:spring-boot-configuration-processor")
|
||||
implementation("org.springframework.boot:spring-boot-starter-actuator")
|
||||
implementation("org.springframework.boot:spring-boot-starter-aop")
|
||||
|
||||
implementation("org.springframework.security:spring-security-core")
|
||||
implementation("org.springframework.security:spring-security-web")
|
||||
implementation("org.springframework.security:spring-security-config")
|
||||
|
||||
// https://mvnrepository.com/artifact/com.cloudera.impala/jdbc
|
||||
implementation("com.cloudera.impala:jdbc:2.5.31") {
|
||||
exclude group: 'org.apache.hive', module: 'hive-exec'
|
||||
exclude group: 'com.twitter', module: 'parquet-hadoop-bundle'
|
||||
exclude group: 'org.apache.parquet', module: 'parquet-avro'
|
||||
exclude group: 'org.apache.avro', module: 'avro'
|
||||
exclude group: 'org.slf4j', module: 'slf4j-log4j12'
|
||||
exclude group: 'org.apache.derby', module: 'derby'
|
||||
exclude group: 'org.eclipse.jetty.aggregate', module: 'jetty-all'
|
||||
exclude group: 'ch.qos.log4j', module: 'log4j'
|
||||
exclude group: 'ch.qos.log4j', module: 'apache-log4j-extras'
|
||||
|
||||
// Vulnerable dependencies:
|
||||
exclude group: 'log4j', module: 'log4j'
|
||||
exclude group: 'org.apache.ant', module: 'ant'
|
||||
exclude group: 'org.apache.thrift', module: 'libthrift' // This is an older version (we add the updated one later).
|
||||
exclude group: 'org.apache.hive', module: 'hive-metastore'
|
||||
// Avoid excluding 'org.apache.hive:hive-service', as this is needed and unfortunately, even when adding a newer version separately, it introduces other vulnerable dependencies.
|
||||
}
|
||||
|
||||
// https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common
|
||||
implementation('org.apache.hadoop:hadoop-common:3.3.5') {
|
||||
exclude group: 'org.apache.parquet', module: 'parquet-avro'
|
||||
exclude group: 'org.apache.avro', module: 'avro'
|
||||
exclude group: 'org.slf4j', module: 'slf4j-api'
|
||||
exclude group: 'org.slf4j', module: 'slf4j-reload4j'
|
||||
exclude group: 'ch.qos.reload4j', module: 'reload4j'
|
||||
|
||||
// Vulnerable dependencies:
|
||||
exclude group: 'com.google.protobuf', module: 'protobuf-java'
|
||||
exclude group: 'org.codehaus.jackson', module: 'jackson-core-asl'
|
||||
exclude group: 'org.codehaus.jackson', module: 'jackson-mapper-asl'
|
||||
exclude group: 'com.fasterxml.woodstox', module: 'woodstox-core'
|
||||
//exclude group: 'commons-collections', module: 'commons-collections' // This dependency is required in order for the program to run without errors. It is discontinued.
|
||||
}
|
||||
|
||||
// https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-core
|
||||
implementation('org.apache.hadoop:hadoop-mapreduce-client-core:3.3.5') {
|
||||
exclude group: 'org.apache.parquet', module: 'parquet-avro'
|
||||
exclude group: 'org.apache.avro', module: 'avro'
|
||||
exclude group: 'org.slf4j', module: 'slf4j-api'
|
||||
exclude group: 'org.slf4j', module: 'slf4j-reload4j'
|
||||
exclude group: 'ch.qos.reload4j', module: 'reload4j'
|
||||
|
||||
// Vulnerable dependencies:
|
||||
exclude group: 'com.google.protobuf', module: 'protobuf-java'
|
||||
exclude group: 'io.netty', module: 'netty'
|
||||
}
|
||||
|
||||
// Add back some updated version of the needed dependencies.
|
||||
implementation 'org.apache.thrift:libthrift:0.17.0' // Newer versions (>=0.18.X) are not compatible with JAVA 8.
|
||||
implementation 'com.fasterxml.woodstox:woodstox-core:6.5.1'
|
||||
|
||||
// https://mvnrepository.com/artifact/io.micrometer/micrometer-registry-prometheus
|
||||
runtimeOnly 'io.micrometer:micrometer-registry-prometheus:1.11.1'
|
||||
|
||||
testImplementation 'org.springframework.security:spring-security-test'
|
||||
testImplementation "org.springframework.boot:spring-boot-starter-test"
|
||||
}
|
||||
|
||||
tasks.named('test') {
|
||||
useJUnitPlatform()
|
||||
}
|
|
@ -0,0 +1,18 @@
|
|||
version: '3.3'
|
||||
|
||||
services:
|
||||
pdf_aggregation_statistics:
|
||||
image: 'pdf_aggregation_service/pdf_aggregation_statistics:latest'
|
||||
container_name: pdf_aggregation_statistics
|
||||
ports:
|
||||
- '1882:1882'
|
||||
volumes:
|
||||
- type: bind
|
||||
source: $HOME/tmp/config
|
||||
target: /mnt/config
|
||||
- type: bind
|
||||
source: $HOME/logs
|
||||
target: /logs
|
||||
build:
|
||||
dockerfile: ./Dockerfile
|
||||
context: .
|
|
@ -0,0 +1,4 @@
|
|||
org.gradle.caching=true
|
||||
org.gradle.parallel=true
|
||||
org.gradle.caching.debug=false
|
||||
org.gradle.warning.mode=all
|
|
@ -0,0 +1,6 @@
|
|||
distributionBase=GRADLE_USER_HOME
|
||||
distributionPath=wrapper/dists
|
||||
distributionUrl=https\://services.gradle.org/distributions/gradle-8.1.1-bin.zip
|
||||
networkTimeout=10000
|
||||
zipStoreBase=GRADLE_USER_HOME
|
||||
zipStorePath=wrapper/dists
|
|
@ -0,0 +1,72 @@
|
|||
# This script installs and runs the project.

# For error-handling, we cannot use the "set -e" since: it has problems https://mywiki.wooledge.org/BashFAQ/105
# So we have our own function, for use when a single command fails.
handle_error () {
  echo -e "\n\n$1\n\n"; exit $2
}

# Change the working directory to the script's directory, when running from another location.
cd "${0%/*}" || handle_error "Could not change-dir to this script's dir!" 1

justInstall=0
shouldRunInDocker=0

if [[ $# -eq 1 ]]; then
  justInstall=$1
elif [[ $# -eq 2 ]]; then
  justInstall=$1
  shouldRunInDocker=$2
elif [[ $# -gt 2 ]]; then
  echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: script.sh <justInstall: 0 | 1> <shouldRunInDocker: 0 | 1>"; exit 2
fi

if [[ justInstall -eq 1 && shouldRunInDocker -eq 1 ]]; then
  echo -e "Cannot run in docker without re-building the project (just to be safe). Setting \"justInstall\" to < 0 >"
  justInstall=0
fi

gradleVersion="8.1.1"

if [[ justInstall -eq 0 ]]; then

  if [[ ! -d /opt/gradle/gradle-${gradleVersion} ]]; then
    # Abort early if the download fails, instead of getting a confusing error later at "unzip".
    wget https://services.gradle.org/distributions/gradle-${gradleVersion}-bin.zip || handle_error "Could not download gradle-${gradleVersion}!" 8
    echo -e "\nAsking for sudo, in order to install 'gradle'..\n"
    sudo mkdir -p /opt/gradle  # "-p" makes this idempotent, so a previous partial install does not break the script.
    sudo apt install -y unzip && sudo unzip -d /opt/gradle gradle-${gradleVersion}-bin.zip
    #ls /opt/gradle/gradle-${gradleVersion} # For debugging installation
  fi

  export PATH=/opt/gradle/gradle-${gradleVersion}/bin:$PATH

  gradle wrapper --gradle-version=${gradleVersion} --distribution-type=bin

  #gradle tasks # For debugging installation
  #gradle -v # For debugging installation

  gradle clean build

  if [[ shouldRunInDocker -eq 1 ]]; then

    echo -e "\nBuilding the docker image and running the containers..\n"
    sudo docker --version || handle_error "Docker was not found!" 3
    (sudo mkdir -p "$HOME"/tmp/config && sudo cp ./src/main/resources/application.yml "$HOME"/tmp/config) || true # This also replaces an existing "application.yml".
    sudo mkdir -p "$HOME"/logs || true

    # Run in "detached mode" -d (in the background).
    (sudo docker compose up --build -d && echo -e "\nThe pdf_aggregation_statistics docker-container started running.\n") || handle_error "Could not build and/or run the 'pdf_aggregation_statistics' docker container!" 4

    echo -e "Waiting 55 seconds before getting the status..\n"
    sleep 55
    sudo docker ps -a || handle_error "Could not get the status of docker-containers!" 6 # Using -a to get the status of failed containers as well.
    echo -e "\n\nGetting the logs of docker-container \"pdf_aggregation_statistics\":\n"
    sudo docker logs "$(sudo docker ps -aqf "name=^pdf_aggregation_statistics$")" || handle_error "Could not get the logs of docker-container \"pdf_aggregation_statistics\"!" 7 # Using "regex anchors" to avoid false-positives. Works even if the container is not running, thus showing the error-log.
  fi
else
  export PATH=/opt/gradle/gradle-${gradleVersion}/bin:$PATH # Make sure the gradle is still accessible (it usually isn't without the "export").
fi

if [[ shouldRunInDocker -ne 1 ]]; then
  gradle bootRun
fi
|
|
@ -0,0 +1,6 @@
|
|||
pluginManagement {
|
||||
repositories {
|
||||
gradlePluginPortal()
|
||||
}
|
||||
}
|
||||
rootProject.name = 'pdf_aggregation_statistics'
|
|
@ -0,0 +1,32 @@
|
|||
package eu.openaire.pdf_aggregation_statistics.Components;


import eu.openaire.pdf_aggregation_statistics.services.StatsService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;


/**
 * Holds the scheduled background jobs of this service.
 */
@Component
public class SchedulingTasks {

    // Currently unused; kept for future logging inside the scheduled jobs.
    private static final Logger logger = LoggerFactory.getLogger(SchedulingTasks.class);


    @Autowired
    StatsService statsService;


    /**
     * Requests the number of payloads for each datasource and caches them in a ConcurrentHashMap,
     * where the "key" is the "datasourceId" and the "value" is the numOfPayloads for that datasource.
     * When a user requests the numOfPayloads for a given datasourceId, the app returns the result
     * immediately: a quick O(1) get-operation on the HashMap.
     */
    @Scheduled(initialDelay = 1, fixedDelay = 21_600_000) // Run right after initialization and then every 6 hours.
    public void gatherPayloadsPerDatasource()
    {
        statsService.gatherNumberOfPayloadsPerDatasource();
    }

}
|
|
@ -0,0 +1,46 @@
|
|||
package eu.openaire.pdf_aggregation_statistics;


import eu.openaire.pdf_aggregation_statistics.util.UriBuilder;
import org.springframework.boot.CommandLineRunner;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.web.servlet.context.ServletWebServerApplicationContext;
import org.springframework.context.annotation.Bean;
import org.springframework.core.env.Environment;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.web.cors.CorsConfiguration;
import org.springframework.web.cors.CorsConfigurationSource;
import org.springframework.web.cors.UrlBasedCorsConfigurationSource;

import java.util.Arrays;
import java.util.Collections;


/**
 * Entry point of the "PDF Aggregation Statistics" Spring-Boot application.
 */
@SpringBootApplication
@EnableScheduling // Enables the @Scheduled jobs (see SchedulingTasks).
public class PdfAggregationStatisticsApplication {

    public static void main(String[] args) {
        SpringApplication.run(PdfAggregationStatisticsApplication.class, args);
    }


    /**
     * CORS configuration for all endpoints ("/**"): any origin is allowed,
     * but only the GET method — this is a public, read-only API.
     */
    @Bean
    public CorsConfigurationSource corsConfigurationSource() {
        CorsConfiguration configuration = new CorsConfiguration();
        configuration.setAllowedOrigins(Collections.singletonList("*"));
        configuration.setAllowedMethods(Collections.singletonList("GET"));
        configuration.setAllowedHeaders(Arrays.asList("authorization", "content-type", "x-auth-token"));
        configuration.setExposedHeaders(Collections.singletonList("x-auth-token"));
        UrlBasedCorsConfigurationSource source = new UrlBasedCorsConfigurationSource();
        source.registerCorsConfiguration("/**", configuration);
        return source;
    }


    /**
     * Builds the server's base-URL once the embedded web-server has started,
     * since the bound port is only known at that point (see UriBuilder).
     */
    @Bean
    public CommandLineRunner setServerBaseUrl(Environment environment, ServletWebServerApplicationContext webServerAppCtxt)
    {
        return args -> new UriBuilder(environment, webServerAppCtxt);
    }

}
|
|
@ -0,0 +1,67 @@
|
|||
package eu.openaire.pdf_aggregation_statistics.controllers;
|
||||
|
||||
|
||||
import eu.openaire.pdf_aggregation_statistics.services.StatsServiceImpl;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
|
||||
|
||||
/**
|
||||
* This controller returns statistics for the database.
|
||||
*/
|
||||
@RestController
|
||||
@RequestMapping("/stats")
|
||||
public class StatsController {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(StatsController.class);
|
||||
|
||||
|
||||
// This is a public API, so we will only support statistics that are actually needed and used by certain OpenAIRE Services.
|
||||
// So for now, only a few requested metrics will be available.
|
||||
|
||||
|
||||
/**
|
||||
* This endpoint returns the number of payloads related to the given datasourceID.
|
||||
* Example of a datasourceID (ArXiv): opendoar____::6f4922f45568161a8cdf4ad2299f6d23
|
||||
* */
|
||||
@GetMapping("getNumberOfPayloadsForDatasource")
|
||||
public ResponseEntity<?> getNumberOfPayloadsForDatasource(@RequestParam String datasourceId)
|
||||
{
|
||||
if ( logger.isDebugEnabled() )
|
||||
logger.debug("Received a \"getNumberOfPayloadsForDatasource\" request for datasourceID: " + datasourceId);
|
||||
|
||||
String errorMsg = "The given \"datasourceID\": \"" + datasourceId + "\" is not an valid datasourceID.";
|
||||
if ( datasourceId.length() != 46 ) {
|
||||
logger.error(errorMsg + " The number of its characters is different than 46.");
|
||||
return ResponseEntity.badRequest().body(errorMsg);
|
||||
} else {
|
||||
String[] parts = datasourceId.split("::", 2); // At most 2 parts will come out of the initial string.
|
||||
if ( (parts.length != 2) || (parts[0].length() != 12) || (parts[1].length() != 32) ) {
|
||||
logger.error(errorMsg + " It has non-valid parts.");
|
||||
return ResponseEntity.badRequest().body(errorMsg);
|
||||
}
|
||||
}
|
||||
|
||||
// Search the Hashmap and get the value for this datasource.
|
||||
// The Map has the numOfPayloads for all datasources, even for newly added ones.
|
||||
// If the given datasourceID is not found in the map, then either is not a datasource or that datasource is not participating in the OpenAIRE Graph.
|
||||
|
||||
if ( StatsServiceImpl.datasourcesWithNumOfPayloads.isEmpty() ) {
|
||||
errorMsg = "The \"datasourcesWithNumOfPayloads\" map was not populated!";
|
||||
logger.error(errorMsg);
|
||||
return ResponseEntity.internalServerError().body(errorMsg);
|
||||
}
|
||||
|
||||
Integer numPayloads = StatsServiceImpl.datasourcesWithNumOfPayloads.get(datasourceId);
|
||||
if ( numPayloads == null )
|
||||
return ResponseEntity.notFound().build();
|
||||
else
|
||||
return ResponseEntity.ok(numPayloads);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
package eu.openaire.pdf_aggregation_statistics.security;


import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.security.config.annotation.method.configuration.EnableGlobalMethodSecurity;
import org.springframework.security.config.annotation.web.builders.HttpSecurity;
import org.springframework.security.config.annotation.web.configuration.EnableWebSecurity;
import org.springframework.security.config.http.SessionCreationPolicy;
import org.springframework.security.web.SecurityFilterChain;


/**
 * Spring-Security configuration.
 * Currently every endpoint is public ("permitAll"), CSRF is disabled and no HTTP-session
 * is ever created (STATELESS) — appropriate for a public, read-only REST API.
 */
@Configuration
@EnableWebSecurity
@EnableGlobalMethodSecurity (
        securedEnabled = false, // Just for now..
        jsr250Enabled = true,
        prePostEnabled = true
)
public class SecurityConfiguration {

    /**
     * Defines the security filter-chain:
     * - frameOptions().sameOrigin(): only this app's own pages may frame its responses,
     * - cors(): applies the CorsConfigurationSource bean defined in the application class,
     * - csrf().disable(): no browser-sessions exist, so CSRF protection is unnecessary,
     * - STATELESS session-policy: never create or use an HTTP-session,
     * - permit all requests to all endpoints (authentication is commented-out for now).
     */
    @Bean
    public SecurityFilterChain filterChain(HttpSecurity http) throws Exception {
        http
            .headers()
                .frameOptions()
                    .sameOrigin()
            .and()
                .cors()
            .and()
                .csrf()
                    .disable()
            .exceptionHandling()
            .and()
            .sessionManagement()
                .sessionCreationPolicy(SessionCreationPolicy.STATELESS)
            .and()
            .authorizeRequests()
                .antMatchers("/**").permitAll()
            //.anyRequest().authenticated()
            //.and()
            //.requiresChannel()
            //.anyRequest().requiresSecure()
        ;
        return http.build();
    }

}
|
|
@ -0,0 +1,8 @@
|
|||
package eu.openaire.pdf_aggregation_statistics.services;


/**
 * Service which gathers statistics from the database.
 */
public interface StatsService {

    /**
     * Queries the database for the number of payloads of each datasource and caches
     * the results in an in-memory map (see StatsServiceImpl.datasourcesWithNumOfPayloads).
     * Invoked periodically by a scheduled job.
     */
    void gatherNumberOfPayloadsPerDatasource();

}
|
|
@ -0,0 +1,62 @@
|
|||
package eu.openaire.pdf_aggregation_statistics.services;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.dao.EmptyResultDataAccessException;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Service;

import java.sql.SQLException;
import java.util.concurrent.ConcurrentHashMap;


/**
 * Gathers the number-of-payloads-per-datasource statistic from the database
 * and caches it in a static, concurrent map, for O(1) lookups by the controller.
 */
@Service
public class StatsServiceImpl implements StatsService {

    private static final Logger logger = LoggerFactory.getLogger(StatsServiceImpl.class);

    @Autowired
    private JdbcTemplate jdbcTemplate;

    // Name of the database to query; set via the "database-name" property in "application.yml".
    @Value("${database-name}")
    private String databaseName;

    // No DB-lock is required for these READ-operations.

    // Maps datasourceId -> numOfPayloads. Read concurrently by the controller while being updated here.
    // NOTE(review): the initial capacity (105_000) is ~10x the stated datasource count — confirm whether "10_500" was intended.
    public static final ConcurrentHashMap<String, Integer> datasourcesWithNumOfPayloads = new ConcurrentHashMap<>(105_000); // The number of datasources is around 10_000.


    /**
     * Runs an aggregate query counting payloads per datasource and populates/updates the
     * "datasourcesWithNumOfPayloads" map in place. Errors are logged, never thrown,
     * so the scheduled job keeps running on the next cycle.
     */
    public void gatherNumberOfPayloadsPerDatasource()
    {
        final String getNumberOfPayloadsPerDatasourceQuery =
                "select d.id, count(p.id) as payload_count from " + databaseName + ".datasource d\n" +
                "    join " + databaseName + ".publication pu on pu.datasourceid=d.id\n" +
                "    left join " + databaseName + ".payload p on p.id=pu.id\n" + // We want the datasources with 0 payloads too, so we use "left join"
                "    group by d.id"; // The group-by is needed.

        if ( logger.isTraceEnabled() )
            logger.trace("getNumberOfPayloadsPerDatasourceQuery:\n" + getNumberOfPayloadsPerDatasourceQuery);

        logger.info("Going to populate/update the \"datasourcesWithNumOfPayloads\" map.");
        try {
            jdbcTemplate.query(getNumberOfPayloadsPerDatasourceQuery, rs -> {
                try { // For each of the 2 columns returned (id, payload_count). The indexing starts from 1.
                    datasourcesWithNumOfPayloads.put(rs.getString(1), rs.getInt(2)); // Updates the number for an existing datasourceId or adds a new mapping for a new datasourceId.
                } catch (SQLException sqle) {
                    logger.error("No value was able to be retrieved from one of the columns of row_" + rs.getRow(), sqle);
                }
            });
            logger.info("The \"datasourcesWithNumOfPayloads\" map was populated/updated.");
        } catch (EmptyResultDataAccessException erdae) {
            logger.warn("The number of payloads per datasource could not be retrieved from the database \"" + databaseName + "\" using the getNumberOfPayloadsPerDatasourceQuery: " + getNumberOfPayloadsPerDatasourceQuery);
        } catch (Exception e) {
            logger.error("Problem when executing \"getNumberOfPayloadsPerDatasourceQuery\": " + getNumberOfPayloadsPerDatasourceQuery, e);
        }
    }

    // To get the human-friendly timestamp format from the BigInt in the database:
    // select from_timestamp(CAST(CAST(`date` as decimal(30,0))/1000 AS timestamp), "yyyy-MM-dd HH:mm:ss.SSS") from payload

}
|
|
@ -0,0 +1,93 @@
|
|||
package eu.openaire.pdf_aggregation_statistics.util;


import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.boot.web.servlet.context.ServletWebServerApplicationContext;
import org.springframework.core.env.Environment;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.InetAddress;
import java.net.URL;


/**
 * Builds and exposes the base-URL of this service (e.g. "http://1.2.3.4:1882/api/"),
 * combining the SSL-setting, the machine's public IP (with a loopback fallback),
 * the bound port of the embedded web-server and the servlet context-path.
 */
public class UriBuilder {

    private static final Logger logger = LoggerFactory.getLogger(UriBuilder.class);

    public static String ip = null;
    public static String baseUrl = null;

    public UriBuilder(Environment environment, ServletWebServerApplicationContext webServerAppCtxt) {
        baseUrl = "http";

        String sslEnabled = environment.getProperty("server.ssl.enabled");
        if ( sslEnabled == null ) { // It's expected to not exist if there is no SSL-configuration.
            logger.warn("No property \"server.ssl.enabled\" was found in \"application.yml\". Continuing with plain HTTP..");
            sslEnabled = "false";
        }
        baseUrl += sslEnabled.equals("true") ? "s" : "";
        baseUrl += "://";

        // Prefer the machine's public IP; fall back to the loopback address.
        if ( (ip = getPublicIP()) == null )
            ip = InetAddress.getLoopbackAddress().getHostAddress(); // Non-null.

        baseUrl += ip + ":" + webServerAppCtxt.getWebServer().getPort();

        String baseInternalPath = environment.getProperty("server.servlet.context-path");
        if ( baseInternalPath != null ) {
            if ( !baseInternalPath.startsWith("/") )
                baseUrl += "/";
            baseUrl += baseInternalPath;
            if ( !baseInternalPath.endsWith("/") )
                baseUrl += "/";
        } else {
            logger.warn("No property \"server.servlet.context-path\" was found in \"application.yml\"!"); // Yes it's expected.
            baseUrl += "/";
        }

        logger.debug("ServerBaseURL: {}", baseUrl);
    }

    /**
     * Asks an external service for the public IP of this machine.
     *
     * @return the public IP address, or null if it could not be retrieved
     */
    private static String getPublicIP()
    {
        String urlString = "https://checkip.amazonaws.com/";
        HttpURLConnection conn = null;
        try {
            conn = (HttpURLConnection) new URL(urlString).openConnection();
            conn.setConnectTimeout(60_000); // 1 minute
            conn.setReadTimeout(120_000); // 2 minutes
            conn.setRequestMethod("GET");
            conn.connect();

            int responseCode = conn.getResponseCode();
            if ( responseCode != 200 ) {
                logger.warn("Cannot get the publicIP address for this machine, as \"{}\" returned the HTTP-error-code: {}", urlString, responseCode);
                return null;
            }

            String line;
            try ( BufferedReader bf = new BufferedReader(new InputStreamReader(conn.getInputStream())) ) {
                line = bf.readLine();
            }
            if ( line == null ) { // Guard against an empty response-body; previously this caused a NullPointerException on ".trim()".
                logger.warn("Cannot get the publicIP address for this machine, as \"{}\" returned an empty response-body!", urlString);
                return null;
            }
            return line.trim();
        } catch (Exception e) {
            logger.warn("Cannot get the publicIP address for this machine, from \"" + urlString + "\"!", e);
            return null;
        } finally {
            if ( conn != null )
                conn.disconnect();
        }
    }

    public static String getBaseUrl() {
        return baseUrl;
    }

    public static void setBaseUrl(String baseUrl) {
        UriBuilder.baseUrl = baseUrl;
    }

}
|
|
@ -0,0 +1,63 @@
|
|||
server:
|
||||
port: 1882
|
||||
servlet:
|
||||
context-path: /api
|
||||
shutdown: graceful
|
||||
|
||||
database-name: pdfaggregation_i
|
||||
|
||||
spring:
|
||||
application:
|
||||
name: PDF_Aggregation_Statistics
|
||||
datasource:
|
||||
driver-class-name: com.cloudera.impala.jdbc41.Driver
|
||||
url: jdbc:impala://iis-cdh5-test-gw.ocean.icm.edu.pl:21050/
|
||||
username: ''
|
||||
password: ''
|
||||
hikari:
|
||||
connectionTimeout: 30000
|
||||
idleTimeout: 600000
|
||||
maxLifetime: 1800000
|
||||
maximumPoolSize: 20
|
||||
minimumIdle: 4
|
||||
pool-name: StatisticsPool
|
||||
output:
|
||||
ansi:
|
||||
enabled: always
|
||||
lifecycle:
|
||||
timeout-per-shutdown-phase: 2m
|
||||
|
||||
# Prometheus related config.
|
||||
management:
|
||||
endpoint:
|
||||
health:
|
||||
enabled: true
|
||||
show-details: always
|
||||
metrics:
|
||||
enabled: true
|
||||
prometheus:
|
||||
enabled: true
|
||||
endpoints:
|
||||
web:
|
||||
base-path: /actuator
|
||||
exposure:
|
||||
include: health,info,prometheus,metrics
|
||||
metrics:
|
||||
tags:
|
||||
application: ${spring.application.name}
|
||||
|
||||
|
||||
logging:
|
||||
level:
|
||||
root: INFO
|
||||
eu:
|
||||
openaire:
|
||||
pdf_aggregation_statistics: DEBUG
|
||||
org:
|
||||
springframework:
|
||||
security: WARN
|
||||
web: INFO
|
||||
apache:
|
||||
hadoop:
|
||||
io:
|
||||
compress: WARN
|
|
@ -0,0 +1,33 @@
|
|||
<configuration debug="false">
|
||||
|
||||
<appender name="RollingFile" class="ch.qos.logback.core.rolling.RollingFileAppender">
|
||||
<file>logs/PDF_Aggregation_Statistics.log</file>
|
||||
|
||||
<rollingPolicy class="ch.qos.logback.core.rolling.FixedWindowRollingPolicy">
|
||||
<fileNamePattern>logs/PDF_Aggregation_Statistics.%i.log.zip</fileNamePattern>
|
||||
<minIndex>1</minIndex>
|
||||
<maxIndex>20</maxIndex>
|
||||
</rollingPolicy>
|
||||
|
||||
<triggeringPolicy class="ch.qos.logback.core.rolling.SizeBasedTriggeringPolicy">
|
||||
<maxFileSize>50MB</maxFileSize>
|
||||
</triggeringPolicy>
|
||||
|
||||
<encoder>
|
||||
<charset>UTF-8</charset>
|
||||
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS z} [%thread] %-5level %logger{36}.%M\(@%line\) - %msg%n</pattern>
|
||||
</encoder>
|
||||
</appender>
|
||||
|
||||
<appender name="Console" class="ch.qos.logback.core.ConsoleAppender">
|
||||
<encoder>
|
||||
<charset>UTF-8</charset>
|
||||
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS z} [%thread] %highlight(%-5level) %cyan(%logger{36}.%M\(@%line\)) - %msg%n</pattern>
|
||||
</encoder>
|
||||
</appender>
|
||||
|
||||
<root level="debug">
|
||||
<appender-ref ref="Console" />
|
||||
</root>
|
||||
|
||||
</configuration>
|
|
@ -0,0 +1,12 @@
|
|||
package eu.openaire.pdf_aggregation_statistics;

import org.springframework.boot.test.context.SpringBootTest;

/**
 * Spring-Boot context-load test.
 * NOTE(review): the @Test annotation is commented-out, so this test is currently disabled —
 * presumably because loading the full context needs a live database; confirm and re-enable if possible.
 */
@SpringBootTest
class PdfAggregationStatisticsApplicationTests {

	//@Test
	void contextLoads() {
	}

}
|
Loading…
Reference in New Issue