Fully functional "PDF Aggregation Statistics" Service.
This commit is contained in:
commit
cf0aed5274
|
@ -0,0 +1,37 @@
|
||||||
|
HELP.md
|
||||||
|
.gradle
|
||||||
|
build/
|
||||||
|
!gradle/wrapper/gradle-wrapper.jar
|
||||||
|
!**/src/main/**/build/
|
||||||
|
!**/src/test/**/build/
|
||||||
|
|
||||||
|
### STS ###
|
||||||
|
.apt_generated
|
||||||
|
.classpath
|
||||||
|
.factorypath
|
||||||
|
.project
|
||||||
|
.settings
|
||||||
|
.springBeans
|
||||||
|
.sts4-cache
|
||||||
|
bin/
|
||||||
|
!**/src/main/**/bin/
|
||||||
|
!**/src/test/**/bin/
|
||||||
|
|
||||||
|
### IntelliJ IDEA ###
|
||||||
|
.idea
|
||||||
|
*.iws
|
||||||
|
*.iml
|
||||||
|
*.ipr
|
||||||
|
out/
|
||||||
|
!**/src/main/**/out/
|
||||||
|
!**/src/test/**/out/
|
||||||
|
|
||||||
|
### NetBeans ###
|
||||||
|
/nbproject/private/
|
||||||
|
/nbbuild/
|
||||||
|
/dist/
|
||||||
|
/nbdist/
|
||||||
|
/.nb-gradle/
|
||||||
|
|
||||||
|
### VS Code ###
|
||||||
|
.vscode/
|
|
@ -0,0 +1,7 @@
|
||||||
|
# Base image: JDK 8 on Alpine Linux (matches "sourceCompatibility = '1.8'" in build.gradle).
FROM openjdk:8-jdk-alpine

# Copy the Spring-Boot fat-jar built by Gradle into the image root
# (the default WORKDIR of this base image is "/", so the jar ends up at "/pdf_aggregation_statistics.jar").
COPY build/libs/*-SNAPSHOT.jar pdf_aggregation_statistics.jar

# The port the service listens on ("server.port" in "application.yml").
EXPOSE 1882

# Run the app; the configuration is expected to be bind-mounted at "/mnt/config" (see docker-compose.yml).
ENTRYPOINT ["java","-jar","/pdf_aggregation_statistics.jar", "--spring.config.location=file:///mnt/config/application.yml"]
|
|
@ -0,0 +1,12 @@
|
||||||
|
# PDF Aggregation Statistics
|
||||||
|
|
||||||
|
This is a public API to get specific statistics from the PDF Aggregation Service.
|
||||||
|
|
||||||
|
|
||||||
|
**To install and run the application**:
|
||||||
|
- Run ```git clone``` and then ```cd pdf_aggregation_statistics```.
|
||||||
|
- Set the preferred values inside the [__application.yml__](https://code-repo.d4science.org/lsmyrnaios/pdf_aggregation_statistics/src/branch/master/src/main/resources/application.yml) file.
|
||||||
|
- Execute the ```installAndRun.sh``` script which builds and runs the app.<br>
|
||||||
|
If you want to just run the app, then run the script with the argument "1": ```./installAndRun.sh 1```.<br>
|
||||||
|
If you want to build and run the app on a **Docker Container**, then run the script with the argument "0" followed by the argument "1": ```./installAndRun.sh 0 1```.<br>
|
||||||
|
<br>
|
|
@ -0,0 +1,100 @@
|
||||||
|
plugins {
    id 'java'
    id 'org.springframework.boot' version '2.7.12'
    id 'io.spring.dependency-management' version '1.1.0'
}

group = 'eu.openaire.pdf_aggregation_statistics'
version = '0.0.1-SNAPSHOT'
sourceCompatibility = '1.8'

repositories {
    mavenCentral()
    // Extra repositories needed for the Cloudera-Impala JDBC driver and its transitive dependencies.
    maven {
        name "omtd"
        url "https://repo.openminted.eu/content/repositories/releases/"
    }
    maven {
        name "pentaho-repo"
        url "https://public.nexus.pentaho.org/content/groups/omni/"
    }
}

dependencies {
    runtimeOnly "org.springframework.boot:spring-boot-devtools"

    implementation "org.springframework.boot:spring-boot-starter-web"
    implementation("org.springframework.boot:spring-boot-starter-security")
    implementation("org.springframework.boot:spring-boot-starter-jdbc")

    implementation("org.springframework.boot:spring-boot-configuration-processor")
    implementation("org.springframework.boot:spring-boot-starter-actuator")
    implementation("org.springframework.boot:spring-boot-starter-aop")

    implementation("org.springframework.security:spring-security-core")
    implementation("org.springframework.security:spring-security-web")
    implementation("org.springframework.security:spring-security-config")

    // https://mvnrepository.com/artifact/com.cloudera.impala/jdbc
    implementation("com.cloudera.impala:jdbc:2.5.31") {
        exclude group: 'org.apache.hive', module: 'hive-exec'
        exclude group: 'com.twitter', module: 'parquet-hadoop-bundle'
        exclude group: 'org.apache.parquet', module: 'parquet-avro'
        exclude group: 'org.apache.avro', module: 'avro'
        exclude group: 'org.slf4j', module: 'slf4j-log4j12'
        exclude group: 'org.apache.derby', module: 'derby'
        exclude group: 'org.eclipse.jetty.aggregate', module: 'jetty-all'
        exclude group: 'ch.qos.log4j', module: 'log4j'
        exclude group: 'ch.qos.log4j', module: 'apache-log4j-extras'

        // Vulnerable dependencies:
        exclude group: 'log4j', module: 'log4j'
        exclude group: 'org.apache.ant', module: 'ant'
        exclude group: 'org.apache.thrift', module: 'libthrift' // This is an older version (we add the updated one later).
        exclude group: 'org.apache.hive', module: 'hive-metastore'
        // Avoid excluding 'org.apache.hive:hive-service', as this is needed and unfortunately, even when adding a newer version separately, it introduces other vulnerable dependencies.
    }

    // https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common
    implementation('org.apache.hadoop:hadoop-common:3.3.5') {
        exclude group: 'org.apache.parquet', module: 'parquet-avro'
        exclude group: 'org.apache.avro', module: 'avro'
        exclude group: 'org.slf4j', module: 'slf4j-api'
        exclude group: 'org.slf4j', module: 'slf4j-reload4j'
        exclude group: 'ch.qos.reload4j', module: 'reload4j'

        // Vulnerable dependencies:
        exclude group: 'com.google.protobuf', module: 'protobuf-java'
        exclude group: 'org.codehaus.jackson', module: 'jackson-core-asl'
        exclude group: 'org.codehaus.jackson', module: 'jackson-mapper-asl'
        exclude group: 'com.fasterxml.woodstox', module: 'woodstox-core'
        //exclude group: 'commons-collections', module: 'commons-collections' // This dependency is required in order for the program to run without errors. It is discontinued.
    }

    // https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-core
    implementation('org.apache.hadoop:hadoop-mapreduce-client-core:3.3.5') {
        exclude group: 'org.apache.parquet', module: 'parquet-avro'
        exclude group: 'org.apache.avro', module: 'avro'
        exclude group: 'org.slf4j', module: 'slf4j-api'
        exclude group: 'org.slf4j', module: 'slf4j-reload4j'
        exclude group: 'ch.qos.reload4j', module: 'reload4j'

        // Vulnerable dependencies:
        exclude group: 'com.google.protobuf', module: 'protobuf-java'
        exclude group: 'io.netty', module: 'netty'
    }

    // Add back some updated version of the needed dependencies.
    implementation 'org.apache.thrift:libthrift:0.17.0' // Newer versions (>=0.18.X) are not compatible with JAVA 8.
    implementation 'com.fasterxml.woodstox:woodstox-core:6.5.1'

    // https://mvnrepository.com/artifact/io.micrometer/micrometer-registry-prometheus
    runtimeOnly 'io.micrometer:micrometer-registry-prometheus:1.11.1'

    testImplementation 'org.springframework.security:spring-security-test'
    testImplementation "org.springframework.boot:spring-boot-starter-test"
}

tasks.named('test') {
    useJUnitPlatform()
}
|
|
@ -0,0 +1,18 @@
|
||||||
|
version: '3.3'

services:
  pdf_aggregation_statistics:
    image: 'pdf_aggregation_service/pdf_aggregation_statistics:latest'
    container_name: pdf_aggregation_statistics
    ports:
      - '1882:1882'  # host-port : container-port (see "EXPOSE 1882" in the Dockerfile)
    volumes:
      # Externally-provided Spring configuration, read by the Dockerfile's ENTRYPOINT
      # ("--spring.config.location=file:///mnt/config/application.yml").
      - type: bind
        source: $HOME/tmp/config
        target: /mnt/config
      # Persist the application's log-files on the host.
      - type: bind
        source: $HOME/logs
        target: /logs
    build:
      dockerfile: ./Dockerfile
      context: .
|
|
@ -0,0 +1,4 @@
|
||||||
|
org.gradle.caching=true
|
||||||
|
org.gradle.parallel=true
|
||||||
|
org.gradle.caching.debug=false
|
||||||
|
org.gradle.warning.mode=all
|
|
@ -0,0 +1,6 @@
|
||||||
|
distributionBase=GRADLE_USER_HOME
|
||||||
|
distributionPath=wrapper/dists
|
||||||
|
distributionUrl=https\://services.gradle.org/distributions/gradle-8.1.1-bin.zip
|
||||||
|
networkTimeout=10000
|
||||||
|
zipStoreBase=GRADLE_USER_HOME
|
||||||
|
zipStorePath=wrapper/dists
|
|
@ -0,0 +1,72 @@
|
||||||
|
#!/usr/bin/env bash
# Shebang added: this script uses bash-only constructs ("[[ ]]", "echo -e"), so it must not run under plain "sh".

# This script installs and runs the project.

# For error-handling, we cannot use the "set -e" since: it has problems https://mywiki.wooledge.org/BashFAQ/105
# So we have our own function, for use when a single command fails.
# Args: $1 = error-message, $2 = exit-code.
handle_error () {
	echo -e "\n\n$1\n\n"; exit $2
}

# Change the working directory to the script's directory, when running from another location.
cd "${0%/*}" || handle_error "Could not change-dir to this script's dir!" 1

justInstall=0
shouldRunInDocker=0

if [[ $# -eq 1 ]]; then
	justInstall=$1
elif [[ $# -eq 2 ]]; then
	justInstall=$1
	shouldRunInDocker=$2
elif [[ $# -gt 2 ]]; then
	echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: script.sh <justInstall: 0 | 1> <shouldRunInDocker: 0 | 1>"; exit 2
fi

# Use explicit "$"-expansions in the numeric tests (bare names also work inside "[[ ]]" via arithmetic evaluation, but are unclear).
if [[ $justInstall -eq 1 && $shouldRunInDocker -eq 1 ]]; then
	echo -e "Cannot run in docker without re-building the project (just to be safe). Setting \"justInstall\" to < 0 >"
	justInstall=0
fi

gradleVersion="8.1.1"

if [[ $justInstall -eq 0 ]]; then

	# Install the required gradle-distribution, if it is not already present.
	if [[ ! -d /opt/gradle/gradle-${gradleVersion} ]]; then
		wget https://services.gradle.org/distributions/gradle-${gradleVersion}-bin.zip
		echo -e "\nAsking for sudo, in order to install 'gradle'..\n"
		sudo mkdir /opt/gradle
		sudo apt install -y unzip && sudo unzip -d /opt/gradle gradle-${gradleVersion}-bin.zip
		#ls /opt/gradle/gradle-${gradleVersion}	# For debugging installation
	fi

	export PATH=/opt/gradle/gradle-${gradleVersion}/bin:$PATH

	gradle wrapper --gradle-version=${gradleVersion} --distribution-type=bin

	#gradle tasks	# For debugging installation
	#gradle -v # For debugging installation

	gradle clean build

	if [[ $shouldRunInDocker -eq 1 ]]; then

		echo -e "\nBuilding the docker image and running the containers..\n"
		sudo docker --version || handle_error "Docker was not found!" 3
		(sudo mkdir -p "$HOME"/tmp/config && sudo cp ./src/main/resources/application.yml "$HOME"/tmp/config) || true # This also replaces an existing "application.yml".
		sudo mkdir -p "$HOME"/logs || true

		# Run in "detached mode" -d (in the background).
		(sudo docker compose up --build -d && echo -e "\nThe pdf_aggregation_statistics docker-container started running.\n") || handle_error "Could not build and/or run the 'pdf_aggregation_statistics' docker container!" 4

		echo -e "Waiting 55 seconds before getting the status..\n"
		sleep 55
		sudo docker ps -a || handle_error "Could not get the status of docker-containers!" 6 # Using -a to get the status of failed containers as well.
		echo -e "\n\nGetting the logs of docker-container \"pdf_aggregation_statistics\":\n"
		sudo docker logs "$(sudo docker ps -aqf "name=^pdf_aggregation_statistics$")" || handle_error "Could not get the logs of docker-container \"pdf_aggregation_statistics\"!" 7 # Using "regex anchors" to avoid false-positives. Works even if the container is not running, thus showing the error-log.
	fi
else
	export PATH=/opt/gradle/gradle-${gradleVersion}/bin:$PATH # Make sure the gradle is still accessible (it usually isn't without the "export").
fi

if [[ $shouldRunInDocker -ne 1 ]]; then
	gradle bootRun
fi
|
|
@ -0,0 +1,6 @@
|
||||||
|
// Resolve the build's plugins (Spring Boot, dependency-management) from the Gradle Plugin Portal.
pluginManagement {
    repositories {
        gradlePluginPortal()
    }
}

rootProject.name = 'pdf_aggregation_statistics'
|
|
@ -0,0 +1,32 @@
|
||||||
|
package eu.openaire.pdf_aggregation_statistics.Components;


import eu.openaire.pdf_aggregation_statistics.services.StatsService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;


/**
 * Holds the scheduled background jobs of this service.
 */
@Component
public class SchedulingTasks {

    private static final Logger logger = LoggerFactory.getLogger(SchedulingTasks.class);


    @Autowired
    StatsService statsService;


    /**
     * Periodically refreshes the in-memory "payloads per datasource" cache.
     */
    @Scheduled(initialDelay = 1, fixedDelay = 21_600_000) // Run right after initialization and then every 6 hours.
    public void gatherPayloadsPerDatasource()
    {
        // Request the number of payloads for each datasource and keep them in a ConcurrentHashMap,
        // where the "key" will be the "datasourceId" and the "value" will be the numOfPayloads for that datasource.

        // When the user requests the numOfPayloads for a given datasourceID, the app will return the result immediately.
        // It will be a quick O(1) get operation in the HashMap.

        statsService.gatherNumberOfPayloadsPerDatasource();
    }

}
|
|
@ -0,0 +1,46 @@
|
||||||
|
package eu.openaire.pdf_aggregation_statistics;


import eu.openaire.pdf_aggregation_statistics.util.UriBuilder;
import org.springframework.boot.CommandLineRunner;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.web.servlet.context.ServletWebServerApplicationContext;
import org.springframework.context.annotation.Bean;
import org.springframework.core.env.Environment;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.web.cors.CorsConfiguration;
import org.springframework.web.cors.CorsConfigurationSource;
import org.springframework.web.cors.UrlBasedCorsConfigurationSource;

import java.util.Arrays;
import java.util.Collections;


/**
 * Entry point of the "PDF Aggregation Statistics" Spring-Boot service.
 * Scheduling is enabled for the periodic cache-refresh job (see SchedulingTasks).
 */
@SpringBootApplication
@EnableScheduling
public class PdfAggregationStatisticsApplication {

    public static void main(String[] args) {
        SpringApplication.run(PdfAggregationStatisticsApplication.class, args);
    }


    /**
     * Defines the CORS policy of this public API: any origin may perform GET requests,
     * with a small whitelist of request-headers; only the "x-auth-token" header is exposed.
     */
    @Bean
    public CorsConfigurationSource corsConfigurationSource() {
        final CorsConfiguration corsConfig = new CorsConfiguration();
        corsConfig.setAllowedOrigins(Collections.singletonList("*"));
        corsConfig.setAllowedMethods(Collections.singletonList("GET"));
        corsConfig.setAllowedHeaders(Arrays.asList("authorization", "content-type", "x-auth-token"));
        corsConfig.setExposedHeaders(Collections.singletonList("x-auth-token"));

        final UrlBasedCorsConfigurationSource corsSource = new UrlBasedCorsConfigurationSource();
        corsSource.registerCorsConfiguration("/**", corsConfig);
        return corsSource;
    }


    /**
     * After the web-server has started, determines the server's base-URL
     * by constructing a UriBuilder (which stores the result in its static fields).
     */
    @Bean
    public CommandLineRunner setServerBaseUrl(Environment environment, ServletWebServerApplicationContext webServerAppCtxt)
    {
        return args -> new UriBuilder(environment, webServerAppCtxt);
    }

}
|
|
@ -0,0 +1,67 @@
|
||||||
|
package eu.openaire.pdf_aggregation_statistics.controllers;
|
||||||
|
|
||||||
|
|
||||||
|
import eu.openaire.pdf_aggregation_statistics.services.StatsServiceImpl;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.springframework.http.ResponseEntity;
|
||||||
|
import org.springframework.web.bind.annotation.GetMapping;
|
||||||
|
import org.springframework.web.bind.annotation.RequestMapping;
|
||||||
|
import org.springframework.web.bind.annotation.RequestParam;
|
||||||
|
import org.springframework.web.bind.annotation.RestController;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This controller returns statistics for the database.
|
||||||
|
*/
|
||||||
|
@RestController
|
||||||
|
@RequestMapping("/stats")
|
||||||
|
public class StatsController {
|
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(StatsController.class);
|
||||||
|
|
||||||
|
|
||||||
|
// This is a public API, so we will only support statistics that are actually needed and used by certain OpenAIRE Services.
|
||||||
|
// So for now, only a few requested metrics will be available.
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This endpoint returns the number of payloads related to the given datasourceID.
|
||||||
|
* Example of a datasourceID (ArXiv): opendoar____::6f4922f45568161a8cdf4ad2299f6d23
|
||||||
|
* */
|
||||||
|
@GetMapping("getNumberOfPayloadsForDatasource")
|
||||||
|
public ResponseEntity<?> getNumberOfPayloadsForDatasource(@RequestParam String datasourceId)
|
||||||
|
{
|
||||||
|
if ( logger.isDebugEnabled() )
|
||||||
|
logger.debug("Received a \"getNumberOfPayloadsForDatasource\" request for datasourceID: " + datasourceId);
|
||||||
|
|
||||||
|
String errorMsg = "The given \"datasourceID\": \"" + datasourceId + "\" is not an valid datasourceID.";
|
||||||
|
if ( datasourceId.length() != 46 ) {
|
||||||
|
logger.error(errorMsg + " The number of its characters is different than 46.");
|
||||||
|
return ResponseEntity.badRequest().body(errorMsg);
|
||||||
|
} else {
|
||||||
|
String[] parts = datasourceId.split("::", 2); // At most 2 parts will come out of the initial string.
|
||||||
|
if ( (parts.length != 2) || (parts[0].length() != 12) || (parts[1].length() != 32) ) {
|
||||||
|
logger.error(errorMsg + " It has non-valid parts.");
|
||||||
|
return ResponseEntity.badRequest().body(errorMsg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Search the Hashmap and get the value for this datasource.
|
||||||
|
// The Map has the numOfPayloads for all datasources, even for newly added ones.
|
||||||
|
// If the given datasourceID is not found in the map, then either is not a datasource or that datasource is not participating in the OpenAIRE Graph.
|
||||||
|
|
||||||
|
if ( StatsServiceImpl.datasourcesWithNumOfPayloads.isEmpty() ) {
|
||||||
|
errorMsg = "The \"datasourcesWithNumOfPayloads\" map was not populated!";
|
||||||
|
logger.error(errorMsg);
|
||||||
|
return ResponseEntity.internalServerError().body(errorMsg);
|
||||||
|
}
|
||||||
|
|
||||||
|
Integer numPayloads = StatsServiceImpl.datasourcesWithNumOfPayloads.get(datasourceId);
|
||||||
|
if ( numPayloads == null )
|
||||||
|
return ResponseEntity.notFound().build();
|
||||||
|
else
|
||||||
|
return ResponseEntity.ok(numPayloads);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,47 @@
|
||||||
|
package eu.openaire.pdf_aggregation_statistics.security;


import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.security.config.annotation.method.configuration.EnableGlobalMethodSecurity;
import org.springframework.security.config.annotation.web.builders.HttpSecurity;
import org.springframework.security.config.annotation.web.configuration.EnableWebSecurity;
import org.springframework.security.config.http.SessionCreationPolicy;
import org.springframework.security.web.SecurityFilterChain;


/**
 * Web-security configuration of the service: stateless sessions, CSRF disabled,
 * and every endpoint publicly accessible (this is a public, read-only API).
 */
@Configuration
@EnableWebSecurity
@EnableGlobalMethodSecurity (
        securedEnabled = false, // Just for now..
        jsr250Enabled = true,
        prePostEnabled = true
)
public class SecurityConfiguration {

    /**
     * Defines the Spring-Security filter-chain.
     * Note: the order of the fluent calls below is significant; do not reorder casually.
     */
    @Bean
    public SecurityFilterChain filterChain(HttpSecurity http) throws Exception {
        http
                .headers()
                    .frameOptions()
                        .sameOrigin() // Allow frames only from the same origin.
                .and()
                .cors() // Enable CORS (the policy is provided by a CorsConfigurationSource bean).
                .and()
                .csrf()
                    .disable() // Stateless API with no state-changing browser endpoints, so CSRF protection is disabled.
                .exceptionHandling()
                .and()
                .sessionManagement()
                    .sessionCreationPolicy(SessionCreationPolicy.STATELESS) // No HTTP-sessions are created.
                .and()
                .authorizeRequests()
                    .antMatchers("/**").permitAll() // All endpoints are public.
                //.anyRequest().authenticated()
                //.and()
                //.requiresChannel()
                //.anyRequest().requiresSecure()
        ;
        return http.build();
    }

}
|
|
@ -0,0 +1,8 @@
|
||||||
|
package eu.openaire.pdf_aggregation_statistics.services;


/**
 * Service contract for gathering aggregation-statistics from the database.
 */
public interface StatsService {

    /**
     * Queries the database for the number of payloads of every datasource
     * and caches the results in memory (see StatsServiceImpl).
     */
    void gatherNumberOfPayloadsPerDatasource();

}
|
|
@ -0,0 +1,62 @@
|
||||||
|
package eu.openaire.pdf_aggregation_statistics.services;


import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.dao.EmptyResultDataAccessException;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Service;

import java.sql.SQLException;
import java.util.concurrent.ConcurrentHashMap;


/**
 * Gathers payload-statistics from the database and caches them in memory,
 * so that the controller can answer requests without hitting the database.
 */
@Service
public class StatsServiceImpl implements StatsService {

    private static final Logger logger = LoggerFactory.getLogger(StatsServiceImpl.class);

    @Autowired
    private JdbcTemplate jdbcTemplate;

    // The database/schema name to query; set via the "database-name" property in "application.yml".
    @Value("${database-name}")
    private String databaseName;

    // No DB-lock is required for these READ-operations.

    // Cache of datasourceId -> number of payloads. Written here, read by StatsController.
    // NOTE(review): the initial capacity (105_000) looks oversized for the stated ~10_000 datasources — confirm whether 10_500 was intended.
    public static final ConcurrentHashMap<String, Integer> datasourcesWithNumOfPayloads = new ConcurrentHashMap<>(105_000); // The number of datasources is around 10_000.


    /**
     * Runs the payloads-per-datasource query and populates/updates the
     * "datasourcesWithNumOfPayloads" map. All errors are caught and logged;
     * the method never throws, so the scheduled job keeps running.
     */
    public void gatherNumberOfPayloadsPerDatasource()
    {
        final String getNumberOfPayloadsPerDatasourceQuery =
                "select d.id, count(p.id) as payload_count from " + databaseName + ".datasource d\n" +
                " join " + databaseName + ".publication pu on pu.datasourceid=d.id\n" +
                " left join " + databaseName + ".payload p on p.id=pu.id\n" + // We want the datasources with 0 payloads too, so we use "left join"
                " group by d.id"; // The group-by is needed.

        if ( logger.isTraceEnabled() )
            logger.trace("getNumberOfPayloadsPerDatasourceQuery:\n" + getNumberOfPayloadsPerDatasourceQuery);

        logger.info("Going to populate/update the \"datasourcesWithNumOfPayloads\" map.");
        try {
            jdbcTemplate.query(getNumberOfPayloadsPerDatasourceQuery, rs -> {
                try { // For each row: column 1 = "d.id", column 2 = "payload_count". The indexing starts from 1.
                    datasourcesWithNumOfPayloads.put(rs.getString(1), rs.getInt(2)); // Updates the number for an existing datasourceId or adds a new mapping for a new datasourceId.
                } catch (SQLException sqle) {
                    logger.error("No value was able to be retrieved from one of the columns of row_" + rs.getRow(), sqle);
                }
            });
            logger.info("The \"datasourcesWithNumOfPayloads\" map was populated/updated.");
        } catch (EmptyResultDataAccessException erdae) {
            logger.warn("The number of payloads per datasource could not be retrieved from the database \"" + databaseName + "\" using the getNumberOfPayloadsPerDatasourceQuery: " + getNumberOfPayloadsPerDatasourceQuery);
        } catch (Exception e) {
            logger.error("Problem when executing \"getNumberOfPayloadsPerDatasourceQuery\": " + getNumberOfPayloadsPerDatasourceQuery, e);
        }
    }

    // To get the human-friendly timestamp format from the BigInt in the database:
    // select from_timestamp(CAST(CAST(`date` as decimal(30,0))/1000 AS timestamp), "yyyy-MM-dd HH:mm:ss.SSS") from payload

}
|
|
@ -0,0 +1,93 @@
|
||||||
|
package eu.openaire.pdf_aggregation_statistics.util;


import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.boot.web.servlet.context.ServletWebServerApplicationContext;
import org.springframework.core.env.Environment;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.InetAddress;
import java.net.URL;


/**
 * Determines the base-URL under which this service is reachable
 * (scheme, public IP, port and servlet context-path) and stores it
 * in the static "baseUrl" field for later use.
 */
public class UriBuilder {

    private static final Logger logger = LoggerFactory.getLogger(UriBuilder.class);

    public static String ip = null;
    public static String baseUrl = null;

    /**
     * Builds the base-URL from the given Spring environment and the running web-server.
     * Falls back to plain HTTP and the loopback address when SSL-config or the public IP are unavailable.
     */
    public UriBuilder(Environment environment, ServletWebServerApplicationContext webServerAppCtxt) {
        baseUrl = "http";

        String sslEnabled = environment.getProperty("server.ssl.enabled");
        if (sslEnabled == null) { // It's expected to not exist if there is no SSL-configuration.
            logger.warn("No property \"server.ssl.enabled\" was found in \"application.yml\". Continuing with plain HTTP..");
            sslEnabled = "false";
        }
        baseUrl += sslEnabled.equals("true") ? "s" : "";
        baseUrl += "://";

        if ( (ip = getPublicIP()) == null )
            ip = InetAddress.getLoopbackAddress().getHostAddress(); // Non-null.

        baseUrl += ip + ":" + webServerAppCtxt.getWebServer().getPort();

        String baseInternalPath = environment.getProperty("server.servlet.context-path");
        if ( baseInternalPath != null ) {
            // Normalize the context-path so that "baseUrl" is always surrounded by single slashes.
            if ( !baseInternalPath.startsWith("/") )
                baseUrl += "/";
            baseUrl += baseInternalPath;
            if ( !baseInternalPath.endsWith("/") )
                baseUrl += "/";
        } else {
            logger.warn("No property \"server.servlet.context-path\" was found in \"application.yml\"!"); // Yes it's expected.
            baseUrl += "/";
        }

        logger.debug("ServerBaseURL: " + baseUrl);
    }

    /**
     * Asks an external service for this machine's public IP address.
     * @return the trimmed IP-string, or null on any failure (non-200 response, empty body, network error).
     */
    private static String getPublicIP()
    {
        HttpURLConnection conn = null;
        final String urlString = "https://checkip.amazonaws.com/";
        try {
            conn = (HttpURLConnection) new URL(urlString).openConnection();
            conn.setConnectTimeout(60_000); // 1 minute
            conn.setReadTimeout(120_000); // 2 minutes
            conn.setRequestMethod("GET");
            conn.connect();

            int responseCode = conn.getResponseCode();
            if ( responseCode != 200 ) {
                logger.warn("Cannot get the publicIP address for this machine, as \"" + urlString + "\" returned the HTTP-error-code: " + responseCode);
                return null;
            }

            // The response body is a single line holding the IP (ASCII). FIX: guard against an empty body,
            // which previously caused a NullPointerException on "readLine().trim()" (caught, but logged as an error with a stack-trace).
            try ( BufferedReader bf = new BufferedReader(new InputStreamReader(conn.getInputStream())) ) {
                String line = bf.readLine();
                if ( (line == null) || (line = line.trim()).isEmpty() ) {
                    logger.warn("Got an empty response-body from \"" + urlString + "\"!");
                    return null;
                }
                return line;
            }
        } catch (Exception e) {
            logger.warn("Cannot get the publicIP address for this machine, from \"" + urlString + "\"!", e);
            return null;
        } finally {
            if ( conn != null )
                conn.disconnect();
        }
    }

    public static String getBaseUrl() {
        return baseUrl;
    }

    public static void setBaseUrl(String baseUrl) {
        UriBuilder.baseUrl = baseUrl;
    }

}
|
|
@ -0,0 +1,63 @@
|
||||||
|
server:
  port: 1882
  servlet:
    context-path: /api
  shutdown: graceful  # Allow in-flight requests to finish on shutdown (see "spring.lifecycle.timeout-per-shutdown-phase").

# Custom property: the database/schema queried by StatsServiceImpl.
database-name: pdfaggregation_i

spring:
  application:
    name: PDF_Aggregation_Statistics
  datasource:
    driver-class-name: com.cloudera.impala.jdbc41.Driver
    url: jdbc:impala://iis-cdh5-test-gw.ocean.icm.edu.pl:21050/
    username: ''
    password: ''
    hikari:
      connectionTimeout: 30000   # ms
      idleTimeout: 600000        # ms (10 minutes)
      maxLifetime: 1800000       # ms (30 minutes)
      maximumPoolSize: 20
      minimumIdle: 4
      pool-name: StatisticsPool
  output:
    ansi:
      enabled: always
  lifecycle:
    timeout-per-shutdown-phase: 2m

# Prometheus related config.
management:
  endpoint:
    health:
      enabled: true
      show-details: always
    metrics:
      enabled: true
    prometheus:
      enabled: true
  endpoints:
    web:
      base-path: /actuator
      exposure:
        include: health,info,prometheus,metrics
  metrics:
    tags:
      application: ${spring.application.name}


logging:
  level:
    root: INFO
    eu:
      openaire:
        pdf_aggregation_statistics: DEBUG
    org:
      springframework:
        security: WARN
        web: INFO
      apache:
        hadoop:
          io:
            compress: WARN
|
|
@ -0,0 +1,33 @@
|
||||||
|
<configuration debug="false">

    <!-- Size-capped rolling file-logging, written under "logs/" (bind-mounted to the host by docker-compose). -->
    <appender name="RollingFile" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <file>logs/PDF_Aggregation_Statistics.log</file>

        <rollingPolicy class="ch.qos.logback.core.rolling.FixedWindowRollingPolicy">
            <fileNamePattern>logs/PDF_Aggregation_Statistics.%i.log.zip</fileNamePattern>
            <minIndex>1</minIndex>
            <maxIndex>20</maxIndex>
        </rollingPolicy>

        <triggeringPolicy class="ch.qos.logback.core.rolling.SizeBasedTriggeringPolicy">
            <maxFileSize>50MB</maxFileSize>
        </triggeringPolicy>

        <encoder>
            <charset>UTF-8</charset>
            <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS z} [%thread] %-5level %logger{36}.%M\(@%line\) - %msg%n</pattern>
        </encoder>
    </appender>

    <!-- Colorized console-logging. -->
    <appender name="Console" class="ch.qos.logback.core.ConsoleAppender">
        <encoder>
            <charset>UTF-8</charset>
            <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS z} [%thread] %highlight(%-5level) %cyan(%logger{36}.%M\(@%line\)) - %msg%n</pattern>
        </encoder>
    </appender>

    <root level="debug">
        <appender-ref ref="Console" />
        <!-- FIX: the "RollingFile" appender was fully configured but never referenced, so no log-files were
             ever written (leaving the docker-compose "/logs" bind-mount empty). Attach it to the root logger. -->
        <appender-ref ref="RollingFile" />
    </root>

</configuration>
|
|
@ -0,0 +1,12 @@
|
||||||
|
package eu.openaire.pdf_aggregation_statistics;


import org.springframework.boot.test.context.SpringBootTest;


@SpringBootTest
class PdfAggregationStatisticsApplicationTests {

    // The "@Test" annotation is commented-out, so this context-load check never runs.
    // NOTE(review): presumably disabled because loading the full context needs external resources (e.g. the database) — confirm and re-enable if possible.
    //@Test
    void contextLoads() {
    }

}
|
Loading…
Reference in New Issue