From cf0aed52748269a5ad981e84eeb331649eb5391b Mon Sep 17 00:00:00 2001 From: LSmyrnaios Date: Tue, 20 Jun 2023 22:16:07 +0300 Subject: [PATCH] Fully functional "PDF Aggregation Statistics" Service. --- .gitignore | 37 +++++++ Dockerfile | 7 ++ README.md | 12 +++ build.gradle | 100 ++++++++++++++++++ docker-compose.yml | 18 ++++ gradle.properties | 4 + gradle/wrapper/gradle-wrapper.properties | 6 ++ installAndRun.sh | 72 +++++++++++++ settings.gradle | 6 ++ .../Components/SchedulingTasks.java | 32 ++++++ .../PdfAggregationStatisticsApplication.java | 46 ++++++++ .../controllers/StatsController.java | 67 ++++++++++++ .../security/SecurityConfiguration.java | 47 ++++++++ .../services/StatsService.java | 8 ++ .../services/StatsServiceImpl.java | 62 +++++++++++ .../util/UriBuilder.java | 93 ++++++++++++++++ src/main/resources/application.yml | 63 +++++++++++ src/main/resources/logback-spring.xml | 33 ++++++ ...AggregationStatisticsApplicationTests.java | 12 +++ 19 files changed, 725 insertions(+) create mode 100644 .gitignore create mode 100644 Dockerfile create mode 100644 README.md create mode 100644 build.gradle create mode 100644 docker-compose.yml create mode 100644 gradle.properties create mode 100644 gradle/wrapper/gradle-wrapper.properties create mode 100755 installAndRun.sh create mode 100644 settings.gradle create mode 100644 src/main/java/eu/openaire/pdf_aggregation_statistics/Components/SchedulingTasks.java create mode 100644 src/main/java/eu/openaire/pdf_aggregation_statistics/PdfAggregationStatisticsApplication.java create mode 100644 src/main/java/eu/openaire/pdf_aggregation_statistics/controllers/StatsController.java create mode 100644 src/main/java/eu/openaire/pdf_aggregation_statistics/security/SecurityConfiguration.java create mode 100644 src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsService.java create mode 100644 src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsServiceImpl.java create mode 100644 
src/main/java/eu/openaire/pdf_aggregation_statistics/util/UriBuilder.java create mode 100644 src/main/resources/application.yml create mode 100644 src/main/resources/logback-spring.xml create mode 100644 src/test/java/eu/openaire/pdf_aggregation_statistics/PdfAggregationStatisticsApplicationTests.java diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c2065bc --- /dev/null +++ b/.gitignore @@ -0,0 +1,37 @@ +HELP.md +.gradle +build/ +!gradle/wrapper/gradle-wrapper.jar +!**/src/main/**/build/ +!**/src/test/**/build/ + +### STS ### +.apt_generated +.classpath +.factorypath +.project +.settings +.springBeans +.sts4-cache +bin/ +!**/src/main/**/bin/ +!**/src/test/**/bin/ + +### IntelliJ IDEA ### +.idea +*.iws +*.iml +*.ipr +out/ +!**/src/main/**/out/ +!**/src/test/**/out/ + +### NetBeans ### +/nbproject/private/ +/nbbuild/ +/dist/ +/nbdist/ +/.nb-gradle/ + +### VS Code ### +.vscode/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..2dfb4e3 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,7 @@ +FROM openjdk:8-jdk-alpine + +COPY build/libs/*-SNAPSHOT.jar pdf_aggregation_statistics.jar + +EXPOSE 1882 + +ENTRYPOINT ["java","-jar","/pdf_aggregation_statistics.jar", "--spring.config.location=file:///mnt/config/application.yml"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..c625b41 --- /dev/null +++ b/README.md @@ -0,0 +1,12 @@ +# PDF Aggregation Statistics + +This is a public API to get specific statistics from the PDF Aggregation Service. + + +**To install and run the application**: +- Run ```git clone``` and then ```cd pdf_aggregation_statistics```. +- Set the preferable values inside the [__application.yml__](https://code-repo.d4science.org/lsmyrnaios/pdf_aggregation_statistics/src/branch/master/src/main/resources/application.yml) file. +- Execute the ```installAndRun.sh``` script which builds and runs the app.
+ If you want to just run the app, then run the script with the argument "1": ```./installAndRun.sh 1```.
+ If you want to build and run the app on a **Docker Container**, then run the script with the argument "0" followed by the argument "1": ```./installAndRun.sh 0 1```.
+
\ No newline at end of file diff --git a/build.gradle b/build.gradle new file mode 100644 index 0000000..38c85e8 --- /dev/null +++ b/build.gradle @@ -0,0 +1,100 @@ +plugins { + id 'java' + id 'org.springframework.boot' version '2.7.12' + id 'io.spring.dependency-management' version '1.1.0' +} + +group = 'eu.openaire.pdf_aggregation_statistics' +version = '0.0.1-SNAPSHOT' +sourceCompatibility = '1.8' + +repositories { + mavenCentral() + maven { + name "omtd" + url "https://repo.openminted.eu/content/repositories/releases/" + } + maven { + name "pentaho-repo" + url "https://public.nexus.pentaho.org/content/groups/omni/" + } +} + +dependencies { + runtimeOnly "org.springframework.boot:spring-boot-devtools" + + implementation "org.springframework.boot:spring-boot-starter-web" + implementation("org.springframework.boot:spring-boot-starter-security") + implementation("org.springframework.boot:spring-boot-starter-jdbc") + + implementation("org.springframework.boot:spring-boot-configuration-processor") + implementation("org.springframework.boot:spring-boot-starter-actuator") + implementation("org.springframework.boot:spring-boot-starter-aop") + + implementation("org.springframework.security:spring-security-core") + implementation("org.springframework.security:spring-security-web") + implementation("org.springframework.security:spring-security-config") + + // https://mvnrepository.com/artifact/com.cloudera.impala/jdbc + implementation("com.cloudera.impala:jdbc:2.5.31") { + exclude group: 'org.apache.hive', module: 'hive-exec' + exclude group: 'com.twitter', module: 'parquet-hadoop-bundle' + exclude group: 'org.apache.parquet', module: 'parquet-avro' + exclude group: 'org.apache.avro', module: 'avro' + exclude group: 'org.slf4j', module: 'slf4j-log4j12' + exclude group: 'org.apache.derby', module: 'derby' + exclude group: 'org.eclipse.jetty.aggregate', module: 'jetty-all' + exclude group: 'ch.qos.log4j', module: 'log4j' + exclude group: 'ch.qos.log4j', module: 
'apache-log4j-extras' + + // Vulnerable dependencies: + exclude group: 'log4j', module: 'log4j' + exclude group: 'org.apache.ant', module: 'ant' + exclude group: 'org.apache.thrift', module: 'libthrift' // This is an older version (we add the updated one later). + exclude group: 'org.apache.hive', module: 'hive-metastore' + // Avoid excluding 'org.apache.hive:hive-service', as this is needed and unfortunately, even adding a newer version separately, it introduces other vulnerable dependencies. + } + + // https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common + implementation('org.apache.hadoop:hadoop-common:3.3.5') { + exclude group: 'org.apache.parquet', module: 'parquet-avro' + exclude group: 'org.apache.avro', module: 'avro' + exclude group: 'org.slf4j', module: 'slf4j-api' + exclude group: 'org.slf4j', module: 'slf4j-reload4j' + exclude group: 'ch.qos.reload4j', module: 'reload4j' + + // Vulnerable dependencies: + exclude group: 'com.google.protobuf', module: 'protobuf-java' + exclude group: 'org.codehaus.jackson', module: 'jackson-core-asl' + exclude group: 'org.codehaus.jackson', module: 'jackson-mapper-asl' + exclude group: 'com.fasterxml.woodstox', module: 'woodstox-core' + //exclude group: 'commons-collections', module: 'commons-collections' // This dependency is required in order for the program to run without errors. It is discontinued. 
+ } + + // https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-core + implementation('org.apache.hadoop:hadoop-mapreduce-client-core:3.3.5') { + exclude group: 'org.apache.parquet', module: 'parquet-avro' + exclude group: 'org.apache.avro', module: 'avro' + exclude group: 'org.slf4j', module: 'slf4j-api' + exclude group: 'org.slf4j', module: 'slf4j-reload4j' + exclude group: 'ch.qos.reload4j', module: 'reload4j' + + // Vulnerable dependencies: + exclude group: 'com.google.protobuf', module: 'protobuf-java' + exclude group: 'io.netty', module: 'netty' + } + + // Add back some updated version of the needed dependencies. + implementation 'org.apache.thrift:libthrift:0.17.0' // Newer versions (>=0.18.X) are not compatible with JAVA 8. + implementation 'com.fasterxml.woodstox:woodstox-core:6.5.1' + + // https://mvnrepository.com/artifact/io.micrometer/micrometer-registry-prometheus + runtimeOnly 'io.micrometer:micrometer-registry-prometheus:1.11.1' + + testImplementation 'org.springframework.security:spring-security-test' + testImplementation "org.springframework.boot:spring-boot-starter-test" +} + +tasks.named('test') { + useJUnitPlatform() +} diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..187dcbe --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,18 @@ +version: '3.3' + +services: + pdf_aggregation_statistics: + image: 'pdf_aggregation_service/pdf_aggregation_statistics:latest' + container_name: pdf_aggregation_statistics + ports: + - '1882:1882' + volumes: + - type: bind + source: $HOME/tmp/config + target: /mnt/config + - type: bind + source: $HOME/logs + target: /logs + build: + dockerfile: ./Dockerfile + context: . 
diff --git a/gradle.properties b/gradle.properties new file mode 100644 index 0000000..9cc4b13 --- /dev/null +++ b/gradle.properties @@ -0,0 +1,4 @@ +org.gradle.caching=true +org.gradle.parallel=true +org.gradle.caching.debug=false +org.gradle.warning.mode=all diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000..37aef8d --- /dev/null +++ b/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,6 @@ +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-8.1.1-bin.zip +networkTimeout=10000 +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/installAndRun.sh b/installAndRun.sh new file mode 100755 index 0000000..e87151d --- /dev/null +++ b/installAndRun.sh @@ -0,0 +1,72 @@ +# This script installs and runs the project. + +# For error-handling, we cannot use the "set -e" since: it has problems https://mywiki.wooledge.org/BashFAQ/105 +# So we have our own function, for use when a single command fails. +handle_error () { + echo -e "\n\n$1\n\n"; exit $2 +} + +# Change the working directory to the script's directory, when running from another location. +cd "${0%/*}" || handle_error "Could not change-dir to this script's dir!" 1 + +justInstall=0 +shouldRunInDocker=0 + +if [[ $# -eq 1 ]]; then + justInstall=$1 +elif [[ $# -eq 2 ]]; then + justInstall=$1 + shouldRunInDocker=$2 +elif [[ $# -gt 2 ]]; then + echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: script.sh "; exit 2 +fi + +if [[ justInstall -eq 1 && shouldRunInDocker -eq 1 ]]; then + echo -e "Cannot run in docker without re-building the project (just to be safe). Setting \"justInstall\" to < 0 >" + justInstall=0 +fi + +gradleVersion="8.1.1" + +if [[ justInstall -eq 0 ]]; then + + if [[ ! 
-d /opt/gradle/gradle-${gradleVersion} ]]; then + wget https://services.gradle.org/distributions/gradle-${gradleVersion}-bin.zip + echo -e "\nAsking for sudo, in order to install 'gradle'..\n" + sudo mkdir /opt/gradle + sudo apt install -y unzip && sudo unzip -d /opt/gradle gradle-${gradleVersion}-bin.zip + #ls /opt/gradle/gradle-${gradleVersion} # For debugging installation + fi + + export PATH=/opt/gradle/gradle-${gradleVersion}/bin:$PATH + + gradle wrapper --gradle-version=${gradleVersion} --distribution-type=bin + + #gradle tasks # For debugging installation + #gradle -v # For debugging installation + + gradle clean build + + if [[ shouldRunInDocker -eq 1 ]]; then + + echo -e "\nBuilding the docker image and running the containers..\n" + sudo docker --version || handle_error "Docker was not found!" 3 + (sudo mkdir -p "$HOME"/tmp/config && sudo cp ./src/main/resources/application.yml "$HOME"/tmp/config) || true # This also replaces an existing "application.yml". + sudo mkdir -p "$HOME"/logs || true + + # Run in "detached mode" -d (in the background). + (sudo docker compose up --build -d && echo -e "\nThe pdf_aggregation_statistics docker-container started running.\n") || handle_error "Could not build and/or run the 'pdf_aggregation_statistics' docker container!" 4 + + echo -e "Waiting 55 seconds before getting the status..\n" + sleep 55 + sudo docker ps -a || handle_error "Could not get the status of docker-containers!" 6 # Using -a to get the status of failed containers as well. + echo -e "\n\nGetting the logs of docker-container \"pdf_aggregation_statistics\":\n" + sudo docker logs "$(sudo docker ps -aqf "name=^pdf_aggregation_statistics$")" || handle_error "Could not get the logs of docker-container \"pdf_aggregation_statistics\"!" 7 # Using "regex anchors" to avoid false-positives. Works even if the container is not running, thus showing the error-log. 
+ fi +else + export PATH=/opt/gradle/gradle-${gradleVersion}/bin:$PATH # Make sure the gradle is still accessible (it usually isn't without the "export"). +fi + +if [[ shouldRunInDocker -ne 1 ]]; then + gradle bootRun +fi diff --git a/settings.gradle b/settings.gradle new file mode 100644 index 0000000..6fea821 --- /dev/null +++ b/settings.gradle @@ -0,0 +1,6 @@ +pluginManagement { + repositories { + gradlePluginPortal() + } +} +rootProject.name = 'pdf_aggregation_statistics' diff --git a/src/main/java/eu/openaire/pdf_aggregation_statistics/Components/SchedulingTasks.java b/src/main/java/eu/openaire/pdf_aggregation_statistics/Components/SchedulingTasks.java new file mode 100644 index 0000000..75966da --- /dev/null +++ b/src/main/java/eu/openaire/pdf_aggregation_statistics/Components/SchedulingTasks.java @@ -0,0 +1,32 @@ +package eu.openaire.pdf_aggregation_statistics.Components; + +import eu.openaire.pdf_aggregation_statistics.services.StatsService; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.scheduling.annotation.Scheduled; +import org.springframework.stereotype.Component; + +@Component +public class SchedulingTasks { + + private static final Logger logger = LoggerFactory.getLogger(SchedulingTasks.class); + + + @Autowired + StatsService statsService; + + + @Scheduled(initialDelay = 1, fixedDelay = 21_600_000) // Run right after initialization and then every 6 hours. + public void gatherPayloadsPerDatasource() + { + // Request the number of payloads for each datasource and keep them in a ConcurrentHashMap, + // where the "key" will be the "datasourceId" and the "value" will be the numOfPayloads for that datasource. + + // When the user requests the numOfPayloads for a given datasourceId, the app will return the result immediately. + // It will be a quick O(1) get operation in the HashMap. 
+ + statsService.gatherNumberOfPayloadsPerDatasource(); + } + +} diff --git a/src/main/java/eu/openaire/pdf_aggregation_statistics/PdfAggregationStatisticsApplication.java b/src/main/java/eu/openaire/pdf_aggregation_statistics/PdfAggregationStatisticsApplication.java new file mode 100644 index 0000000..a17b932 --- /dev/null +++ b/src/main/java/eu/openaire/pdf_aggregation_statistics/PdfAggregationStatisticsApplication.java @@ -0,0 +1,46 @@ +package eu.openaire.pdf_aggregation_statistics; + +import eu.openaire.pdf_aggregation_statistics.util.UriBuilder; +import org.springframework.boot.CommandLineRunner; +import org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.boot.web.servlet.context.ServletWebServerApplicationContext; +import org.springframework.context.annotation.Bean; +import org.springframework.core.env.Environment; +import org.springframework.scheduling.annotation.EnableScheduling; +import org.springframework.web.cors.CorsConfiguration; +import org.springframework.web.cors.CorsConfigurationSource; +import org.springframework.web.cors.UrlBasedCorsConfigurationSource; + +import java.util.Arrays; +import java.util.Collections; + +@SpringBootApplication +@EnableScheduling +public class PdfAggregationStatisticsApplication { + + public static void main(String[] args) { + SpringApplication.run(PdfAggregationStatisticsApplication.class, args); + } + + + @Bean + public CorsConfigurationSource corsConfigurationSource() { + CorsConfiguration configuration = new CorsConfiguration(); + configuration.setAllowedOrigins(Collections.singletonList("*")); + configuration.setAllowedMethods(Collections.singletonList("GET")); + configuration.setAllowedHeaders(Arrays.asList("authorization", "content-type", "x-auth-token")); + configuration.setExposedHeaders(Collections.singletonList("x-auth-token")); + UrlBasedCorsConfigurationSource source = new UrlBasedCorsConfigurationSource(); + 
source.registerCorsConfiguration("/**", configuration); + return source; + } + + + @Bean + public CommandLineRunner setServerBaseUrl(Environment environment, ServletWebServerApplicationContext webServerAppCtxt) + { + return args -> new UriBuilder(environment, webServerAppCtxt); + } + +} diff --git a/src/main/java/eu/openaire/pdf_aggregation_statistics/controllers/StatsController.java b/src/main/java/eu/openaire/pdf_aggregation_statistics/controllers/StatsController.java new file mode 100644 index 0000000..efc4803 --- /dev/null +++ b/src/main/java/eu/openaire/pdf_aggregation_statistics/controllers/StatsController.java @@ -0,0 +1,67 @@ +package eu.openaire.pdf_aggregation_statistics.controllers; + + +import eu.openaire.pdf_aggregation_statistics.services.StatsServiceImpl; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RestController; + + +/** + * This controller returns statistics for the database. + */ +@RestController +@RequestMapping("/stats") +public class StatsController { + + private static final Logger logger = LoggerFactory.getLogger(StatsController.class); + + + // This is a public API, so we will only support statistics that are actually needed and used by certain OpenAIRE Services. + // So for now, only a few requested metrics will be available. + + + /** + * This endpoint returns the number of payloads related to the given datasourceID. 
+ * Example of a datasourceID (ArXiv): opendoar____::6f4922f45568161a8cdf4ad2299f6d23 + * */ + @GetMapping("getNumberOfPayloadsForDatasource") + public ResponseEntity getNumberOfPayloadsForDatasource(@RequestParam String datasourceId) + { + if ( logger.isDebugEnabled() ) + logger.debug("Received a \"getNumberOfPayloadsForDatasource\" request for datasourceID: " + datasourceId); + + String errorMsg = "The given \"datasourceID\": \"" + datasourceId + "\" is not an valid datasourceID."; + if ( datasourceId.length() != 46 ) { + logger.error(errorMsg + " The number of its characters is different than 46."); + return ResponseEntity.badRequest().body(errorMsg); + } else { + String[] parts = datasourceId.split("::", 2); // At most 2 parts will come out of the initial string. + if ( (parts.length != 2) || (parts[0].length() != 12) || (parts[1].length() != 32) ) { + logger.error(errorMsg + " It has non-valid parts."); + return ResponseEntity.badRequest().body(errorMsg); + } + } + + // Search the Hashmap and get the value for this datasource. + // The Map has the numOfPayloads for all datasources, even for newly added ones. + // If the given datasourceID is not found in the map, then either it is not a datasource or that datasource is not participating in the OpenAIRE Graph. 
+ + if ( StatsServiceImpl.datasourcesWithNumOfPayloads.isEmpty() ) { + errorMsg = "The \"datasourcesWithNumOfPayloads\" map was not populated!"; + logger.error(errorMsg); + return ResponseEntity.internalServerError().body(errorMsg); + } + + Integer numPayloads = StatsServiceImpl.datasourcesWithNumOfPayloads.get(datasourceId); + if ( numPayloads == null ) + return ResponseEntity.notFound().build(); + else + return ResponseEntity.ok(numPayloads); + } + +} diff --git a/src/main/java/eu/openaire/pdf_aggregation_statistics/security/SecurityConfiguration.java b/src/main/java/eu/openaire/pdf_aggregation_statistics/security/SecurityConfiguration.java new file mode 100644 index 0000000..1cb3a27 --- /dev/null +++ b/src/main/java/eu/openaire/pdf_aggregation_statistics/security/SecurityConfiguration.java @@ -0,0 +1,47 @@ +package eu.openaire.pdf_aggregation_statistics.security; + +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.security.config.annotation.method.configuration.EnableGlobalMethodSecurity; +import org.springframework.security.config.annotation.web.builders.HttpSecurity; +import org.springframework.security.config.annotation.web.configuration.EnableWebSecurity; +import org.springframework.security.config.http.SessionCreationPolicy; +import org.springframework.security.web.SecurityFilterChain; + + +@Configuration +@EnableWebSecurity +@EnableGlobalMethodSecurity ( + securedEnabled = false, // Just for now.. 
+ jsr250Enabled = true, + prePostEnabled = true +) +public class SecurityConfiguration { + + @Bean + public SecurityFilterChain filterChain(HttpSecurity http) throws Exception { + http + .headers() + .frameOptions() + .sameOrigin() + .and() + .cors() + .and() + .csrf() + .disable() + .exceptionHandling() + .and() + .sessionManagement() + .sessionCreationPolicy(SessionCreationPolicy.STATELESS) + .and() + .authorizeRequests() + .antMatchers("/**").permitAll() + //.anyRequest().authenticated() + //.and() + //.requiresChannel() + //.anyRequest().requiresSecure() + ; + return http.build(); + } + +} \ No newline at end of file diff --git a/src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsService.java b/src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsService.java new file mode 100644 index 0000000..7ef9dd1 --- /dev/null +++ b/src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsService.java @@ -0,0 +1,8 @@ +package eu.openaire.pdf_aggregation_statistics.services; + + +public interface StatsService { + + void gatherNumberOfPayloadsPerDatasource(); + +} diff --git a/src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsServiceImpl.java b/src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsServiceImpl.java new file mode 100644 index 0000000..9b98f21 --- /dev/null +++ b/src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsServiceImpl.java @@ -0,0 +1,62 @@ +package eu.openaire.pdf_aggregation_statistics.services; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.dao.EmptyResultDataAccessException; +import org.springframework.jdbc.core.JdbcTemplate; +import org.springframework.stereotype.Service; + +import java.sql.SQLException; +import java.util.concurrent.ConcurrentHashMap; + + +@Service +public class StatsServiceImpl 
implements StatsService { + + private static final Logger logger = LoggerFactory.getLogger(StatsServiceImpl.class); + + @Autowired + private JdbcTemplate jdbcTemplate; + + @Value("${database-name}") + private String databaseName; + + // No DB-lock is required for these READ-operations. + + public static final ConcurrentHashMap datasourcesWithNumOfPayloads = new ConcurrentHashMap<>(105_000); // The number of datasources is around 10_000. + + + public void gatherNumberOfPayloadsPerDatasource() + { + final String getNumberOfPayloadsPerDatasourceQuery = + "select d.id, count(p.id) as payload_count from " + databaseName + ".datasource d\n" + + " join " + databaseName + ".publication pu on pu.datasourceid=d.id\n" + + " left join " + databaseName + ".payload p on p.id=pu.id\n" + // We want the datasources with 0 payloads too, so we use "left join" + " group by d.id"; // The group-by is needed. + + if ( logger.isTraceEnabled() ) + logger.trace("getNumberOfPayloadsPerDatasourceQuery:\n" + getNumberOfPayloadsPerDatasourceQuery); + + logger.info("Going to populate/update the \"datasourcesWithNumOfPayloads\" map."); + try { + jdbcTemplate.query(getNumberOfPayloadsPerDatasourceQuery, rs -> { + try { // For each of the 2 columns returned (id, payload_count). The indexing starts from 1 + datasourcesWithNumOfPayloads.put(rs.getString(1), rs.getInt(2)); // Updates the number for an existing datasourceId or adds a new mapping for a new datasourceId. 
+ } catch (SQLException sqle) { + logger.error("No value was able to be retrieved from one of the columns of row_" + rs.getRow(), sqle); + } + }); + logger.info("The \"datasourcesWithNumOfPayloads\" map was populated/updated."); + } catch (EmptyResultDataAccessException erdae) { + logger.warn("The number of payloads per datasource could not be retrieved from the database \"" + databaseName + "\" using the getNumberOfPayloadsPerDatasourceQuery: " + getNumberOfPayloadsPerDatasourceQuery); + } catch (Exception e) { + logger.error("Problem when executing \"getNumberOfPayloadsPerDatasourceQuery\": " + getNumberOfPayloadsPerDatasourceQuery, e); + } + } + + // To get the human-friendly timestamp format from the BigInt in the database: + // select from_timestamp(CAST(CAST(`date` as decimal(30,0))/1000 AS timestamp), "yyyy-MM-dd HH:mm:ss.SSS") from payload + +} diff --git a/src/main/java/eu/openaire/pdf_aggregation_statistics/util/UriBuilder.java b/src/main/java/eu/openaire/pdf_aggregation_statistics/util/UriBuilder.java new file mode 100644 index 0000000..fd7bb17 --- /dev/null +++ b/src/main/java/eu/openaire/pdf_aggregation_statistics/util/UriBuilder.java @@ -0,0 +1,93 @@ +package eu.openaire.pdf_aggregation_statistics.util; + + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.boot.web.servlet.context.ServletWebServerApplicationContext; +import org.springframework.core.env.Environment; + +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.net.HttpURLConnection; +import java.net.InetAddress; +import java.net.URL; + + +public class UriBuilder { + + private static final Logger logger = LoggerFactory.getLogger(UriBuilder.class); + + public static String ip = null; + public static String baseUrl = null; + + public UriBuilder(Environment environment, ServletWebServerApplicationContext webServerAppCtxt) { + baseUrl = "http"; + + String sslEnabled = environment.getProperty("server.ssl.enabled"); + if (sslEnabled == 
null) { // It's expected to not exist if there is no SSL-configuration. + logger.warn("No property \"server.ssl.enabled\" was found in \"application.yml\". Continuing with plain HTTP.."); + sslEnabled = "false"; + } + baseUrl += sslEnabled.equals("true") ? "s" : ""; + baseUrl += "://"; + + if ( (ip = getPublicIP()) == null ) + ip = InetAddress.getLoopbackAddress().getHostAddress(); // Non-null. + + baseUrl += ip + ":" + webServerAppCtxt.getWebServer().getPort(); + + String baseInternalPath = environment.getProperty("server.servlet.context-path"); + if ( baseInternalPath != null ) { + if ( !baseInternalPath.startsWith("/") ) + baseUrl += "/"; + baseUrl += baseInternalPath; + if ( !baseInternalPath.endsWith("/") ) + baseUrl += "/"; + } else { + logger.warn("No property \"server.servlet.context-path\" was found in \"application.yml\"!"); // Yes it's expected. + baseUrl += "/"; + } + + logger.debug("ServerBaseURL: " + baseUrl); + } + + private static String getPublicIP() + { + String publicIpAddress = ""; + HttpURLConnection conn = null; + String urlString = "https://checkip.amazonaws.com/"; + try { + conn = (HttpURLConnection) new URL(urlString).openConnection(); + conn.setConnectTimeout(60_000); // 1 minute + conn.setReadTimeout(120_000); // 2 minutes + conn.setRequestMethod("GET"); + conn.connect(); + + int responseCode = conn.getResponseCode(); + if ( responseCode != 200 ) { + logger.warn("Cannot get the publicIP address for this machine, as \"" + urlString + "\" returned the HTTP-error-code: " + responseCode); + return null; + } + + try ( BufferedReader bf = new BufferedReader(new InputStreamReader(conn.getInputStream()))) { + publicIpAddress = bf.readLine().trim(); + } + } catch (Exception e) { + logger.warn("Cannot get the publicIP address for this machine, from \"" + urlString + "\"!", e); + return null; + } finally { + if ( conn != null ) + conn.disconnect(); + } + return publicIpAddress; + } + + public static String getBaseUrl() { + return baseUrl; + } + + 
public static void setBaseUrl(String baseUrl) { + UriBuilder.baseUrl = baseUrl; + } + +} diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml new file mode 100644 index 0000000..225ca4a --- /dev/null +++ b/src/main/resources/application.yml @@ -0,0 +1,63 @@ +server: + port: 1882 + servlet: + context-path: /api + shutdown: graceful + +database-name: pdfaggregation_i + +spring: + application: + name: PDF_Aggregation_Statistics + datasource: + driver-class-name: com.cloudera.impala.jdbc41.Driver + url: jdbc:impala://iis-cdh5-test-gw.ocean.icm.edu.pl:21050/ + username: '' + password: '' + hikari: + connectionTimeout: 30000 + idleTimeout: 600000 + maxLifetime: 1800000 + maximumPoolSize: 20 + minimumIdle: 4 + pool-name: StatisticsPool + output: + ansi: + enabled: always + lifecycle: + timeout-per-shutdown-phase: 2m + +# Prometheus related config. +management: + endpoint: + health: + enabled: true + show-details: always + metrics: + enabled: true + prometheus: + enabled: true + endpoints: + web: + base-path: /actuator + exposure: + include: health,info,prometheus,metrics + metrics: + tags: + application: ${spring.application.name} + + +logging: + level: + root: INFO + eu: + openaire: + pdf_aggregation_statistics: DEBUG + org: + springframework: + security: WARN + web: INFO + apache: + hadoop: + io: + compress: WARN diff --git a/src/main/resources/logback-spring.xml b/src/main/resources/logback-spring.xml new file mode 100644 index 0000000..5831c76 --- /dev/null +++ b/src/main/resources/logback-spring.xml @@ -0,0 +1,33 @@ + + + + logs/PDF_Aggregation_Statistics.log + + + logs/PDF_Aggregation_Statistics.%i.log.zip + 1 + 20 + + + + 50MB + + + + UTF-8 + %d{yyyy-MM-dd HH:mm:ss.SSS z} [%thread] %-5level %logger{36}.%M\(@%line\) - %msg%n + + + + + + UTF-8 + %d{yyyy-MM-dd HH:mm:ss.SSS z} [%thread] %highlight(%-5level) %cyan(%logger{36}.%M\(@%line\)) - %msg%n + + + + + + + + \ No newline at end of file diff --git 
a/src/test/java/eu/openaire/pdf_aggregation_statistics/PdfAggregationStatisticsApplicationTests.java b/src/test/java/eu/openaire/pdf_aggregation_statistics/PdfAggregationStatisticsApplicationTests.java new file mode 100644 index 0000000..0622b8b --- /dev/null +++ b/src/test/java/eu/openaire/pdf_aggregation_statistics/PdfAggregationStatisticsApplicationTests.java @@ -0,0 +1,12 @@ +package eu.openaire.pdf_aggregation_statistics; + +import org.springframework.boot.test.context.SpringBootTest; + +@SpringBootTest +class PdfAggregationStatisticsApplicationTests { + + //@Test + void contextLoads() { + } + +}