From f835a752bf0bc3713eea783e198c8d3407dfec6c Mon Sep 17 00:00:00 2001
From: LSmyrnaios
Date: Mon, 20 Mar 2023 15:23:00 +0200
Subject: [PATCH] Transform the "application.properties" file to "application.yml" and optimize the property-trees.

---
 .../util/ParquetFileUtils.java            |  2 +-
 src/main/resources/application.properties | 75 -------------------
 src/main/resources/application.yml        | 74 ++++++++++++++++++
 3 files changed, 75 insertions(+), 76 deletions(-)
 delete mode 100644 src/main/resources/application.properties
 create mode 100644 src/main/resources/application.yml

diff --git a/src/main/java/eu/openaire/urls_controller/util/ParquetFileUtils.java b/src/main/java/eu/openaire/urls_controller/util/ParquetFileUtils.java
index c20a9fe..09d2663 100644
--- a/src/main/java/eu/openaire/urls_controller/util/ParquetFileUtils.java
+++ b/src/main/java/eu/openaire/urls_controller/util/ParquetFileUtils.java
@@ -79,7 +79,7 @@ public class ParquetFileUtils {
 
 
 	public ParquetFileUtils(@Value("${hdfs.baseUrl}") String webHDFSBaseUrl,
-							@Value("${hdfs.httpAuth}") String hdfsHttpAuthString, @Value("${hdfs.userName}") String hdfsUserName, @Value("${hdfs.password}") String hdfsPassword, @Value("${output.parquetLocalDirectoryPath}") String parquetBaseDirectoryPath,
+							@Value("${hdfs.httpAuth}") String hdfsHttpAuthString, @Value("${hdfs.userName}") String hdfsUserName, @Value("${hdfs.password}") String hdfsPassword, @Value("${services.pdfaggregation.controller.parquetLocalDirectoryPath}") String parquetBaseDirectoryPath,
 							@Value("${hdfs.parquetRemoteBaseDirectoryPath}") String hdfsParquetBaseDir,
 							@Value("${services.pdfaggregation.controller.isTestEnvironment}") boolean isTestEnvironment,
 							FileUtils fileUtils) throws IOException
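The only source change the rename requires is the @Value key path: Spring resolves ${...} placeholders the same way whether the backing file is .properties or .yml. A minimal sketch of such an injection, with a hypothetical component name that is not part of this patch:

import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;

// Hypothetical component, for illustration only. The renamed key
// "services.pdfaggregation.controller.parquetLocalDirectoryPath" resolves from
// application.yml exactly as the old "output.parquetLocalDirectoryPath"
// resolved from application.properties.
@Component
public class ParquetPathExample {

    private final String parquetBaseDirectoryPath;

    public ParquetPathExample(
            @Value("${services.pdfaggregation.controller.parquetLocalDirectoryPath}") String parquetBaseDirectoryPath) {
        this.parquetBaseDirectoryPath = parquetBaseDirectoryPath; // e.g. "tmp/parquetFiles/"
    }
}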
diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties
deleted file mode 100644
index 0888e79..0000000
--- a/src/main/resources/application.properties
+++ /dev/null
@@ -1,75 +0,0 @@
-# HTTP CONFIGURATION
-server.port = 1880
-# Server api path
-server.servlet.context-path=/api
-
-# Service config
-
-services.pdfaggregation.controller.isTestEnvironment = false
-# In case the "isTestEnvironment" is "true", the "testDatabase" below and all its tables are created (if they don't exist).
-# The tables "datasource", "publication", "publication_pids" and "publication_urls" are filled with the data from the same tables existing in the "initialDatabase", if they don't exist.
-# In case the "isTestEnvironment" is "false", the "initialDatabase" is used. The Controller assumes that the above 4 tables are present, and only creates, if they don't exist, the following tables:
-# "assignment", "attempt" and "payload", which are populated during execution.
-
-services.pdfaggregation.controller.db.initialDatabaseName = pdfaggregation_i
-services.pdfaggregation.controller.db.testDatabaseName = pdfaggregationdatabase_new_s3_names
-
-services.pdfaggregation.controller.baseFilesLocation = tmp/
-services.pdfaggregation.controller.maxAttemptsPerRecord = 3
-services.pdfaggregation.controller.assignmentLimit = 10000
-
-services.pdfaggregation.controller.s3.endpoint = xa
-services.pdfaggregation.controller.s3.accessKey = xa
-services.pdfaggregation.controller.s3.secretKey = xa
-services.pdfaggregation.controller.s3.region = xa
-services.pdfaggregation.controller.s3.bucketName = xa
-services.pdfaggregation.controller.s3.shouldEmptyBucket = false
-services.pdfaggregation.controller.s3.shouldShowAllS3Buckets = true
-
-
-# Database
-spring.datasource.url=jdbc:impala://iis-cdh5-test-gw.ocean.icm.edu.pl:21050/
-spring.datasource.username=
-spring.datasource.password=
-spring.datasource.driver-class-name=com.cloudera.impala.jdbc41.Driver
-
-spring.datasource.hikari.pool-name=ControllerPool
-spring.datasource.hikari.maximumPoolSize=20
-spring.datasource.hikari.maxLifetime=1800000
-spring.datasource.hikari.minimumIdle=4
-spring.datasource.hikari.connectionTimeout=30000
-spring.datasource.hikari.idleTimeout=600000
-
-# LOGGING LEVELS
-logging.level.root=INFO
-logging.level.org.springframework.web=INFO
-logging.level.org.springframework.security=WARN
-logging.level.org.apache.hadoop.io.compress=WARN
-logging.level.eu.openaire.urls_controller=DEBUG
-spring.output.ansi.enabled=always
-
-# Parquet settings
-hdfs.baseUrl=https://iis-cdh5-test-gw.ocean.icm.edu.pl/webhdfs/v1
-
-# HTTP-Authorization --> Authorization: Basic Base64Encode(username:password)
-# Give the credentials by either giving the Http-Auth-string AND the username (used as a parameter in the WebHdfs-requests),
-# or by giving the username AND the password, in order for the program to create the auth-String programmatically.
-# The first approach is intended for more privacy, while the second for more ease. Either way, all three should be uncommented, no matter which ones are used.
-
-hdfs.httpAuth=
-hdfs.userName=
-hdfs.password=
-
-output.parquetLocalDirectoryPath=${services.pdfaggregation.controller.baseFilesLocation}parquetFiles/
-hdfs.parquetRemoteBaseDirectoryPath=/tmp/parquet_uploads/
-
-## MULTIPART (MultipartProperties)
-
-# Enable multipart uploads
-spring.servlet.multipart.enabled=true
-# Threshold after which files are written to disk.
-spring.servlet.multipart.file-size-threshold=2KB
-# Max file size.
-spring.servlet.multipart.max-file-size=200MB
-# Max Request Size
-spring.servlet.multipart.max-request-size=215MB
\ No newline at end of file
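The replacement application.yml (next hunk) encodes the same dotted keys as a nested tree; Spring flattens that tree back into identical property names at runtime. One way to verify the two formats are equivalent, sketched under the assumption that application.yml is on the classpath and SnakeYAML is available:

import java.util.Properties;

import org.springframework.beans.factory.config.YamlPropertiesFactoryBean;
import org.springframework.core.io.ClassPathResource;

// Standalone check, not part of this patch: flatten application.yml back into
// dotted keys and confirm they match the keys the old .properties file defined.
public class YamlFlatteningCheck {

    public static void main(String[] args) {
        YamlPropertiesFactoryBean factory = new YamlPropertiesFactoryBean();
        factory.setResources(new ClassPathResource("application.yml"));
        Properties props = factory.getObject();

        // Expected to print "pdfaggregation_i", the same value the old flat key held.
        System.out.println(props.getProperty("services.pdfaggregation.controller.db.initialDatabaseName"));

        // Placeholders like ${services.pdfaggregation.controller.baseFilesLocation} are
        // kept verbatim here; the full Spring Environment resolves them at startup.
        System.out.println(props.getProperty("services.pdfaggregation.controller.parquetLocalDirectoryPath"));
    }
}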
diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml
new file mode 100644
index 0000000..bd9d1ab
--- /dev/null
+++ b/src/main/resources/application.yml
@@ -0,0 +1,74 @@
+server:
+  port: 1880
+  servlet:
+    context-path: /api
+
+services:
+  pdfaggregation:
+    controller:
+      isTestEnvironment: false
+      # In case the "isTestEnvironment" is "true", the "testDatabase" below and all its tables are created (if they don't exist).
+      # The tables "datasource", "publication", "publication_pids" and "publication_urls" are filled with the data from the same tables existing in the "initialDatabase", if they don't exist.
+      # In case the "isTestEnvironment" is "false", the "initialDatabase" is used. The Controller assumes that the above 4 tables are present, and only creates, if they don't exist, the following tables:
+      # "assignment", "attempt" and "payload", which are populated during execution.
+
+      db:
+        initialDatabaseName: pdfaggregation_i
+        testDatabaseName: pdfaggregationdatabase_new_s3_names
+
+      assignmentLimit: 10000
+      maxAttemptsPerRecord: 3
+      baseFilesLocation: tmp/
+      parquetLocalDirectoryPath: ${services.pdfaggregation.controller.baseFilesLocation}parquetFiles/
+      s3:
+        endpoint: XA
+        accessKey: XA
+        secretKey: XA
+        region: XA
+        bucketName: XA
+        shouldEmptyBucket: false
+        shouldShowAllS3Buckets: true
+
+spring:
+  datasource:
+    driver-class-name: com.cloudera.impala.jdbc41.Driver
+    url: XA
+    username: ''
+    password: ''
+    hikari:
+      connectionTimeout: 30000
+      idleTimeout: 600000
+      maxLifetime: 1800000
+      maximumPoolSize: 20
+      minimumIdle: 4
+      pool-name: ControllerPool
+  output:
+    ansi:
+      enabled: always
+
+hdfs:
+  baseUrl: XA
+  userName: XA
+  password: XA
+  httpAuth: ''
+  # HTTP-Authorization --> Authorization: Basic Base64Encode(username:password)
+  # Give the credentials by either giving the Http-Auth-string AND the username (used as a parameter in the WebHdfs-requests),
+  # or by giving the username AND the password, in order for the program to create the auth-String programmatically.
+  # The first approach is intended for more privacy, while the second for more ease. Either way, all three should be uncommented, no matter which ones are used.
+
+  parquetRemoteBaseDirectoryPath: /tmp/parquet_uploads/
+
+logging:
+  level:
+    root: INFO
+    eu:
+      openaire:
+        urls_controller: DEBUG
+    org:
+      springframework:
+        security: WARN
+        web: INFO
+      apache:
+        hadoop:
+          io:
+            compress: WARN
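As the hdfs comment block notes, the Authorization header may either be supplied ready-made via hdfs.httpAuth or built from hdfs.userName and hdfs.password. A minimal sketch of that construction, using only the JDK (the class name and credential values are hypothetical):

import java.nio.charset.StandardCharsets;
import java.util.Base64;

// Builds "Basic Base64Encode(username:password)", as described in the
// application.yml comment above. Illustration only, not part of this patch.
public class HdfsAuthSketch {

    static String basicAuthHeaderValue(String userName, String password) {
        String credentials = userName + ":" + password;
        return "Basic " + Base64.getEncoder().encodeToString(credentials.getBytes(StandardCharsets.UTF_8));
    }

    public static void main(String[] args) {
        System.out.println(basicAuthHeaderValue("hdfsUser", "hdfsPass")); // hypothetical credentials
    }
}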