# UrlsController/src/main/resources/application.yml
# (repository-viewer metadata: 103 lines, 3.9 KiB, YAML)

# Embedded web server: listening port and the servlet context-path prefix.
server:
  port: 1880
  servlet:
    context-path: /api
# Application-specific settings, all rooted at "services.pdfaggregation.controller".
services:
  pdfaggregation:
    controller:
      isTestEnvironment: false
      # In case the "isTestEnvironment" is "true", the "testDatabase" below and all its tables are
      # created (if they do not exist). The tables "datasource", "publication", "publication_pids"
      # and "publication_urls" are filled with the data from the same tables existing in the
      # "initialDatabase", if they don't exist.
      # In case the "isTestEnvironment" is "false", the "initialDatabase" is used. The Controller
      # assumes that the above 4 tables are present, and only creates, if they don't exist, the
      # following tables: "assignment", "attempt" and "payload", which are populated during execution.
      db:
        initialDatabaseName: pdfaggregation_i
        testDatabaseName: pdfaggregationdatabase_new_s3_names

      assignmentLimit: 10000
      maxAttemptsPerRecord: 3

      baseFilesLocation: tmp/
      # The placeholder prepends the "baseFilesLocation" value defined right above.
      parquetLocalDirectoryPath: ${services.pdfaggregation.controller.baseFilesLocation}parquetFiles/

      # NOTE(review): the "XA" values look like redacted placeholders; confirm real values are
      # supplied at deployment time.
      s3:
        endpoint: XA
        accessKey: XA
        secretKey: XA
        region: XA
        bucketName: XA
        shouldEmptyBucket: false
        shouldShowAllS3Buckets: true

      # Provide a list of datasource IDs, which should be excluded from crawling.
      # Their content is either bulk-imported or is known to be restricted.
      datasources:
        # Use comma-separated values (one in each line for best readability), as Spring is
        # currently incapable of parsing Dropwizard-styled lists (at least without additional config).
        # The value lines must stay indented deeper than the "excludedIDs" key.
        excludedIDs: >
          opendoar____::6f4922f45568161a8cdf4ad2299f6d23
        # Since we use a multi-line value for our list, we add the ID-explanations here
        # (otherwise comments will be part of values):
        # First-id: arXiv.org e-Print Archive
# Spring Boot settings: application name, JDBC datasource with its Hikari connection pool,
# and ANSI-colored console output.
spring:
  application:
    name: Urls_Controller
  datasource:
    driver-class-name: com.cloudera.impala.jdbc41.Driver
    url: XA
    username: ''
    password: ''
    hikari:
      # Timeouts are presumably in milliseconds (HikariCP convention) - confirm.
      connectionTimeout: 30000
      idleTimeout: 600000
      maxLifetime: 1800000
      maximumPoolSize: 20
      minimumIdle: 4
      pool-name: ControllerPool
  output:
    ansi:
      enabled: always
# HDFS (WebHDFS) connection settings and the remote directory for the parquet uploads.
hdfs:
  baseUrl: XA
  userName: XA
  password: XA
  httpAuth: ''
  # HTTP-Authorization --> Authorization: Basic Base64Encode(username:password)
  # Give the credentials by either giving the Http-Auth-string AND the username (used as
  # parameter in the WebHdfs-requests),
  # or by giving the username AND the password, in order for the program to create the
  # auth-String programmatically.
  # The first approach is intended for more privacy, while the second for more ease.
  # Either way, all three should be uncommented, no matter which ones are used.

  # In case the "isTestEnvironment" is "true", this is automatically supplemented by a "/test/"
  # subdirectory, to avoid any conflicts.
  parquetRemoteBaseDirectoryPath: /tmp/parquet_uploads/
# Prometheus related config.
management:
  # Per-endpoint switches ("endpoint", singular).
  endpoint:
    health:
      enabled: true
      # NOTE(review): "always" returns the full health details to every caller; confirm that
      # this endpoint is not reachable by untrusted clients.
      show-details: always
    metrics:
      enabled: true
    prometheus:
      enabled: true
  # HTTP exposure of the endpoints ("endpoints", plural).
  endpoints:
    web:
      base-path: /actuator
      exposure:
        include: health,info,prometheus,metrics
  metrics:
    tags:
      # Tag the exported metrics with the application name defined under "spring.application.name".
      application: ${spring.application.name}
# Log levels per logger; the nested keys under "level" spell out dotted package names
# (e.g. "org.apache.hadoop.io.compress").
logging:
  level:
    root: INFO
    eu:
      openaire:
        urls_controller: DEBUG
    org:
      springframework:
        security: WARN
        web: INFO
      apache:
        hadoop:
          io:
            compress: WARN