2023-07-17 10:17:53 +02:00
|
|
|
# --- You can override the following properties (if needed) coming from your ~/.dhp/application.properties ---
|
|
|
|
# dhp.hadoop.frontend.temp.dir=/home/ilias.kanellos
|
|
|
|
# dhp.hadoop.frontend.user.name=ilias.kanellos
|
|
|
|
# dhp.hadoop.frontend.host.name=iis-cdh5-test-gw.ocean.icm.edu.pl
|
|
|
|
# dhp.hadoop.frontend.port.ssh=22
|
|
|
|
# oozieServiceLoc=http://iis-cdh5-test-m3:11000/oozie
|
|
|
|
# jobTracker=yarnRM
|
|
|
|
# nameNode=hdfs://nameservice1
|
|
|
|
# oozie.execution.log.file.location = target/extract-and-run-on-remote-host.log
|
|
|
|
# maven.executable=mvn
|
|
|
|
|
|
|
|
# Spark memory, executor-core and shuffle-partition settings for more demanding tasks
|
|
|
|
sparkDriverMemory=10G
|
|
|
|
sparkExecutorMemory=10G
|
|
|
|
sparkExecutorCores=4
|
|
|
|
sparkShufflePartitions=7680
|
|
|
|
|
|
|
|
# The above is given differently in an example I found online
|
|
|
|
oozie.action.sharelib.for.spark=spark2
|
|
|
|
oozieActionShareLibForSpark2=spark2
|
|
|
|
spark2YarnHistoryServerAddress=http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089
|
|
|
|
spark2EventLogDir=/user/spark/spark2ApplicationHistory
|
|
|
|
sparkSqlWarehouseDir=/user/hive/warehouse
|
|
|
|
hiveMetastoreUris=thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083
|
|
|
|
# Using the system libpath MAY avoid the "no library used" error
|
|
|
|
oozie.use.system.libpath=true
|
|
|
|
# Spark listener settings copied from OpenAIRE's jobs
|
|
|
|
spark2ExtraListeners=com.cloudera.spark.lineage.NavigatorAppListener
|
|
|
|
spark2SqlQueryExecutionListeners=com.cloudera.spark.lineage.NavigatorQueryListener
|
|
|
|
|
|
|
|
# The following is needed as a property of a workflow
|
|
|
|
oozie.wf.application.path=${oozieTopWfApplicationPath}
|
|
|
|
|
2023-10-25 21:05:02 +02:00
|
|
|
crossrefInputPath=/data/bip-affiliations/data.json
|
|
|
|
pubmedInputPath=/data/bip-affiliations/pubmed-data.json
|
2023-09-04 15:04:41 +02:00
|
|
|
outputPath=/tmp/crossref-affiliations-output-v5
|