# The following properties are defined in https://support.openaire.eu/projects/openaire/wiki/Hadoop_clusters
# and concern the parameterization required for running workflows on the @GARR cluster

# --- You can override the following properties (if needed) coming from your ~/.dhp/application.properties ---
# dhp.hadoop.frontend.temp.dir=/home/ilias.kanellos
# dhp.hadoop.frontend.user.name=ilias.kanellos
# dhp.hadoop.frontend.host.name=iis-cdh5-test-gw.ocean.icm.edu.pl
# dhp.hadoop.frontend.port.ssh=22
# oozieServiceLoc=http://iis-cdh5-test-m3:11000/oozie
# jobTracker=yarnRM
# nameNode=hdfs://nameservice1
# oozie.execution.log.file.location = target/extract-and-run-on-remote-host.log
# maven.executable=mvn

# Spark memory, core and shuffle-partition settings.
# The "High" values are presumably the ones meant for the more demanding tasks;
# the "Normal" values for everything else -- confirm against the workflow definitions.
sparkHighDriverMemory=20G
sparkNormalDriverMemory=10G

sparkHighExecutorMemory=20G
sparkNormalExecutorMemory=10G

sparkExecutorCores=4
sparkShufflePartitions=7680

# The above is given differently in an example I found online
# NOTE(review): "the above" has no obvious referent here; presumably it refers to the
# share-lib setting, which is spelled in two different ways below -- confirm which
# spelling the workflow actually reads.
oozie.action.sharelib.for.spark=spark2
oozieActionShareLibForSpark2=spark2
spark2YarnHistoryServerAddress=http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089
spark2EventLogDir=/user/spark/spark2ApplicationHistory
sparkSqlWarehouseDir=/user/hive/warehouse
hiveMetastoreUris=thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083
# This MAY avoid the no library used error
oozie.use.system.libpath=true
# Some stuff copied from openaire's jobs
# (Each key is defined exactly once; an identical duplicate of this section was removed.)
spark2ExtraListeners=com.cloudera.spark.lineage.NavigatorAppListener
spark2SqlQueryExecutionListeners=com.cloudera.spark.lineage.NavigatorQueryListener

# ------------------------------------------------------------------------------ #
# The following properties are my own custom ones

# Based on the page linked to at the start of the file, if we use yarn as a resource manager, its address is given as follows
resourceManager=http://iis-cdh5-test-m2.ocean.icm.edu.pl:8088/cluster

# Current year used when creating the graph / by some ranking methods
currentYear=2024

# Alpha value for pagerank
pageRankAlpha=0.5
# AttRank values (alpha, beta and gamma below sum to 1)
attrankAlpha=0.2
attrankBeta=0.5
attrankGamma=0.3
attrankRho=-0.16
# NOTE(review): presumably superseded by currentYear above -- confirm before removing
# attrankCurrentYear=2023
attrankStartYear=2021

# Ram values
ramGamma=0.6
# NOTE(review): presumably superseded by currentYear above -- confirm before removing
# ramCurrentYear=2023

# Convergence error for pagerank
convergenceError=0.000000000001

# NOTE: ${workingDir} and ${nameNode} are not defined in this file as shown (nameNode
# appears only as a commented-out override); presumably they are supplied from
# ~/.dhp/application.properties or by the build -- confirm.

# I think this should be the oozie workflow directory
# oozieWorkflowPath=user/ilias.kanellos/workflow_example/

# Directory where json data containing scores will be output
bipScorePath=${workingDir}/openaire_universe_scores/

# Directory where dataframes are checkpointed
checkpointDir=${nameNode}/${workingDir}/check/

# The directory for the doi-based bip graph
# bipGraphFilePath=${nameNode}/${workingDir}/bipdbv8_graph

# The folder from which synonyms of openaire-ids are read
# openaireDataInput=${nameNode}/tmp/beta_provision/graph/21_graph_cleaned/
openaireDataInput=/tmp/prod_provision/graph/18_graph_blacklisted

# A folder where we will write the openaire to doi mapping
synonymFolder=${nameNode}/${workingDir}/openaireid_to_dois/

# This will be where we store the openaire graph input. They told us on GARR to use a directory under /data
openaireGraphInputPath=${nameNode}/${workingDir}/openaire_id_graph

# The workflow application path
wfAppPath=${oozieTopWfApplicationPath}

# The following is needed as a property of a workflow
#oozie.wf.application.path=${wfAppPath}
oozie.wf.application.path=${oozieTopWfApplicationPath}


# Path where the final output should be?
actionSetOutputPath=${workingDir}/bip_actionsets/

# The directory to store project impact indicators
projectImpactIndicatorsOutput=${workingDir}/project_indicators

# NOTE(review): presumably the name of the workflow step to start/resume from -- confirm
resume=create-openaire-ranking-graph