# dnet-hadoop/dhp-workflows/dhp-impact-indicators/src/main/resources/eu.dnetlib/job.properties

# The following set of properties are defined in https://support.openaire.eu/projects/openaire/wiki/Hadoop_clusters
# and concern the parameterization required for running workflows on the @GARR cluster

# Frontend (gateway) access: temp dir, user name, host name and SSH port
dhp.hadoop.frontend.temp.dir=/home/ilias.kanellos
dhp.hadoop.frontend.user.name=ilias.kanellos
dhp.hadoop.frontend.host.name=iis-cdh5-test-gw.ocean.icm.edu.pl
dhp.hadoop.frontend.port.ssh=22

# Oozie service, job tracker and HDFS name node
oozieServiceLoc=http://iis-cdh5-test-m3:11000/oozie
jobTracker=yarnRM
nameNode=hdfs://nameservice1
oozie.execution.log.file.location=target/extract-and-run-on-remote-host.log
maven.executable=mvn

# Spark driver/executor sizing
sparkDriverMemory=7G
sparkExecutorMemory=7G
sparkExecutorCores=4

# Spark share-lib selection. Original author's note: this "is given differently
# in an example I found online", so both spellings of the property are kept.
oozie.action.sharelib.for.spark=spark2
oozieActionShareLibForSpark2=spark2

# Spark 2 history server, event log and Hive locations
spark2YarnHistoryServerAddress=http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089
spark2EventLogDir=/user/spark/spark2ApplicationHistory
sparkSqlWarehouseDir=/user/hive/warehouse
hiveMetastoreUris=thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083

# This MAY avoid the "no library used" error
oozie.use.system.libpath=true

# Listener settings copied from openaire's jobs.
# (They used to be declared twice in this file with identical values; one copy is kept.)
spark2ExtraListeners=com.cloudera.spark.lineage.NavigatorAppListener
spark2SqlQueryExecutionListeners=com.cloudera.spark.lineage.NavigatorQueryListener

# ------------------------------------------------------------------------------ #
# Custom properties defined for this workflow

# YARN resource manager address, in the form given by the page linked at the
# start of this file for clusters that use YARN as resource manager
resourceManager=http://iis-cdh5-test-m2.ocean.icm.edu.pl:8088/cluster

# Current year: used when creating the graph and by some ranking methods
currentYear=2024

# PageRank: alpha value and convergence error
pageRankAlpha=0.5
convergenceError=0.000000000001

# AttRank: alpha, beta, gamma, rho and start year
attrankAlpha=0.2
attrankBeta=0.5
attrankGamma=0.3
attrankRho=-0.16
attrankStartYear=2021
# Disabled; presumably superseded by the shared currentYear above - TODO confirm
# attrankCurrentYear=2023

# Ram: gamma value
ramGamma=0.6
# Disabled; presumably superseded by the shared currentYear above - TODO confirm
# ramCurrentYear=2023

# Oozie workflow directory, relative to ${nameNode}
# (original author's note: "I think this should be the oozie workflow directory")
oozieWorkflowPath=user/ilias.kanellos/workflow_example/

# Directory where the workflow data is / should be stored, relative to ${nameNode}
workflowDataDir=user/ilias.kanellos/ranking_workflow

# Directory where dataframes are checkpointed
checkpointDir=${nameNode}/${workflowDataDir}/check/

# Directory for the doi-based bip graph
bipGraphFilePath=${nameNode}/${workflowDataDir}/bipdbv8_graph

# Folder from which synonyms of openaire-ids are read.
# The value used to be wrapped as ${/tmp/...}, which is a variable reference to a
# name that does not exist, not a path; it now follows the ${nameNode}/<path>
# form used by the previous (beta) setting kept below for reference.
# openaireDataInput=${nameNode}/tmp/beta_provision/graph/21_graph_cleaned/
openaireDataInput=${nameNode}/tmp/prod_provision/graph/18_graph_blacklisted

# Folder where the openaire-id to doi mapping is written
synonymFolder=${nameNode}/${workflowDataDir}/openaireid_to_dois/

# Where the openaire graph input is stored.
# NOTE(review): the original comment says "They told us on GARR to use a directory
# under /data", but this value lives under ${workflowDataDir} - confirm which is intended.
openaireGraphInputPath=${nameNode}/${workflowDataDir}/openaire_id_graph

# The workflow application path
wfAppPath=${nameNode}/${oozieWorkflowPath}
# Needed as a property of the workflow
oozie.wf.application.path=${wfAppPath}