# Oozie job properties for the ranking workflow.
# The following properties are defined in https://support.openaire.eu/projects/openaire/wiki/Hadoop_clusters
# and cover the parameterization required for running workflows on the @GARR cluster.

# Frontend (gateway) account and SSH access
dhp.hadoop.frontend.temp.dir=/home/ilias.kanellos
dhp.hadoop.frontend.user.name=ilias.kanellos
dhp.hadoop.frontend.host.name=iis-cdh5-test-gw.ocean.icm.edu.pl
dhp.hadoop.frontend.port.ssh=22
# Oozie service, job tracker and HDFS name node
oozieServiceLoc=http://iis-cdh5-test-m3:11000/oozie
jobTracker=yarnRM
nameNode=hdfs://nameservice1
# Log file location (relative path) and the Maven executable to invoke
oozie.execution.log.file.location=target/extract-and-run-on-remote-host.log
maven.executable=mvn
# Spark driver/executor resources
sparkDriverMemory=7G
sparkExecutorMemory=7G
sparkExecutorCores=4
# Original note: "The above is given differently in an example I found online".
# NOTE(review): it is unclear which property this refers to - confirm.
oozie.action.sharelib.for.spark=spark2
oozieActionShareLibForSpark2=spark2
spark2YarnHistoryServerAddress=http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089
spark2EventLogDir=/user/spark/spark2ApplicationHistory
sparkSqlWarehouseDir=/user/hive/warehouse
hiveMetastoreUris=thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083
# This may avoid the "no library used" error
oozie.use.system.libpath=true
# Spark listener settings copied from OpenAIRE's jobs.
# Each key is defined exactly once; an earlier revision repeated both keys with identical values.
spark2ExtraListeners=com.cloudera.spark.lineage.NavigatorAppListener
spark2SqlQueryExecutionListeners=com.cloudera.spark.lineage.NavigatorQueryListener

# ------------------------------------------------------------------------------ #
# The following properties are custom ones for this workflow

# Based on the page linked at the start of the file: when YARN is used as the resource manager, its address is given as follows
resourceManager=http://iis-cdh5-test-m2.ocean.icm.edu.pl:8088/cluster

# Current year, used when creating the graph and by some ranking methods
currentYear=2024

# Alpha value for PageRank
pageRankAlpha=0.5
# AttRank values
attrankAlpha=0.2
attrankBeta=0.5
attrankGamma=0.3
attrankRho=-0.16
# NOTE(review): disabled key kept for reference; presumably superseded by currentYear - confirm
# attrankCurrentYear=2023
attrankStartYear=2021

# Ram values
ramGamma=0.6
# NOTE(review): disabled key kept for reference; presumably superseded by currentYear - confirm
# ramCurrentYear=2023

# Convergence error for PageRank
convergenceError=0.000000000001

# Presumably the Oozie workflow directory (relative to the name node root) - TODO confirm
oozieWorkflowPath=user/ilias.kanellos/workflow_example/

# The directory where the workflow data is/should be stored (relative to the name node root)
workflowDataDir=user/ilias.kanellos/ranking_workflow

# Directory where dataframes are checkpointed
checkpointDir=${nameNode}/${workflowDataDir}/check/

# The directory for the DOI-based BIP graph
bipGraphFilePath=${nameNode}/${workflowDataDir}/bipdbv8_graph

# The folder from which synonyms of OpenAIRE ids are read.
# Only ${nameNode} is a variable reference; the rest of the value is a literal path.
# Previous (beta) input, kept for reference:
# openaireDataInput=${nameNode}/tmp/beta_provision/graph/21_graph_cleaned/
openaireDataInput=${nameNode}/tmp/prod_provision/graph/18_graph_blacklisted

# A folder where we will write the OpenAIRE-id to DOI mapping
synonymFolder=${nameNode}/${workflowDataDir}/openaireid_to_dois/

# This is where we store the OpenAIRE graph input.
# NOTE(review): the original note says GARR asked for a directory under /data, but this
# value resolves under ${workflowDataDir} - confirm which location is intended.
openaireGraphInputPath=${nameNode}/${workflowDataDir}/openaire_id_graph

# The workflow application path
wfAppPath=${nameNode}/${oozieWorkflowPath}
# The following is needed as a property of a workflow
oozie.wf.application.path=${wfAppPath}