dnet-hadoop/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/job.properties

106 lines
3.8 KiB
Properties

# The following set of properties are defined in https://support.openaire.eu/projects/openaire/wiki/Hadoop_clusters
# and concern the parameterization required for running workflows on the @GARR cluster
# --- You can override the following properties (if needed) coming from your ~/.dhp/application.properties ---
# dhp.hadoop.frontend.temp.dir=/home/ilias.kanellos
# dhp.hadoop.frontend.user.name=ilias.kanellos
# dhp.hadoop.frontend.host.name=iis-cdh5-test-gw.ocean.icm.edu.pl
# dhp.hadoop.frontend.port.ssh=22
# oozieServiceLoc=http://iis-cdh5-test-m3:11000/oozie
# jobTracker=yarnRM
# nameNode=hdfs://nameservice1
# oozie.execution.log.file.location = target/extract-and-run-on-remote-host.log
# maven.executable=mvn
# Some memory and driver settings for more demanding tasks
sparkHighDriverMemory=20G
sparkNormalDriverMemory=10G
sparkHighExecutorMemory=20G
sparkNormalExecutorMemory=10G
sparkExecutorCores=4
sparkShufflePartitions=7680
# The above is given differently in an example I found online
oozie.action.sharelib.for.spark=spark2
oozieActionShareLibForSpark2=spark2
spark2YarnHistoryServerAddress=http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089
spark2EventLogDir=/user/spark/spark2ApplicationHistory
sparkSqlWarehouseDir=/user/hive/warehouse
hiveMetastoreUris=thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083
# This MAY avoid the no library used error
oozie.use.system.libpath=true
# Some stuff copied from openaire's jobs
spark2ExtraListeners=com.cloudera.spark.lineage.NavigatorAppListener
spark2SqlQueryExecutionListeners=com.cloudera.spark.lineage.NavigatorQueryListener
# Some stuff copied from openaire's jobs
spark2ExtraListeners=com.cloudera.spark.lineage.NavigatorAppListener
spark2SqlQueryExecutionListeners=com.cloudera.spark.lineage.NavigatorQueryListener
# ------------------------------------------------------------------------------ #
# The following set of properties are my own custom ones
# Based on the page linked to at the start of the file, if we use yarn as a resource manager, its address is given as follows
resourceManager=http://iis-cdh5-test-m2.ocean.icm.edu.pl:8088/cluster
# current year used when creating graph / by some ranking methods
currentYear=2023
# Alpha value for pagerank
pageRankAlpha=0.5
# AttRank values
attrankAlpha=0.2
attrankBeta=0.5
attrankGamma=0.3
attrankRho=-0.16
# attrankCurrentYear=2023
attrankStartYear=2021
# Ram values
ramGamma=0.6
# ramCurrentYear=2023
# Convergence error for pagerank
convergenceError=0.000000000001
# I think this should be the oozie workflow directory
# oozieWorkflowPath=user/ilias.kanellos/workflow_example/
# Directory where json data containing scores will be output
bipScorePath=${workingDir}/openaire_universe_scores/
# Directory where dataframes are checkpointed
checkpointDir=${nameNode}/${workingDir}/check/
# The directory for the doi-based bip graph
# bipGraphFilePath=${nameNode}/${workingDir}/bipdbv8_graph
# The folder from which synonyms of openaire-ids are read
# openaireDataInput=${nameNode}/tmp/beta_provision/graph/21_graph_cleaned/
openaireDataInput=/tmp/prod_provision/graph/18_graph_blacklisted
# A folder where we will write the openaire to doi mapping
synonymFolder=${nameNode}/${workingDir}/openaireid_to_dois/
# This will be where we store the openaire graph input. They told us on GARR to use a directory under /data
openaireGraphInputPath=${nameNode}/${workingDir}/openaire_id_graph
# The workflow application path
wfAppPath=${oozieTopWfApplicationPath}
# The following is needed as a property of a workflow
#oozie.wf.application.path=${wfAppPath}
oozie.wf.application.path=${oozieTopWfApplicationPath}
# Path where the final output should be?
actionSetOutputPath=${workingDir}/bip_actionsets
# The directory to store project impact indicators
projectImpactIndicatorsOutput=${workingDir}/project_indicators
resume=entry-point-decision