# The following set of properties are defined in https://support.openaire.eu/projects/openaire/wiki/Hadoop_clusters
# and concern the parameterization required for running workflows on the @GARR cluster

# --- You can override the following properties (if needed) coming from your ~/.dhp/application.properties ---
# dhp.hadoop.frontend.temp.dir=/home/ilias.kanellos
# dhp.hadoop.frontend.user.name=ilias.kanellos
# dhp.hadoop.frontend.host.name=iis-cdh5-test-gw.ocean.icm.edu.pl
# dhp.hadoop.frontend.port.ssh=22
# oozieServiceLoc=http://iis-cdh5-test-m3:11000/oozie
# jobTracker=yarnRM
# nameNode=hdfs://nameservice1
# oozie.execution.log.file.location = target/extract-and-run-on-remote-host.log
# maven.executable=mvn

# Some memory and driver settings for more demanding tasks
sparkHighDriverMemory=20G
sparkNormalDriverMemory=10G

sparkHighExecutorMemory=20G
sparkNormalExecutorMemory=10G

sparkExecutorCores=4
sparkShufflePartitions=7680

# The above is given differently in an example I found online
# NOTE(review): presumably this refers to the two share-lib spellings below - confirm
oozie.action.sharelib.for.spark=spark2
oozieActionShareLibForSpark2=spark2
spark2YarnHistoryServerAddress=http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089
spark2EventLogDir=/user/spark/spark2ApplicationHistory
sparkSqlWarehouseDir=/user/hive/warehouse
hiveMetastoreUris=thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083

# This MAY avoid the no library used error
oozie.use.system.libpath=true

# Some stuff copied from openaire's jobs
# (this pair of keys used to be listed twice with identical values; one copy is kept)
spark2ExtraListeners=com.cloudera.spark.lineage.NavigatorAppListener
spark2SqlQueryExecutionListeners=com.cloudera.spark.lineage.NavigatorQueryListener

# ------------------------------------------------------------------------------ #
# The following set of properties are my own custom ones

# Based on the page linked to at the start of the file, if we use yarn as a
# resource manager, its address is given as follows
resourceManager=http://iis-cdh5-test-m2.ocean.icm.edu.pl:8088/cluster

# Current year used when creating graph / by some ranking methods
currentYear=2023

# Alpha value for pagerank
pageRankAlpha=0.5

# AttRank values
attrankAlpha=0.2
attrankBeta=0.5
attrankGamma=0.3
attrankRho=-0.16
# attrankCurrentYear=2023
attrankStartYear=2021

# Ram values
ramGamma=0.6
# ramCurrentYear=2023

# Convergence error for pagerank
convergenceError=0.000000000001

# I think this should be the oozie workflow directory
# oozieWorkflowPath=user/ilias.kanellos/workflow_example/

# Directory where json data containing scores will be output
bipScorePath=${workingDir}/openaire_universe_scores/

# Directory where dataframes are checkpointed
checkpointDir=${nameNode}/${workingDir}/check/

# The directory for the doi-based bip graph
# bipGraphFilePath=${nameNode}/${workingDir}/bipdbv8_graph

# The folder from which synonyms of openaire-ids are read
# openaireDataInput=${nameNode}/tmp/beta_provision/graph/21_graph_cleaned/
openaireDataInput=/tmp/prod_provision/graph/18_graph_blacklisted

# A folder where we will write the openaire to doi mapping
synonymFolder=${nameNode}/${workingDir}/openaireid_to_dois/

# This will be where we store the openaire graph input. They told us on GARR to use a directory under /data
# NOTE(review): the value below is built from ${workingDir}; whether that resolves under /data is not visible here - confirm
openaireGraphInputPath=${nameNode}/${workingDir}/openaire_id_graph

# The workflow application path
wfAppPath=${oozieTopWfApplicationPath}

# The following is needed as a property of a workflow
#oozie.wf.application.path=${wfAppPath}
oozie.wf.application.path=${oozieTopWfApplicationPath}

# Path where the final output should be?
actionSetOutputPath=${workingDir}/bip_actionsets

# The directory to store project impact indicators
projectImpactIndicatorsOutput=${workingDir}/project_indicators

# NOTE(review): presumably names the workflow node to start or resume from - verify against the workflow definition
resume=entry-point-decision