stats_db_name
the target stats database name
openaire_db_name
the original graph database name
external_stats_db_name
the name of the external stats database, whose data should be added since it is not included in the graph database
usage_stats_db_name
the usage statistics database name
stats_db_shadow_name
the name of the shadow schema
monitor_db_name
the target monitor db name
monitor_db_shadow_name
the name of the shadow monitor db
observatory_db_name
the target observatory db name
observatory_db_shadow_name
the name of the shadow observatory db
usage_stats_db_shadow_name
the name of the shadow usage stats db
stats_tool_api_url
The URL of the stats tool API. It is used to trigger the cache update.
hive_metastore_uris
hive server metastore URIs
hive_jdbc_url
hive server jdbc url
hive_timeout
the time period, in seconds, after which Hive fails a transaction if a Hive client has not sent a heartbeat. The default value is 300 seconds.
context_api_url
the base url of the context api (https://services.openaire.eu/openaire)
hadoop_user_name
user name of the workflow owner
sparkSqlWarehouseDir
sparkClusterOpts
--conf spark.network.timeout=600 --conf spark.extraListeners= --conf spark.sql.queryExecutionListeners= --conf spark.yarn.historyServer.address=http://iis-cdh5-test-m3.ocean.icm.edu.pl:18088 --conf spark.eventLog.dir=hdfs://nameservice1/user/spark/applicationHistory
spark cluster-wide options
sparkResourceOpts
--executor-memory=6G --conf spark.executor.memoryOverhead=4G --executor-cores=6 --driver-memory=8G --driver-cores=4
spark resource options
sparkApplicationOpts
--conf spark.sql.shuffle.partitions=3840
spark application options
${jobTracker}
${nameNode}
hive.metastore.uris
${hive_metastore_uris}
hive.txn.timeout
${hive_timeout}
hive.mapjoin.followby.gby.localtask.max.memory.usage
0.80
oozie.action.sharelib.for.spark
${oozieActionShareLibForSpark2}
mapred.job.queue.name
analytics
${wf:conf('resumeFrom') eq 'Step1'}
${wf:conf('resumeFrom') eq 'Step2'}
${wf:conf('resumeFrom') eq 'Step3'}
${wf:conf('resumeFrom') eq 'Step4'}
${wf:conf('resumeFrom') eq 'Step5'}
${wf:conf('resumeFrom') eq 'Step6'}
${wf:conf('resumeFrom') eq 'Step7'}
${wf:conf('resumeFrom') eq 'Step8'}
${wf:conf('resumeFrom') eq 'Step9'}
${wf:conf('resumeFrom') eq 'Step10'}
${wf:conf('resumeFrom') eq 'Step11'}
${wf:conf('resumeFrom') eq 'Step12'}
${wf:conf('resumeFrom') eq 'Step13'}
${wf:conf('resumeFrom') eq 'Step14'}
${wf:conf('resumeFrom') eq 'Step15'}
${wf:conf('resumeFrom') eq 'Step15_5'}
${wf:conf('resumeFrom') eq 'Contexts'}
${wf:conf('resumeFrom') eq 'Step16-createIndicatorsTables'}
${wf:conf('resumeFrom') eq 'Step16_1-definitions'}
${wf:conf('resumeFrom') eq 'Step16_5'}
${wf:conf('resumeFrom') eq 'Step19-finalize'}
${wf:conf('resumeFrom') eq 'step20-createMonitorDB'}
${wf:conf('resumeFrom') eq 'step21-createObservatoryDB-pre'}
${wf:conf('resumeFrom') eq 'step21-createObservatoryDB'}
${wf:conf('resumeFrom') eq 'step21-createObservatoryDB-post'}
${wf:conf('resumeFrom') eq 'step22-copyDataToImpalaCluster'}
${wf:conf('resumeFrom') eq 'step22a-createPDFsAggregated'}
${wf:conf('resumeFrom') eq 'step23-finalizeImpalaCluster'}
${wf:conf('resumeFrom') eq 'Step24-updateCache'}
Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
${hive_jdbc_url}
stats_db_name=${stats_db_name}
openaire_db_name=${openaire_db_name}
${hive_jdbc_url}
stats_db_name=${stats_db_name}
openaire_db_name=${openaire_db_name}
${hive_jdbc_url}
stats_db_name=${stats_db_name}
openaire_db_name=${openaire_db_name}
${hive_jdbc_url}
stats_db_name=${stats_db_name}
openaire_db_name=${openaire_db_name}
${hive_jdbc_url}
stats_db_name=${stats_db_name}
openaire_db_name=${openaire_db_name}
${hive_jdbc_url}
stats_db_name=${stats_db_name}
openaire_db_name=${openaire_db_name}
${hive_jdbc_url}
stats_db_name=${stats_db_name}
openaire_db_name=${openaire_db_name}
${hive_jdbc_url}
stats_db_name=${stats_db_name}
openaire_db_name=${openaire_db_name}
${hive_jdbc_url}
stats_db_name=${stats_db_name}
openaire_db_name=${openaire_db_name}
${hive_jdbc_url}
stats_db_name=${stats_db_name}
openaire_db_name=${openaire_db_name}
external_stats_db_name=${external_stats_db_name}
${hive_jdbc_url}
stats_db_name=${stats_db_name}
openaire_db_name=${openaire_db_name}
external_stats_db_name=${external_stats_db_name}
${hive_jdbc_url}
stats_db_name=${stats_db_name}
openaire_db_name=${openaire_db_name}
${hive_jdbc_url}
stats_db_name=${stats_db_name}
openaire_db_name=${openaire_db_name}
${hive_jdbc_url}
stats_db_name=${stats_db_name}
openaire_db_name=${openaire_db_name}
${hive_jdbc_url}
stats_db_name=${stats_db_name}
openaire_db_name=${openaire_db_name}
${hive_jdbc_url}
stats_db_name=${stats_db_name}
openaire_db_name=${openaire_db_name}
external_stats_db_name=${external_stats_db_name}
${jobTracker}
${nameNode}
contexts.sh
${context_api_url}
${stats_db_name}
contexts.sh
yarn
cluster
Step16-createIndicatorsTables
eu.dnetlib.dhp.oozie.RunSQLSparkJob
dhp-stats-update-${projectVersion}.jar
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
${sparkClusterOpts}
${sparkResourceOpts}
${sparkApplicationOpts}
--hiveMetastoreUris${hive_metastore_uris}
--sqleu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql
--stats_db_name${stats_db_name}
--external_stats_db_name${external_stats_db_name}
${hive_jdbc_url}
stats_db_name=${stats_db_name}
openaire_db_name=${openaire_db_name}
${hive_jdbc_url}
stats_db_name=${stats_db_name}
openaire_db_name=${openaire_db_name}
${jobTracker}
${nameNode}
finalizedb.sh
${stats_db_name}
${stats_db_shadow_name}
finalizedb.sh
${jobTracker}
${nameNode}
monitor.sh
${stats_db_name}
${monitor_db_name}
${monitor_db_shadow_name}
${wf:appPath()}/scripts/step20-createMonitorDB.sql
${wf:appPath()}/scripts/step20-createMonitorDB_funded.sql
${wf:appPath()}/scripts/step20-createMonitorDB_institutions.sql
${wf:appPath()}/scripts/step20-createMonitorDB_RIs.sql
${wf:appPath()}/scripts/step20-createMonitorDB_RIs_tail.sql
${wf:appPath()}/scripts/step20-createMonitorDBAll.sql
monitor.sh
${jobTracker}
${nameNode}
observatory-pre.sh
${stats_db_name}
${observatory_db_name}
${observatory_db_shadow_name}
observatory-pre.sh
${hive_jdbc_url}
stats_db_name=${stats_db_name}
observatory_db_name=${observatory_db_name}
${jobTracker}
${nameNode}
observatory-post.sh
${observatory_db_name}
${observatory_db_shadow_name}
observatory-post.sh
${jobTracker}
${nameNode}
copyDataToImpalaCluster.sh
${stats_db_name}
${monitor_db_name}
${observatory_db_name}
${external_stats_db_name}
${usage_stats_db_name}
${hadoop_user_name}
copyDataToImpalaCluster.sh
${jobTracker}
${nameNode}
createPDFsAggregated.sh
${stats_db_name}
${monitor_db_name}
${hadoop_user_name}
createPDFsAggregated.sh
${jobTracker}
${nameNode}
finalizeImpalaCluster.sh
${stats_db_name}
${stats_db_shadow_name}
${monitor_db_name}
${monitor_db_shadow_name}
${observatory_db_name}
${observatory_db_shadow_name}
${usage_stats_db_name}
${usage_stats_db_shadow_name}
finalizeImpalaCluster.sh
${jobTracker}
${nameNode}
updateCache.sh
${stats_tool_api_url}
updateCache.sh