sourcePath
the source path
isLookUpUrl
the isLookup service endpoint
pathMap
the JSON path associated with each selection field
outputPath
the output path
postgresURL
the URL of the postgres server to query
postgresUser
the username to access the postgres db
postgresPassword
the postgres password
${jobTracker}
${nameNode}
oozie.action.sharelib.for.spark
${oozieActionShareLibForSpark2}
Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
${nameNode}/${sourcePath}/relation
${nameNode}/${outputPath}/relation
${nameNode}/${sourcePath}/organization
${nameNode}/${outputPath}/organization
${nameNode}/${sourcePath}/project
${nameNode}/${outputPath}/project
${nameNode}/${sourcePath}/datasource
${nameNode}/${outputPath}/datasource
yarn-cluster
cluster
bulkTagging-publication
eu.dnetlib.dhp.bulktag.SparkBulkTagJob
dhp-enrichment-${projectVersion}.jar
--num-executors=${sparkExecutorNumber}
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--sourcePath${sourcePath}/publication
--resultTableNameeu.dnetlib.dhp.schema.oaf.Publication
--outputPath${outputPath}/publication
--pathMap${pathMap}
--isLookUpUrl${isLookUpUrl}
yarn-cluster
cluster
bulkTagging-dataset
eu.dnetlib.dhp.bulktag.SparkBulkTagJob
dhp-enrichment-${projectVersion}.jar
--num-executors=${sparkExecutorNumber}
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--sourcePath${sourcePath}/dataset
--resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset
--outputPath${outputPath}/dataset
--pathMap${pathMap}
--isLookUpUrl${isLookUpUrl}
yarn-cluster
cluster
bulkTagging-orp
eu.dnetlib.dhp.bulktag.SparkBulkTagJob
dhp-enrichment-${projectVersion}.jar
--num-executors=${sparkExecutorNumber}
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--sourcePath${sourcePath}/otherresearchproduct
--resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct
--outputPath${outputPath}/otherresearchproduct
--pathMap${pathMap}
--isLookUpUrl${isLookUpUrl}
yarn-cluster
cluster
bulkTagging-software
eu.dnetlib.dhp.bulktag.SparkBulkTagJob
dhp-enrichment-${projectVersion}.jar
--num-executors=${sparkExecutorNumber}
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--sourcePath${sourcePath}/software
--resultTableNameeu.dnetlib.dhp.schema.oaf.Software
--outputPath${outputPath}/software
--pathMap${pathMap}
--isLookUpUrl${isLookUpUrl}
yarn-cluster
cluster
EOSC_tagging
eu.dnetlib.dhp.bulktag.eosc.SparkEoscTag
dhp-enrichment-${projectVersion}.jar
--num-executors=${sparkExecutorNumber}
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--sourcePath${outputPath}
--workingPath${workingDir}/eoscTag
eu.dnetlib.dhp.bulktag.eosc.ReadMasterDatasourceFromDB
--hdfsPath${workingDir}/datasourcemaster
--hdfsNameNode${nameNode}
--postgresUrl${postgresURL}
--postgresUser${postgresUser}
--postgresPassword${postgresPassword}
yarn-cluster
cluster
EOSC_tagging
eu.dnetlib.dhp.bulktag.eosc.SparkEoscBulkTag
dhp-enrichment-${projectVersion}.jar
--num-executors=${sparkExecutorNumber}
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--sourcePath${outputPath}/publication
--workingPath${workingDir}/eoscContextTag/publication
--resultTableNameeu.dnetlib.dhp.schema.oaf.Publication
--datasourceMapPath${workingDir}/datasourcemaster
yarn-cluster
cluster
EOSC_tagging
eu.dnetlib.dhp.bulktag.eosc.SparkEoscBulkTag
dhp-enrichment-${projectVersion}.jar
--num-executors=${sparkExecutorNumber}
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--sourcePath${outputPath}/dataset
--workingPath${workingDir}/eoscContextTag/dataset
--resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset
--datasourceMapPath${workingDir}/datasourcemaster
yarn-cluster
cluster
EOSC_tagging
eu.dnetlib.dhp.bulktag.eosc.SparkEoscBulkTag
dhp-enrichment-${projectVersion}.jar
--num-executors=${sparkExecutorNumber}
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--sourcePath${outputPath}/software
--workingPath${workingDir}/eoscContextTag/software
--resultTableNameeu.dnetlib.dhp.schema.oaf.Software
--datasourceMapPath${workingDir}/datasourcemaster
yarn-cluster
cluster
EOSC_tagging
eu.dnetlib.dhp.bulktag.eosc.SparkEoscBulkTag
dhp-enrichment-${projectVersion}.jar
--num-executors=${sparkExecutorNumber}
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--sourcePath${outputPath}/otherresearchproduct
--workingPath${workingDir}/eoscContextTag/otherresearchproduct
--resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct
--datasourceMapPath${workingDir}/datasourcemaster