graphInputPath
the input path to read graph content
graphOutputPath
the target path to store fixed graph
isLookupUrl
the address of the lookUp service
sparkDriverMemory
memory for driver process
sparkExecutorMemory
memory for individual executor
sparkExecutorCores
number of cores used by single executor
oozieActionShareLibForSpark2
oozie action sharelib for spark 2.*
spark2ExtraListeners
com.cloudera.spark.lineage.NavigatorAppListener
spark 2.* extra listeners classname
spark2SqlQueryExecutionListeners
com.cloudera.spark.lineage.NavigatorQueryListener
spark 2.* sql query execution listeners classname
spark2YarnHistoryServerAddress
spark 2.* yarn history server address
spark2EventLogDir
spark 2.* event log dir location
Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
yarn
cluster
Fix publications
eu.dnetlib.dhp.oa.graph.fix.FixGraphProperties
dhp-graph-mapper-${projectVersion}.jar
--executor-cores=${sparkExecutorCoresForJoining}
--executor-memory=${sparkExecutorMemoryForJoining}
--driver-memory=${sparkDriverMemoryForJoining}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680
--conf spark.network.timeout=${sparkNetworkTimeout}
--inputPath${graphInputPath}/publication
--outputPath${graphOutputPath}/publication
--graphTableClassNameeu.dnetlib.dhp.schema.oaf.Publication
--isLookupUrl${isLookupUrl}
yarn
cluster
Fix datasets
eu.dnetlib.dhp.oa.graph.fix.FixGraphProperties
dhp-graph-mapper-${projectVersion}.jar
--executor-cores=${sparkExecutorCoresForJoining}
--executor-memory=${sparkExecutorMemoryForJoining}
--driver-memory=${sparkDriverMemoryForJoining}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680
--conf spark.network.timeout=${sparkNetworkTimeout}
--inputPath${graphInputPath}/dataset
--outputPath${graphOutputPath}/dataset
--graphTableClassNameeu.dnetlib.dhp.schema.oaf.Dataset
--isLookupUrl${isLookupUrl}
yarn
cluster
Fix otherresearchproducts
eu.dnetlib.dhp.oa.graph.fix.FixGraphProperties
dhp-graph-mapper-${projectVersion}.jar
--executor-cores=${sparkExecutorCoresForJoining}
--executor-memory=${sparkExecutorMemoryForJoining}
--driver-memory=${sparkDriverMemoryForJoining}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680
--conf spark.network.timeout=${sparkNetworkTimeout}
--inputPath${graphInputPath}/otherresearchproduct
--outputPath${graphOutputPath}/otherresearchproduct
--graphTableClassNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct
--isLookupUrl${isLookupUrl}
yarn
cluster
Fix softwares
eu.dnetlib.dhp.oa.graph.fix.FixGraphProperties
dhp-graph-mapper-${projectVersion}.jar
--executor-cores=${sparkExecutorCoresForJoining}
--executor-memory=${sparkExecutorMemoryForJoining}
--driver-memory=${sparkDriverMemoryForJoining}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680
--conf spark.network.timeout=${sparkNetworkTimeout}
--inputPath${graphInputPath}/software
--outputPath${graphOutputPath}/software
--graphTableClassNameeu.dnetlib.dhp.schema.oaf.Software
--isLookupUrl${isLookupUrl}
yarn
cluster
Fix datasources
eu.dnetlib.dhp.oa.graph.fix.FixGraphProperties
dhp-graph-mapper-${projectVersion}.jar
--executor-cores=${sparkExecutorCoresForJoining}
--executor-memory=${sparkExecutorMemoryForJoining}
--driver-memory=${sparkDriverMemoryForJoining}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680
--conf spark.network.timeout=${sparkNetworkTimeout}
--inputPath${graphInputPath}/datasource
--outputPath${graphOutputPath}/datasource
--graphTableClassNameeu.dnetlib.dhp.schema.oaf.Datasource
--isLookupUrl${isLookupUrl}
yarn
cluster
Fix organizations
eu.dnetlib.dhp.oa.graph.fix.FixGraphProperties
dhp-graph-mapper-${projectVersion}.jar
--executor-cores=${sparkExecutorCoresForJoining}
--executor-memory=${sparkExecutorMemoryForJoining}
--driver-memory=${sparkDriverMemoryForJoining}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680
--conf spark.network.timeout=${sparkNetworkTimeout}
--inputPath${graphInputPath}/organization
--outputPath${graphOutputPath}/organization
--graphTableClassNameeu.dnetlib.dhp.schema.oaf.Organization
--isLookupUrl${isLookupUrl}
yarn
cluster
Fix projects
eu.dnetlib.dhp.oa.graph.fix.FixGraphProperties
dhp-graph-mapper-${projectVersion}.jar
--executor-cores=${sparkExecutorCoresForJoining}
--executor-memory=${sparkExecutorMemoryForJoining}
--driver-memory=${sparkDriverMemoryForJoining}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680
--conf spark.network.timeout=${sparkNetworkTimeout}
--inputPath${graphInputPath}/project
--outputPath${graphOutputPath}/project
--graphTableClassNameeu.dnetlib.dhp.schema.oaf.Project
--isLookupUrl${isLookupUrl}
yarn
cluster
Fix relations
eu.dnetlib.dhp.oa.graph.fix.FixGraphProperties
dhp-graph-mapper-${projectVersion}.jar
--executor-cores=${sparkExecutorCoresForJoining}
--executor-memory=${sparkExecutorMemoryForJoining}
--driver-memory=${sparkDriverMemoryForJoining}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680
--conf spark.network.timeout=${sparkNetworkTimeout}
--inputPath${graphInputPath}/relation
--outputPath${graphOutputPath}/relation
--graphTableClassNameeu.dnetlib.dhp.schema.oaf.Relation
--isLookupUrl${isLookupUrl}