graphPath
the path to store the original ORCID dump
targetPath
the path to store the updated ORCID dump
apiURL
http://74804fb637bd8e2fba5b-e0a029c2f87486cddec3b416996a6057.r3.cf1.rackcdn.com/last_modified.csv.tar
The URL of the update CSV list
accessToken
The access token
Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
yarn
cluster
Check Latest Orcid and Download updates
eu.dnetlib.dhp.collection.orcid.OrcidGetUpdatesFile
dhp-aggregation-${projectVersion}.jar
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.executor.memoryOverhead=2g
--conf spark.sql.shuffle.partitions=3000
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--masteryarn
--namenode${nameNode}
--graphPath${graphPath}
--targetPath${targetPath}
--apiURL${apiURL}
--accessToken${accessToken}
yarn
cluster
Generate ORCID Tables
eu.dnetlib.dhp.collection.orcid.SparkGenerateORCIDTable
dhp-aggregation-${projectVersion}.jar
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.executor.memoryOverhead=2g
--conf spark.sql.shuffle.partitions=3000
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--sourcePath${targetPath}
--targetPath${targetPath}/updateTable
--fromUpdatetrue
--masteryarn
yarn
cluster
Update ORCID Tables
eu.dnetlib.dhp.collection.orcid.SparkApplyUpdate
dhp-aggregation-${projectVersion}.jar
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.executor.memoryOverhead=2g
--conf spark.sql.shuffle.partitions=3000
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--graphPath${graphPath}
--updatePath${targetPath}/updateTable
--targetPath${targetPath}/newTable
--masteryarn