workingDirPath
the working directory path
graphPath
the graph path
index
index name
sparkDriverMemory
memory for driver process
sparkExecutorMemory
memory for individual executor
sparkExecutorCores
number of cores used by single executor
idScholix
the Scholix id (original description truncated - TODO confirm)
idSummary
the Summary id (original description was copied from sparkExecutorCores - TODO confirm)
Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
${jobTracker}
${nameNode}
yarn-cluster
cluster
calculate, for each ID, the number of related datasets, publications and unknown entities
eu.dnetlib.dhp.provision.SparkExtractRelationCount
dhp-graph-provision-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} ${sparkExtraOPT}
-mt yarn-cluster
--workingDirPath${workingDirPath}
--relationPath${graphPath}/relation
${jobTracker}
${nameNode}
yarn-cluster
cluster
generate Summary
eu.dnetlib.dhp.provision.SparkGenerateSummary
dhp-graph-provision-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} ${sparkExtraOPT}
-mt yarn-cluster
--workingDirPath${workingDirPath}
--graphPath${graphPath}
${jobTracker}
${nameNode}
yarn-cluster
cluster
index Summary
eu.dnetlib.dhp.provision.SparkIndexCollectionOnES
dhp-graph-provision-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory} --num-executors 20 --driver-memory=${sparkDriverMemory} ${sparkExtraOPT}
-mt yarn-cluster
--sourcePath${workingDirPath}/summary
--index${index}_object
${jobTracker}
${nameNode}
yarn-cluster
cluster
generate Scholix
eu.dnetlib.dhp.provision.SparkGenerateScholix
dhp-graph-provision-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} ${sparkExtraOPT}
-mt yarn-cluster
--workingDirPath${workingDirPath}
--graphPath${graphPath}
${jobTracker}
${nameNode}
yarn-cluster
cluster
index scholix
eu.dnetlib.dhp.provision.SparkIndexCollectionOnES
dhp-graph-provision-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory} --num-executors 20 --driver-memory=${sparkDriverMemory} ${sparkExtraOPT} --conf spark.dynamicAllocation.maxExecutors="32"
-mt yarn-cluster
--sourcePath${workingDirPath}/scholix_index
--index${index}_scholix