workingDirPath
the working directory path
graphPath
the graph path
index
index name
sparkDriverMemory
memory for the driver process
sparkExecutorMemory
memory for each individual executor
sparkExecutorCores
number of cores used by a single executor
idScholix
the Scholix identifier (TODO: original description was truncated; confirm intended meaning)
idSummary
the Summary identifier (TODO: original description duplicated the sparkExecutorCores text; confirm intended meaning)
Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
${jobTracker}
${nameNode}
yarn-cluster
cluster
calculate for each ID the number of related Dataset, publication and Unknown
eu.dnetlib.dhp.provision.SparkExtractRelationCount
dhp-graph-provision-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} ${sparkExtraOPT}
-mt yarn-cluster
--workingDirPath${workingDirPath}
--relationPath${graphPath}/relation
${jobTracker}
${nameNode}
yarn-cluster
cluster
generate Summary
eu.dnetlib.dhp.provision.SparkGenerateSummary
dhp-graph-provision-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} ${sparkExtraOPT}
-mt yarn-cluster
--workingDirPath${workingDirPath}
--graphPath${graphPath}
${jobTracker}
${nameNode}
yarn-cluster
cluster
generate Scholix
eu.dnetlib.dhp.provision.SparkGenerateScholix
dhp-graph-provision-${projectVersion}.jar
--executor-memory 6G --driver-memory=${sparkDriverMemory} ${sparkExtraOPT}
-mt yarn-cluster
--workingDirPath${workingDirPath}
--graphPath${graphPath}
${jobTracker}
${nameNode}
yarn-cluster
cluster
index Summary
eu.dnetlib.dhp.provision.SparkIndexCollectionOnES
dhp-graph-provision-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} ${sparkExtraOPT} --conf spark.dynamicAllocation.maxExecutors="64"
-mt yarn-cluster
--sourcePath${workingDirPath}/summary
--index${index}_object
--idPathid
--typesummary
${jobTracker}
${nameNode}
yarn-cluster
cluster
index scholix
eu.dnetlib.dhp.provision.SparkIndexCollectionOnES
dhp-graph-provision-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} ${sparkExtraOPT} --conf spark.dynamicAllocation.maxExecutors="8"
-mt yarn-cluster
--sourcePath${workingDirPath}/scholix_json
--index${index}_scholix
--idPathidentifier
--typescholix