workingDirPath
the working directory path
graphPath
the graph path
index
the index name
esCluster
the index cluster
sparkDriverMemory
memory for the driver process
sparkExecutorMemory
memory for each individual executor
Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
${jobTracker}
${nameNode}
yarn-cluster
cluster
calculate for each ID the number of related Dataset, publication and Unknown
eu.dnetlib.dhp.provision.SparkExtractRelationCount
dhp-graph-provision-scholexplorer-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} ${sparkExtraOPT}
-mt yarn-cluster
--workingDirPath${workingDirPath}
--relationPath${graphPath}/relation
${jobTracker}
${nameNode}
yarn-cluster
cluster
generate Summary
eu.dnetlib.dhp.provision.SparkGenerateSummaryIndex
dhp-graph-provision-scholexplorer-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.sql.shuffle.partitions=4000 ${sparkExtraOPT}
-mt yarn-cluster
--workingDirPath${workingDirPath}
--graphPath${graphPath}
${jobTracker}
${nameNode}
yarn-cluster
cluster
generate Scholix
eu.dnetlib.dhp.provision.SparkGenerateScholixIndex
dhp-graph-provision-scholexplorer-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.sql.shuffle.partitions=4000 ${sparkExtraOPT}
-mt yarn-cluster
--workingDirPath${workingDirPath}
--graphPath${graphPath}
${jobTracker}
${nameNode}
yarn-cluster
cluster
convert Dataset to JSON
eu.dnetlib.dhp.provision.SparkConvertDatasetToJson
dhp-graph-provision-scholexplorer-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.sql.shuffle.partitions=4000 ${sparkExtraOPT}
-m yarn-cluster
--workingPath${workingDirPath}
${jobTracker}
${nameNode}
eu.dnetlib.dhp.provision.DropAndCreateESIndex
-i${index}
-c${esCluster}
${jobTracker}
${nameNode}
yarn-cluster
cluster
index summary
eu.dnetlib.dhp.provision.SparkIndexCollectionOnES
dhp-graph-provision-scholexplorer-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} ${sparkExtraOPT} --conf spark.dynamicAllocation.maxExecutors="8"
-mt yarn-cluster
--sourcePath${workingDirPath}/summary_json
--index${index}_object
--idPathid
--cluster${esCluster}
${jobTracker}
${nameNode}
yarn-cluster
cluster
index scholix
eu.dnetlib.dhp.provision.SparkIndexCollectionOnES
dhp-graph-provision-scholexplorer-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} ${sparkExtraOPT} --conf spark.dynamicAllocation.maxExecutors="8"
-mt yarn-cluster
--sourcePath${workingDirPath}/scholix_json
--index${index}_scholix
--idPathidentifier
--cluster${esCluster}