sourcePath the working dir base path targetPath the final graph path relationFilter Filter relation semantic maxNumberOfPid filter relation with at least #maxNumberOfPid dumpCitations false should dump citation relations Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] yarn cluster Import JSONRDD to Dataset kryo eu.dnetlib.dhp.sx.graph.SparkConvertRDDtoDataset dhp-graph-mapper-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.shuffle.partitions=3000 --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --masteryarn --sourcePath${sourcePath} --targetPath${targetPath} --filterRelation${relationFilter} yarn cluster Convert Entities to summaries eu.dnetlib.dhp.sx.graph.SparkCreateSummaryObject dhp-graph-mapper-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.shuffle.partitions=20000 --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --masteryarn --sourcePath${targetPath}/entities --targetPath${targetPath}/provision/summaries yarn cluster Generate Scholix Dataset eu.dnetlib.dhp.sx.graph.SparkCreateScholix dhp-graph-mapper-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.shuffle.partitions=30000 --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf 
spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --masteryarn --summaryPath${targetPath}/provision/summaries --targetPath${targetPath}/provision/scholix --relationPath${targetPath}/relation --dumpCitations${dumpCitations} yarn cluster Serialize scholix to JSON eu.dnetlib.dhp.sx.graph.SparkConvertObjectToJson dhp-graph-mapper-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.shuffle.partitions=6000 --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --masteryarn --sourcePath${targetPath}/provision/scholix/scholix --targetPath${targetPath}/json/scholix_json --objectTypescholix --maxPidNumberFilter${maxNumberOfPid} eu.dnetlib.dhp.common.MakeTarArchive --nameNode${nameNode} --hdfsPath${targetPath}/tar --sourcePath${targetPath}/json