Workflow parameters:

- inputGraphRootPath: root location of the input materialized graph
- isLookupUrl: URL of the isLookup service
- relPartitions: number of partitions for the relations Dataset
- relationFilter: filter applied when reading relations (by relClass)
- maxRelations: maximum number of relations allowed for each entity
- otherDsTypeId: mapping used to populate the datasourceTypeUi field
- format: metadata format name (DMF|TMF)
- batchSize: number of records included in each indexing request
- sparkDriverMemoryForJoining: memory for the driver process (joining phase)
- sparkExecutorMemoryForJoining: memory for each executor (joining phase)
- sparkExecutorCoresForJoining: number of cores used by each executor (joining phase)
- sparkDriverMemoryForIndexing: memory for the driver process (indexing phase)
- sparkExecutorMemoryForIndexing: memory for each executor (indexing phase)
- sparkExecutorCoresForIndexing: number of cores used by each executor (indexing phase)
- oozieActionShareLibForSpark2: Oozie action sharelib for Spark 2.*
- spark2ExtraListeners: Spark 2.* extra listeners class name (com.cloudera.spark.lineage.NavigatorAppListener)
- spark2SqlQueryExecutionListeners: Spark 2.* SQL query execution listeners class name (com.cloudera.spark.lineage.NavigatorQueryListener)
- spark2YarnHistoryServerAddress: Spark 2.* YARN history server address
- spark2EventLogDir: Spark 2.* event log directory location
- sparkNetworkTimeout: configures spark.network.timeout

Global configuration:

Every action inherits ${jobTracker} and ${nameNode}, and sets oozie.action.sharelib.for.spark to ${oozieActionShareLibForSpark2}.

Resume points:

The optional resumeFrom property selects where the workflow starts. A decision node tests one predicate of the form ${wf:conf('resumeFrom') eq '<node>'} per entry point:

- prepare_relations
- fork_join_related_entities
- join_all_entities
- adjancency_lists
- convert_to_xml
- to_solr_index

Error handling:

Any failing action routes to a kill node that reports: Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]. A minimal sketch of this plumbing follows.
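The fragments above map onto standard Oozie constructs. Below is a minimal workflow.xml sketch of the global section, the resume decision, and the kill node; the node names resume_from and Kill and the default transition are assumptions, while the EL predicates and the error message are taken verbatim from the source:

```xml
<!-- Sketch only: node names "resume_from" and "Kill" and the default case are assumed. -->
<global>
    <job-tracker>${jobTracker}</job-tracker>
    <name-node>${nameNode}</name-node>
    <configuration>
        <property>
            <name>oozie.action.sharelib.for.spark</name>
            <value>${oozieActionShareLibForSpark2}</value>
        </property>
    </configuration>
</global>

<start to="resume_from"/>

<decision name="resume_from">
    <switch>
        <case to="prepare_relations">${wf:conf('resumeFrom') eq 'prepare_relations'}</case>
        <case to="fork_join_related_entities">${wf:conf('resumeFrom') eq 'fork_join_related_entities'}</case>
        <case to="join_all_entities">${wf:conf('resumeFrom') eq 'join_all_entities'}</case>
        <case to="adjancency_lists">${wf:conf('resumeFrom') eq 'adjancency_lists'}</case>
        <case to="convert_to_xml">${wf:conf('resumeFrom') eq 'convert_to_xml'}</case>
        <case to="to_solr_index">${wf:conf('resumeFrom') eq 'to_solr_index'}</case>
        <default to="prepare_relations"/> <!-- assumed default -->
    </switch>
</decision>

<kill name="Kill">
    <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
```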
Workflow actions:

All Spark actions run on yarn in cluster mode and submit dhp-graph-provision-${projectVersion}.jar. Except for the final indexing step, they share these spark-opts:

- --executor-cores=${sparkExecutorCoresForJoining}
- --executor-memory=${sparkExecutorMemoryForJoining}
- --driver-memory=${sparkDriverMemoryForJoining}
- --conf spark.extraListeners=${spark2ExtraListeners}
- --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
- --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
- --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.network.timeout=${sparkNetworkTimeout} (all actions except prepare_relations)

Each action additionally sets its own spark.sql.shuffle.partitions, noted below.

1. prepare_relations (PrepareRelations)
   Class: eu.dnetlib.dhp.oa.provision.PrepareRelationsJob
   spark.sql.shuffle.partitions=3840
   Arguments:
   - --inputRelationsPath ${inputGraphRootPath}/relation
   - --outputPath ${workingDir}/relation
   - --relPartitions 3000

2. fork_join_related_entities: seven joins run in parallel (see the fork/join sketch after this list), all with class eu.dnetlib.dhp.oa.provision.CreateRelatedEntitiesJob_phase1, reading --inputRelationsPath ${workingDir}/relation and writing --outputPath ${workingDir}/join_partial. They differ only in the entity input path, the entity class, and the shuffle partitioning:

   - Join[relation.target = publication.id]: --inputEntityPath ${inputGraphRootPath}/publication, --graphTableClassName eu.dnetlib.dhp.schema.oaf.Publication, spark.sql.shuffle.partitions=7680
   - Join[relation.target = dataset.id]: --inputEntityPath ${inputGraphRootPath}/dataset, --graphTableClassName eu.dnetlib.dhp.schema.oaf.Dataset, spark.sql.shuffle.partitions=3840
   - Join[relation.target = otherresearchproduct.id]: --inputEntityPath ${inputGraphRootPath}/otherresearchproduct, --graphTableClassName eu.dnetlib.dhp.schema.oaf.OtherResearchProduct, spark.sql.shuffle.partitions=3840
   - Join[relation.target = software.id]: --inputEntityPath ${inputGraphRootPath}/software, --graphTableClassName eu.dnetlib.dhp.schema.oaf.Software, spark.sql.shuffle.partitions=3840
   - Join[relation.target = datasource.id]: --inputEntityPath ${inputGraphRootPath}/datasource, --graphTableClassName eu.dnetlib.dhp.schema.oaf.Datasource, spark.sql.shuffle.partitions=3840
   - Join[relation.target = organization.id]: --inputEntityPath ${inputGraphRootPath}/organization, --graphTableClassName eu.dnetlib.dhp.schema.oaf.Organization, spark.sql.shuffle.partitions=3840
   - Join[relation.target = project.id]: --inputEntityPath ${inputGraphRootPath}/project, --graphTableClassName eu.dnetlib.dhp.schema.oaf.Project, spark.sql.shuffle.partitions=3840
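A minimal sketch of the fork/join wiring for these seven joins. The fork name comes from the resume points above; the per-entity action names (join_relation_*) and the closing join node name are illustrative assumptions:

```xml
<!-- Sketch: the path action names and the join node name are assumed, not taken from the source. -->
<fork name="fork_join_related_entities">
    <path start="join_relation_publication"/>
    <path start="join_relation_dataset"/>
    <path start="join_relation_otherresearchproduct"/>
    <path start="join_relation_software"/>
    <path start="join_relation_datasource"/>
    <path start="join_relation_organization"/>
    <path start="join_relation_project"/>
</fork>

<!-- each join_relation_* action runs CreateRelatedEntitiesJob_phase1 as listed above -->

<join name="wait_joins" to="join_all_entities"/>
```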
3. join_all_entities (Join[entities.id = relatedEntity.source])
   Class: eu.dnetlib.dhp.oa.provision.CreateRelatedEntitiesJob_phase2
   spark.sql.shuffle.partitions=7680
   Arguments:
   - --inputGraphRootPath ${inputGraphRootPath}
   - --inputRelatedEntitiesPath ${workingDir}/join_partial
   - --outputPath ${workingDir}/join_entities
   - --numPartitions 35000

4. adjancency_lists (build_adjacency_lists)
   Class: eu.dnetlib.dhp.oa.provision.AdjacencyListBuilderJob
   spark.sql.shuffle.partitions=15000
   Arguments:
   - --inputPath ${workingDir}/join_entities
   - --outputPath ${workingDir}/joined

5. convert_to_xml
   Class: eu.dnetlib.dhp.oa.provision.XmlConverterJob
   spark.sql.shuffle.partitions=3840
   Arguments:
   - --inputPath ${workingDir}/joined
   - --outputPath ${workingDir}/xml
   - --isLookupUrl ${isLookupUrl}
   - --otherDsTypeId ${otherDsTypeId}
   (reconstructed as a complete action node below)
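In the flattened source each argument name is fused to its value (e.g. --inputPath${workingDir}/joined) because Oozie passes them as consecutive <arg> elements. For reference, here is the convert_to_xml action reconstructed as a complete node; all values are verbatim from the source, while the spark-action schema version and the ok/error transitions are assumptions:

```xml
<action name="convert_to_xml">
    <spark xmlns="uri:oozie:spark-action:0.2"> <!-- schema version assumed -->
        <master>yarn</master>
        <mode>cluster</mode>
        <name>convert_to_xml</name>
        <class>eu.dnetlib.dhp.oa.provision.XmlConverterJob</class>
        <jar>dhp-graph-provision-${projectVersion}.jar</jar>
        <spark-opts>
            --executor-cores=${sparkExecutorCoresForJoining}
            --executor-memory=${sparkExecutorMemoryForJoining}
            --driver-memory=${sparkDriverMemoryForJoining}
            --conf spark.extraListeners=${spark2ExtraListeners}
            --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
            --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
            --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            --conf spark.sql.shuffle.partitions=3840
            --conf spark.network.timeout=${sparkNetworkTimeout}
        </spark-opts>
        <arg>--inputPath</arg><arg>${workingDir}/joined</arg>
        <arg>--outputPath</arg><arg>${workingDir}/xml</arg>
        <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
        <arg>--otherDsTypeId</arg><arg>${otherDsTypeId}</arg>
    </spark>
    <ok to="to_solr_index"/> <!-- assumed transition -->
    <error to="Kill"/>       <!-- assumed kill node name -->
</action>
```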
6. to_solr_index
   Class: eu.dnetlib.dhp.oa.provision.XmlIndexingJob
   The indexing step uses its own driver/executor memory settings, replaces fixed executors with dynamic allocation, and disables speculative execution (full node sketched below).
   spark-opts:
   - --executor-memory=${sparkExecutorMemoryForIndexing}
   - --driver-memory=${sparkDriverMemoryForIndexing}
   - --conf spark.dynamicAllocation.enabled=true
   - --conf spark.dynamicAllocation.maxExecutors=${sparkExecutorCoresForIndexing}
   - --conf spark.extraListeners=${spark2ExtraListeners}
   - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
   - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
   - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
   - --conf spark.speculation=false
   - --conf spark.hadoop.mapreduce.map.speculative=false
   - --conf spark.hadoop.mapreduce.reduce.speculative=false
   Arguments:
   - --inputPath ${workingDir}/xml
   - --isLookupUrl ${isLookupUrl}
   - --format ${format}
   - --batchSize ${batchSize}
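The same reconstruction for the indexing action, whose configuration differs from the joining steps: dynamic allocation replaces fixed resources, and speculation is turned off, presumably to avoid duplicate writes to the index. The opts and args are verbatim from the source; as before, the node shape and the ok/error transitions are assumptions:

```xml
<action name="to_solr_index">
    <spark xmlns="uri:oozie:spark-action:0.2"> <!-- schema version assumed -->
        <master>yarn</master>
        <mode>cluster</mode>
        <name>to_solr_index</name>
        <class>eu.dnetlib.dhp.oa.provision.XmlIndexingJob</class>
        <jar>dhp-graph-provision-${projectVersion}.jar</jar>
        <spark-opts>
            --executor-memory=${sparkExecutorMemoryForIndexing}
            --driver-memory=${sparkDriverMemoryForIndexing}
            --conf spark.dynamicAllocation.enabled=true
            <!-- note: maxExecutors is bound to sparkExecutorCoresForIndexing in the source -->
            --conf spark.dynamicAllocation.maxExecutors=${sparkExecutorCoresForIndexing}
            --conf spark.extraListeners=${spark2ExtraListeners}
            --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
            --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
            --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            --conf spark.speculation=false
            --conf spark.hadoop.mapreduce.map.speculative=false
            --conf spark.hadoop.mapreduce.reduce.speculative=false
        </spark-opts>
        <arg>--inputPath</arg><arg>${workingDir}/xml</arg>
        <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
        <arg>--format</arg><arg>${format}</arg>
        <arg>--batchSize</arg><arg>${batchSize}</arg>
    </spark>
    <ok to="End"/>     <!-- assumed end node -->
    <error to="Kill"/> <!-- assumed kill node name -->
</action>
```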