The collection workflow accepts the following parameters:

| Parameter | Description |
|-----------|-------------|
| apiDescription | A JSON encoding of the API Description class |
| dataSourceInfo | A JSON encoding of the Datasource Info |
| identifierPath | An XPath used to extract the metadata identifier for the generation of the DNet Identifier |
| metadataEncoding | The metadata encoding type, either XML or JSON |
| timestamp | The timestamp of the collection date |
| workflowId | The identifier of the workflow |
| mdStoreID | The identifier of the MDStore |
| mdStoreManagerURI | The URI of the MDStore Manager |
| dnetMessageManagerURL | The URI of the DNet Message Manager |
| collectionMode | Should be REFRESH or INCREMENTAL |
| collection_java_xmx | Heap size for the map JVM process (default -Xmx200m); should be about 80% of mapreduce.map.memory.mb |

The workflow runs on the cluster identified by ${jobTracker} and ${nameNode}, and every failing action is routed to a kill node that reports: "Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]". All of the MDStore bookkeeping steps are Java actions running eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode with ${collection_java_xmx} as JVM options.

The main path is:

1. Collection mode decision: a switch on ${wf:conf('collectionMode')}. In INCREMENTAL mode the existing store must still be readable while the new version is built, so the workflow first acquires a read lock (BeginRead); in REFRESH mode it opens a new version straight away (StartTransaction).
2. BeginRead (INCREMENTAL only): MDStoreActionNode with --action READ_LOCK, --mdStoreID ${mdStoreID}, --mdStoreManagerURI ${mdStoreManagerURI}. The locked version is exposed as ${wf:actionData('BeginRead')['mdStoreReadLockVersion']}.
3. StartTransaction: MDStoreActionNode with --action NEW_VERSION, --mdStoreID ${mdStoreID}, --mdStoreManagerURI ${mdStoreManagerURI}. The new version is exposed as ${wf:actionData('StartTransaction')['mdStoreVersion']}.
4. Collector worker: eu.dnetlib.dhp.collection.CollectorWorkerApplication with --apidescriptor ${apiDescription}, --namenode ${nameNode}, --workflowId ${workflowId}, --dnetMessageManagerURL ${dnetMessageManagerURL}, --mdStoreVersion ${wf:actionData('StartTransaction')['mdStoreVersion']}, and the HTTP client settings --maxNumberOfRetry ${maxNumberOfRetry}, --requestDelay ${requestDelay}, --retryDelay ${retryDelay}, --connectTimeOut ${connectTimeOut}, --readTimeOut ${readTimeOut}.
5. Generate Native MetadataStore: a Spark action (master yarn, deploy mode cluster) running eu.dnetlib.dhp.collection.GenerateNativeStoreSparkJob from dhp-aggregation-${projectVersion}.jar. It is submitted with --executor-memory=${sparkExecutorMemory}, --executor-cores=${sparkExecutorCores}, --driver-memory=${sparkDriverMemory} and the usual listener, history server and event log settings (spark.extraListeners, spark.sql.queryExecutionListeners, spark.yarn.historyServer.address, spark.eventLog.dir), and takes the arguments --encoding ${metadataEncoding}, --dateOfCollection ${timestamp}, --provenance ${dataSourceInfo}, --xpath ${identifierPath}, --mdStoreVersion ${wf:actionData('StartTransaction')['mdStoreVersion']}, --readMdStoreVersion ${wf:actionData('BeginRead')['mdStoreReadLockVersion']}.
6. Closing the transaction: a second switch on collectionMode. In INCREMENTAL mode the read lock is released first (MDStoreActionNode with --action READ_UNLOCK, --mdStoreManagerURI ${mdStoreManagerURI}, --readMDStoreId ${wf:actionData('BeginRead')['mdStoreReadLockVersion']}); the new version is then committed with --action COMMIT, --namenode ${nameNode}, --mdStoreVersion ${wf:actionData('StartTransaction')['mdStoreVersion']}, --mdStoreManagerURI ${mdStoreManagerURI}.

On failure a mirrored switch on collectionMode is taken: in INCREMENTAL mode the read lock is released (--action READ_UNLOCK with the same arguments as above), and the open version is then rolled back with --action ROLLBACK, --mdStoreVersion ${wf:actionData('StartTransaction')['mdStoreVersion']}, --mdStoreManagerURI ${mdStoreManagerURI}.
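As an illustration, this is roughly how the table above maps onto the `<parameters>` section of an Oozie workflow definition. The `<property>`/`<name>`/`<value>`/`<description>` layout is standard Oozie syntax, but the exact arrangement here is an assumption; only the names, the -Xmx200m default and the descriptions come from the table.

```xml
<parameters>
    <property>
        <name>apiDescription</name>
        <description>A JSON encoding of the API Description class</description>
    </property>
    <property>
        <name>mdStoreID</name>
        <description>The identifier of the MDStore</description>
    </property>
    <property>
        <name>collectionMode</name>
        <description>Should be REFRESH or INCREMENTAL</description>
    </property>
    <property>
        <name>collection_java_xmx</name>
        <value>-Xmx200m</value>
        <description>Heap size for the map JVM process, about 80% of mapreduce.map.memory.mb</description>
    </property>
    <!-- the remaining parameters from the table above follow the same pattern -->
</parameters>
```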
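The collection mode routing (step 1) would look roughly like the Oozie decision node below. BeginRead and StartTransaction are the action names implied by the workflow's wf:actionData() calls; the decision name and the mode-to-branch mapping follow the description above and should be read as a sketch rather than the exact definition.

```xml
<decision name="collection_mode">
    <switch>
        <!-- REFRESH: no previous content is needed, open a new version immediately -->
        <case to="StartTransaction">${wf:conf('collectionMode') eq 'REFRESH'}</case>
        <!-- INCREMENTAL: read-lock the current store before opening the new version -->
        <case to="BeginRead">${wf:conf('collectionMode') eq 'INCREMENTAL'}</case>
        <default to="StartTransaction"/>
    </switch>
</decision>
```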
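The MDStore bookkeeping steps (BeginRead, StartTransaction, and the commit/rollback actions) all follow the same java-action pattern. Below is a minimal sketch of StartTransaction, assuming standard Oozie java-action syntax; `<capture-output/>` is what makes ${wf:actionData('StartTransaction')['mdStoreVersion']} available to the later steps. The ok/error targets are hypothetical placeholders, and the resource manager and name node are assumed to be configured in the workflow's global section.

```xml
<action name="StartTransaction">
    <java>
        <main-class>eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode</main-class>
        <java-opts>${collection_java_xmx}</java-opts>
        <arg>--action</arg>
        <arg>NEW_VERSION</arg>
        <arg>--mdStoreID</arg>
        <arg>${mdStoreID}</arg>
        <arg>--mdStoreManagerURI</arg>
        <arg>${mdStoreManagerURI}</arg>
        <!-- expose mdStoreVersion to the following actions via wf:actionData() -->
        <capture-output/>
    </java>
    <ok to="CollectionWorker"/>
    <error to="FailCollection"/>
</action>
```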
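Finally, a sketch of the native store generation step (step 5) as an Oozie spark action. The master, mode, name, class, jar, Spark options and arguments all come from the workflow described above; the action name, the ok/error targets and the spark-action schema version are assumptions.

```xml
<action name="GenerateNativeStore">
    <spark xmlns="uri:oozie:spark-action:0.2">
        <master>yarn</master>
        <mode>cluster</mode>
        <name>Generate Native MetadataStore</name>
        <class>eu.dnetlib.dhp.collection.GenerateNativeStoreSparkJob</class>
        <jar>dhp-aggregation-${projectVersion}.jar</jar>
        <spark-opts>
            --executor-memory=${sparkExecutorMemory}
            --executor-cores=${sparkExecutorCores}
            --driver-memory=${sparkDriverMemory}
            --conf spark.extraListeners=${spark2ExtraListeners}
            --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
            --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
            --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
        </spark-opts>
        <arg>--encoding</arg>
        <arg>${metadataEncoding}</arg>
        <arg>--dateOfCollection</arg>
        <arg>${timestamp}</arg>
        <arg>--provenance</arg>
        <arg>${dataSourceInfo}</arg>
        <arg>--xpath</arg>
        <arg>${identifierPath}</arg>
        <arg>--mdStoreVersion</arg>
        <arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
        <arg>--readMdStoreVersion</arg>
        <arg>${wf:actionData('BeginRead')['mdStoreReadLockVersion']}</arg>
    </spark>
    <ok to="EndCollection"/>
    <error to="FailCollection"/>
</action>
```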