<!-- Repository viewer metadata: forked from D-Net/dnet-hadoop; 99 lines; 5.3 KiB; XML -->
<RESOURCE_PROFILE>
    <!--
        Workflow profile "Update Solr [PROD]" (type: Data Provision).
        Three start nodes each set one environment parameter, converge on the
        waitConfig join node, and then a single SubmitHadoopJob node runs the
        Oozie application configured below.
    -->
    <HEADER>
        <RESOURCE_IDENTIFIER value="8d36cc94-5b82-413c-923f-e7b3953e41ba_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
        <RESOURCE_TYPE value="WorkflowDSResourceType"/>
        <RESOURCE_KIND value="WorkflowDSResources"/>
        <RESOURCE_URI value=""/>
        <DATE_OF_CREATION value="2021-08-06T13:48:17+00:00"/>
    </HEADER>
    <BODY>
        <WORKFLOW_NAME>Update Solr [PROD]</WORKFLOW_NAME>
        <WORKFLOW_TYPE>Data Provision</WORKFLOW_TYPE>
        <WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
        <CONFIGURATION start="manual">

            <!-- Start node: sets env parameter "inputGraphRootPath" (value editable by the user). -->
            <NODE isStart="true" name="setInputPath" type="SetEnvParameter">
                <DESCRIPTION>Set the path containing the GRAPH to index</DESCRIPTION>
                <PARAMETERS>
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">inputGraphRootPath</PARAM>
                    <PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/prod_provision/graph/14_graph_blacklisted</PARAM>
                </PARAMETERS>
                <ARCS>
                    <ARC to="waitConfig"/>
                </ARCS>
            </NODE>

            <!--
                Start node: sets env parameter "format"; the value is restricted to TMF or DMF.
                NOTE(review): the node is named "setCollection" although the parameter it sets
                is "format"; the name is kept unchanged here. Confirm whether a rename is wanted.
            -->
            <NODE isStart="true" name="setCollection" type="SetEnvParameter">
                <DESCRIPTION>Set the metadata format (TMF or DMF)</DESCRIPTION>
                <PARAMETERS>
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">format</PARAM>
                    <PARAM function="validValues(['TMF', 'DMF'])" managedBy="user" name="parameterValue" required="true" type="string">DMF</PARAM>
                </PARAMETERS>
                <ARCS>
                    <ARC to="waitConfig"/>
                </ARCS>
            </NODE>

            <!-- Start node: sets env parameter "isLookupUrl" (value managed by the system, not the user). -->
            <NODE isStart="true" name="setIsLookUpUrl" type="SetEnvParameter">
                <DESCRIPTION>Set the lookup address</DESCRIPTION>
                <PARAMETERS>
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">isLookupUrl</PARAM>
                    <PARAM managedBy="system" name="parameterValue" required="true" type="string">http://services.openaire.eu:8280/is/services/isLookUp?wsdl</PARAM>
                </PARAMETERS>
                <ARCS>
                    <ARC to="waitConfig"/>
                </ARCS>
            </NODE>

            <!-- Join node: target of the arcs of all three start nodes; hands over to updateSolr. -->
            <NODE isJoin="true" name="waitConfig">
                <DESCRIPTION>wait configurations</DESCRIPTION>
                <PARAMETERS/>
                <ARCS>
                    <ARC to="updateSolr"/>
                </ARCS>
            </NODE>

            <!--
                Submits the Oozie application at 'oozie.wf.application.path' on cluster "IIS".
                "envParams" lists the three env parameters set by the start nodes above;
                presumably each entry maps a job property to the env parameter of the same
                name (TODO confirm against the SubmitHadoopJob node implementation).
                "params" holds the fixed job properties.
                NOTE(review): no node named "success" is defined in this profile; it is
                presumably a terminal node provided by the workflow engine (confirm).
            -->
            <NODE name="updateSolr" type="SubmitHadoopJob">
                <DESCRIPTION>update the Solr index from the input graph</DESCRIPTION>
                <PARAMETERS>
                    <PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
                    <PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
                    <PARAM managedBy="system" name="envParams" required="true" type="string">
                        {
                            'inputGraphRootPath' : 'inputGraphRootPath',
                            'isLookupUrl' : 'isLookupUrl',
                            'format' : 'format'
                        }
                    </PARAM>
                    <PARAM managedBy="system" name="params" required="true" type="string">
                        {
                            'oozie.wf.application.path' : '/lib/dnet/PROD/oa/provision/oozie_app',
                            'sourceMaxRelations' : '1000',
                            'targetMaxRelations' : '10000000',
                            'relPartitions' : '3000',
                            'batchSize' : '2000',
                            'relationFilter' : 'isAuthorInstitutionOf,produces,hasAmongTopNSimilarDocuments,cites,isCitedBy',
                            'otherDsTypeId' : 'scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource',
                            'resumeFrom' : 'prepare_relations',
                            'shouldIndex' : 'true',
                            'outputFormat' : 'SOLR',
                            'sparkDriverMemoryForJoining' : '3G',
                            'sparkExecutorMemoryForJoining' : '7G',
                            'sparkExecutorCoresForJoining' : '4',
                            'sparkDriverMemoryForIndexing' : '2G',
                            'sparkExecutorMemoryForIndexing' : '2G',
                            'sparkExecutorCoresForIndexing' : '64',
                            'sparkNetworkTimeout' : '600',
                            'workingDir' : '/tmp/prod_provision/working_dir/update_solr'
                        }
                    </PARAM>
                    <PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
                </PARAMETERS>
                <ARCS>
                    <ARC to="success"/>
                </ARCS>
            </NODE>

        </CONFIGURATION>

        <!-- Outcome of the most recent run as recorded in this profile. -->
        <STATUS>
            <LAST_EXECUTION_ID>wf_20210724_062705_620</LAST_EXECUTION_ID>
            <LAST_EXECUTION_DATE>2021-07-25T13:25:37+00:00</LAST_EXECUTION_DATE>
            <LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
            <LAST_EXECUTION_ERROR/>
        </STATUS>
    </BODY>
</RESOURCE_PROFILE>