merging with branch beta
This commit is contained in:
commit
b954fe9ba8
|
@ -23,6 +23,7 @@ import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
|||
import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.PidType;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||
|
||||
|
@ -249,7 +250,24 @@ public class MappersTest {
|
|||
final Relation r1 = (Relation) list.get(1);
|
||||
final Relation r2 = (Relation) list.get(2);
|
||||
|
||||
assertEquals(d.getId(), r1.getSource());
|
||||
assertEquals("40|corda_______::e06332dee33bec6c2ba4c98601053229", r1.getTarget());
|
||||
assertEquals(ModelConstants.RESULT_PROJECT, r1.getRelType());
|
||||
assertEquals(ModelConstants.OUTCOME, r1.getSubRelType());
|
||||
assertEquals(ModelConstants.IS_PRODUCED_BY, r1.getRelClass());
|
||||
assertTrue(r1.getValidated());
|
||||
assertEquals("2020-01-01", r1.getValidationDate());
|
||||
|
||||
assertEquals(d.getId(), r2.getTarget());
|
||||
assertEquals("40|corda_______::e06332dee33bec6c2ba4c98601053229", r2.getSource());
|
||||
assertEquals(ModelConstants.RESULT_PROJECT, r2.getRelType());
|
||||
assertEquals(ModelConstants.OUTCOME, r2.getSubRelType());
|
||||
assertEquals(ModelConstants.PRODUCES, r2.getRelClass());
|
||||
assertTrue(r2.getValidated());
|
||||
assertEquals("2020-01-01", r2.getValidationDate());
|
||||
|
||||
assertValidId(d.getId());
|
||||
assertEquals("50|doi_________::000374d100a9db469bd42b69dbb40b36", d.getId());
|
||||
assertEquals(2, d.getOriginalId().size());
|
||||
assertTrue(d.getOriginalId().stream().anyMatch(oid -> oid.equals("oai:zenodo.org:3234526")));
|
||||
assertValidId(d.getCollectedfrom().get(0).getKey());
|
||||
|
@ -303,10 +321,12 @@ public class MappersTest {
|
|||
});
|
||||
assertEquals("0001", d.getInstance().get(0).getRefereed().getClassid());
|
||||
assertNotNull(d.getInstance().get(0).getPid());
|
||||
assertTrue(d.getInstance().get(0).getPid().isEmpty());
|
||||
assertFalse(d.getInstance().get(0).getPid().isEmpty());
|
||||
|
||||
assertEquals("doi", d.getInstance().get(0).getAlternateIdentifier().get(0).getQualifier().getClassid());
|
||||
assertEquals("10.5281/zenodo.3234526", d.getInstance().get(0).getAlternateIdentifier().get(0).getValue());
|
||||
assertEquals("doi", d.getInstance().get(0).getPid().get(0).getQualifier().getClassid());
|
||||
assertEquals("10.5281/zenodo.3234526", d.getInstance().get(0).getPid().get(0).getValue());
|
||||
|
||||
assertTrue(d.getInstance().get(0).getAlternateIdentifier().isEmpty());
|
||||
|
||||
assertValidId(r1.getSource());
|
||||
assertValidId(r1.getTarget());
|
||||
|
@ -736,12 +756,11 @@ public class MappersTest {
|
|||
}
|
||||
|
||||
private void assertValidId(final String id) {
|
||||
System.out.println(id);
|
||||
// System.out.println(id);
|
||||
|
||||
assertEquals(49, id.length());
|
||||
assertEquals('|', id.charAt(2));
|
||||
assertEquals(':', id.charAt(15));
|
||||
assertEquals(':', id.charAt(16));
|
||||
assertEquals(IdentifierFactory.ID_PREFIX_SEPARATOR, id.substring(2, 3));
|
||||
assertEquals(IdentifierFactory.ID_SEPARATOR, id.substring(15, 17));
|
||||
}
|
||||
|
||||
private List<String> vocs() throws IOException {
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -108,7 +108,7 @@ round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA
|
|||
END) AS NonOpenAccess
|
||||
FROM software s
|
||||
join result_organization ro on s.id=ro.id
|
||||
join SOURCER.organization o on o.id=ro.organization
|
||||
join organization o on o.id=ro.organization
|
||||
where cast(year as int)>=2003 and cast(year as int)<=2021
|
||||
group by year, country) tmp;
|
||||
|
||||
|
|
|
@ -0,0 +1,100 @@
|
|||
<RESOURCE_PROFILE>
|
||||
<HEADER>
|
||||
<RESOURCE_IDENTIFIER value="08433e63-56a5-4ca3-bde9-cddac11c4b15_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
|
||||
<RESOURCE_TYPE value="WorkflowDSResourceType"/>
|
||||
<RESOURCE_KIND value="WorkflowDSResources"/>
|
||||
<RESOURCE_URI value=""/>
|
||||
<DATE_OF_CREATION value="2021-05-14T15:17:19+00:00"/>
|
||||
</HEADER>
|
||||
<BODY>
|
||||
<WORKFLOW_NAME>Import bipFinder scores</WORKFLOW_NAME>
|
||||
<WORKFLOW_TYPE>Import bipFinder scores</WORKFLOW_TYPE>
|
||||
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
|
||||
<CONFIGURATION start="disabled">
|
||||
<NODE isStart="true" name="setBipScorePath" type="SetEnvParameter">
|
||||
<DESCRIPTION>declares the path holding the BIP SCORE data</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">bipScorePath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/bip/20201206</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="setLatestGraphPath"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="setLatestGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>declares the path holding the LATEST GRAPH dump</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">latestGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/stable_ids/graph/14_graph_blacklisted</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="prepareActionSets"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="prepareActionSets" type="PrepareActionSets">
|
||||
<DESCRIPTION>prepare action sets</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="sets" required="true" type="string">
|
||||
[
|
||||
{
|
||||
'set' : 'bipfinder-scores',
|
||||
'jobProperty' : 'export_action_set_bipfinder-scores',
|
||||
'enablingProperty' : 'active_bipfinder-scores',
|
||||
'enabled' : 'true'
|
||||
}
|
||||
]
|
||||
</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="extractOutputPath"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="extractOutputPath" type="ExtractOutputPath">
|
||||
<DESCRIPTION>extract the hdfs output path generated in the previous node</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hdfsOutputPathParam" required="true" type="string">outputPath</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="bipFinderScores"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="bipFinderScores" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>prepare AS for the bipFinder scores integration</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'bipScorePath':'bipScorePath',
|
||||
'inputPath':'latestGraphPath',
|
||||
'outputPath': 'outputPath'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/bipfinder/oozie_app',
|
||||
'workingDir' : '/tmp/beta_provision/working_dir/bipfinder'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="updateActionSets"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="updateActionSets" type="UpdateActionSets">
|
||||
<DESCRIPTION>update action sets</DESCRIPTION>
|
||||
<PARAMETERS/>
|
||||
<ARCS>
|
||||
<ARC to="success"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
</CONFIGURATION>
|
||||
<STATUS>
|
||||
<LAST_EXECUTION_ID/>
|
||||
<LAST_EXECUTION_DATE/>
|
||||
<LAST_EXECUTION_STATUS/>
|
||||
<LAST_EXECUTION_ERROR/>
|
||||
</STATUS>
|
||||
</BODY>
|
||||
</RESOURCE_PROFILE>
|
|
@ -0,0 +1,144 @@
|
|||
<RESOURCE_PROFILE>
|
||||
<HEADER>
|
||||
<RESOURCE_IDENTIFIER value="6870f8c4-4e0e-414f-bd0d-2c8de5e7d1a5_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
|
||||
<RESOURCE_TYPE value="WorkflowDSResourceType"/>
|
||||
<RESOURCE_KIND value="WorkflowDSResources"/>
|
||||
<RESOURCE_URI value=""/>
|
||||
<DATE_OF_CREATION value="2021-07-13T14:00:07+00:00"/>
|
||||
</HEADER>
|
||||
<BODY>
|
||||
<WORKFLOW_NAME>Import Datacite ActionSet</WORKFLOW_NAME>
|
||||
<WORKFLOW_TYPE>Import InfoSpace</WORKFLOW_TYPE>
|
||||
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
|
||||
<CONFIGURATION start="manual">
|
||||
<NODE isStart="true" name="setResumeFrom" type="SetEnvParameter">
|
||||
<DESCRIPTION>set the resume from</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">resumeFrom</PARAM>
|
||||
<PARAM function="validValues(['TransformDatacite', ''])" managedBy="user" name="parameterValue" required="true" type="string">TransformDatacite</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="setExportLinks"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="setExportLinks" type="SetEnvParameter">
|
||||
<DESCRIPTION>shall the datacite mapping produce the links?</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">exportLinks</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="boolean">false</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="setDatacitePathTransformed"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="setDatacitePathTransformed" type="SetEnvParameter">
|
||||
<DESCRIPTION>set the path storing the OAF Datacite records</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">oafTargetPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/datacite/production/datacite_oaf</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="setDatacitePath"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="setDatacitePath" type="SetEnvParameter">
|
||||
<DESCRIPTION>set the input path for Datacite content</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">datacitePath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/datacite</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="prepareActionSets"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="prepareActionSets" type="PrepareActionSets">
|
||||
<DESCRIPTION>prepare action sets</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="sets" required="true" type="string">
|
||||
[
|
||||
{
|
||||
'set' : 'datacite',
|
||||
'jobProperty' : 'export_action_set_datacite',
|
||||
'enablingProperty' : 'active_datacite',
|
||||
'enabled' : 'true'
|
||||
}
|
||||
]
|
||||
</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="extractOutputPath"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="extractOutputPath" type="ExtractOutputPath">
|
||||
<DESCRIPTION>extract the hdfs output path generated in the previous node</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hdfsOutputPathParam" required="true" type="string">outputPath</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="transformDatacite"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="transformDatacite" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>prepare a new version of Datacite ActionSet</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'mainPath' : 'datacitePath',
|
||||
'oafTargetPath' : 'oafTargetPath',
|
||||
'exportLinks' : 'exportLinks',
|
||||
'resumeFrom' : 'resumeFrom'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/datacite_import/oozie_app',
|
||||
'sparkExecutorMemory' : '7G'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="generateDataciteActionSet"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="generateDataciteActionSet" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>prepare a new version of Datacite ActionSet</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'sourcePath' : 'oafTargetPath',
|
||||
'outputPath' : 'outputPath'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/datacite_actionset/oozie_app',
|
||||
'sparkExecutorMemory' : '7G'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="updateActionSets"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="updateActionSets" type="UpdateActionSets">
|
||||
<DESCRIPTION>update action sets</DESCRIPTION>
|
||||
<PARAMETERS/>
|
||||
<ARCS>
|
||||
<ARC to="success"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
</CONFIGURATION>
|
||||
<STATUS>
|
||||
<LAST_EXECUTION_ID>wf_20210723_163342_752</LAST_EXECUTION_ID>
|
||||
<LAST_EXECUTION_DATE>2021-07-23T16:44:05+00:00</LAST_EXECUTION_DATE>
|
||||
<LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
|
||||
<LAST_EXECUTION_ERROR/>
|
||||
</STATUS>
|
||||
</BODY>
|
||||
</RESOURCE_PROFILE>
|
|
@ -0,0 +1,200 @@
|
|||
<RESOURCE_PROFILE>
|
||||
<HEADER>
|
||||
<RESOURCE_IDENTIFIER value="e03f256e-1e4d-4b3d-9c07-91faf5d25208_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
|
||||
<RESOURCE_TYPE value="WorkflowDSResourceType"/>
|
||||
<RESOURCE_KIND value="WorkflowDSResources"/>
|
||||
<RESOURCE_URI value=""/>
|
||||
<DATE_OF_CREATION value="2021-07-13T15:15:19+00:00"/>
|
||||
</HEADER>
|
||||
<BODY>
|
||||
<WORKFLOW_NAME>Import DOIboost</WORKFLOW_NAME>
|
||||
<WORKFLOW_TYPE>Import InfoSpace</WORKFLOW_TYPE>
|
||||
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
|
||||
<CONFIGURATION start="manual">
|
||||
<NODE isStart="true" name="setMAGDumpPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>set the input path for MAG</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">MAGDumpPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/doiboost/mag-2021-02-15</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setCROSSREFDumpPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>set the input path for CROSSREF dump</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">crossrefDumpPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/doiboost/crossref/</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setIntemediatePathMAG" type="SetEnvParameter">
|
||||
<DESCRIPTION>set the intermediate path used to process MAG</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">intermediatePathMAG</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/doiboost/input/mag</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setInputPathCrossref" type="SetEnvParameter">
|
||||
<DESCRIPTION>set the input path for Crossref</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">inputPathCrossref</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/doiboost/input/crossref</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setCrossrefTimestamp" type="SetEnvParameter">
|
||||
<DESCRIPTION>set the timestamp for the Crossref incremental harvesting</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">crossrefTimestamp</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="false" type="string">1607614921429</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setInputPathUnpayWall" type="SetEnvParameter">
|
||||
<DESCRIPTION>set the input path for UnpayWall</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">inputPathUnpayWall</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/doiboost/input/unpayWall</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setInputPathOrcid" type="SetEnvParameter">
|
||||
<DESCRIPTION>set the input path for ORCID</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">inputPathOrcid</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/orcid_activities_2020/last_orcid_dataset</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setWorkingPathOrcid" type="SetEnvParameter">
|
||||
<DESCRIPTION>set the working path for ORCID</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">workingPathOrcid</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/doiboost/input/orcid</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setHostedByMapPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>set the hostedBy map path</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">hostedByMapPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/doiboost/input/hostedBy/hbMap.gz</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setResumeFrom" type="SetEnvParameter">
|
||||
<DESCRIPTION>set the oozie workflow name from which the execution will be resumed</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">resumeFrom</PARAM>
|
||||
<PARAM function="validValues(['ConvertCrossrefToOAF','PreprocessMag','PreprocessUW', 'ProcessORCID', 'CreateDOIBoost', 'GenerateActionSet'])" managedBy="user" name="parameterValue" required="false" type="string">ConvertCrossrefToOAF</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isJoin="true" name="waitConfig">
|
||||
<DESCRIPTION>wait configurations</DESCRIPTION>
|
||||
<PARAMETERS/>
|
||||
<ARCS>
|
||||
<ARC to="prepareActionSets"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="prepareActionSets" type="PrepareActionSets">
|
||||
<DESCRIPTION>prepare action sets</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="sets" required="true" type="string">
|
||||
[
|
||||
{
|
||||
'set' : 'doiboost',
|
||||
'jobProperty' : 'export_action_set_doiboost',
|
||||
'enablingProperty' : 'active_doiboost',
|
||||
'enabled' : 'true'
|
||||
}
|
||||
]
|
||||
</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="extractOutputPath"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="extractOutputPath" type="ExtractOutputPath">
|
||||
<DESCRIPTION>extract the hdfs output path generated in the previous node</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hdfsOutputPathParam" required="true" type="string">outputPath</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="updateDOIBoost"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="updateDOIBoost" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>prepare a new version of DOIBoost</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'crossrefTimestamp' : 'crossrefTimestamp',
|
||||
'hostedByMapPath' : 'hostedByMapPath',
|
||||
'MAGDumpPath' :'MAGDumpPath',
|
||||
'inputPathMAG' : 'intermediatePathMAG',
|
||||
'inputPathCrossref' : 'inputPathCrossref',
|
||||
'crossrefDumpPath':'crossrefDumpPath',
|
||||
'inputPathUnpayWall' : 'inputPathUnpayWall',
|
||||
'inputPathOrcid' : 'inputPathOrcid',
|
||||
'outputPath' : 'outputPath',
|
||||
'workingPathOrcid':'workingPathOrcid',
|
||||
'resumeFrom' : 'resumeFrom'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/doiboost_process/oozie_app',
|
||||
'workingPath' : '/data/doiboost/process_p',
|
||||
'sparkExecutorCores' : '2',
|
||||
'sparkExecutorIntersectionMemory' : '12G',
|
||||
'sparkExecutorMemory' : '8G',
|
||||
'esServer' : '[es_server]',
|
||||
'esIndex' : 'crossref'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="updateActionSets"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="updateActionSets" type="UpdateActionSets">
|
||||
<DESCRIPTION>update action sets</DESCRIPTION>
|
||||
<PARAMETERS/>
|
||||
<ARCS>
|
||||
<ARC to="success"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
</CONFIGURATION>
|
||||
<STATUS>
|
||||
<LAST_EXECUTION_ID>wf_20210714_075237_381</LAST_EXECUTION_ID>
|
||||
<LAST_EXECUTION_DATE>2021-07-14T09:51:46+00:00</LAST_EXECUTION_DATE>
|
||||
<LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
|
||||
<LAST_EXECUTION_ERROR/>
|
||||
</STATUS>
|
||||
</BODY>
|
||||
</RESOURCE_PROFILE>
|
|
@ -0,0 +1,132 @@
|
|||
<RESOURCE_PROFILE>
|
||||
<HEADER>
|
||||
<RESOURCE_IDENTIFIER value="4bb067d5-a2f2-42b9-844c-4e1d8d71b80f_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
|
||||
<RESOURCE_TYPE value="WorkflowDSResourceType"/>
|
||||
<RESOURCE_KIND value="WorkflowDSResources"/>
|
||||
<RESOURCE_URI value=""/>
|
||||
<DATE_OF_CREATION value="2021-05-20T15:00:27+00:00"/>
|
||||
</HEADER>
|
||||
<BODY>
|
||||
<WORKFLOW_NAME>Import H2020classification</WORKFLOW_NAME>
|
||||
<WORKFLOW_TYPE>Import H2020classification</WORKFLOW_TYPE>
|
||||
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
|
||||
<CONFIGURATION start="manual">
|
||||
<NODE isStart="true" name="setProjectFileURL" type="SetEnvParameter">
|
||||
<DESCRIPTION>sets the URL to download the project file</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">projectFileURL</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">https://cordis.europa.eu/data/cordis-h2020projects.csv</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setProgrammeFileURL" type="SetEnvParameter">
|
||||
<DESCRIPTION>sets the URL to download the programme file</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">programmeFileURL</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">https://cordis.europa.eu/data/reference/cordisref-h2020programmes.csv</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setTopicFileURL" type="SetEnvParameter">
|
||||
<DESCRIPTION>sets the URL to download the topics file</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">topicFileURL</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">https://cordis.europa.eu/data/reference/cordisref-h2020topics.xlsx</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setSheetName" type="SetEnvParameter">
|
||||
<DESCRIPTION>sets the name of the sheet in the topic file to be read</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">sheetName</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">Topics</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isJoin="true" name="waitConfig">
|
||||
<DESCRIPTION>wait configurations</DESCRIPTION>
|
||||
<PARAMETERS/>
|
||||
<ARCS>
|
||||
<ARC to="prepareActionSets"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="prepareActionSets" type="PrepareActionSets">
|
||||
<DESCRIPTION>prepare action sets</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="sets" required="true" type="string">
|
||||
[
|
||||
{
|
||||
'set' : 'h2020classification',
|
||||
'jobProperty' : 'export_action_set_h2020classification',
|
||||
'enablingProperty' : 'active_h2020classification',
|
||||
'enabled' : 'true'
|
||||
}
|
||||
]
|
||||
</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="extractOutputPath"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="extractOutputPath" type="ExtractOutputPath">
|
||||
<DESCRIPTION>extract the hdfs output path generated in the previous node</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hdfsOutputPathParam" required="true" type="string">outputPath</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="h2020ClassificationUpdate"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="h2020ClassificationUpdate" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>prepare updates for the H2020 Classification</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'outputPath': 'outputPath',
|
||||
'sheetName':'sheetName',
|
||||
'projectFileURL' : 'projectFileURL',
|
||||
'programmeFileURL' : 'programmeFileURL',
|
||||
'topicFileURL':'topicFileURL'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/project/oozie_app',
|
||||
'workingDir' : '/tmp/prod_provision/working_dir/h2020classification',
|
||||
'postgresURL':'',
|
||||
'postgresUser':'',
|
||||
'postgresPassword':''
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="updateActionSets"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="updateActionSets" type="UpdateActionSets">
|
||||
<DESCRIPTION>update action sets</DESCRIPTION>
|
||||
<PARAMETERS/>
|
||||
<ARCS>
|
||||
<ARC to="success"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
</CONFIGURATION>
|
||||
<STATUS>
|
||||
<LAST_EXECUTION_ID>wf_20210524_084803_740</LAST_EXECUTION_ID>
|
||||
<LAST_EXECUTION_DATE>2021-05-24T09:05:50+00:00</LAST_EXECUTION_DATE>
|
||||
<LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
|
||||
<LAST_EXECUTION_ERROR/>
|
||||
</STATUS>
|
||||
</BODY>
|
||||
</RESOURCE_PROFILE>
|
|
@ -0,0 +1,101 @@
|
|||
<RESOURCE_PROFILE>
|
||||
<HEADER>
|
||||
<RESOURCE_IDENTIFIER value="7c8765af-1253-4bd7-8806-315b73bf7319_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
|
||||
<RESOURCE_TYPE value="WorkflowDSResourceType"/>
|
||||
<RESOURCE_KIND value="WorkflowDSResources"/>
|
||||
<RESOURCE_URI value=""/>
|
||||
<DATE_OF_CREATION value="2021-07-15T16:06:50+00:00"/>
|
||||
</HEADER>
|
||||
<BODY>
|
||||
<WORKFLOW_NAME>Import Orcid</WORKFLOW_NAME>
|
||||
<WORKFLOW_TYPE>Import InfoSpace</WORKFLOW_TYPE>
|
||||
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
|
||||
<CONFIGURATION start="manual">
|
||||
<NODE isStart="true" name="setInputPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>set the hdfs input path</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">inputPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/orcid_activities_2020</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="setProcessOutputPath"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="setProcessOutputPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>set the temporary path where to store the action set</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">processOutputPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/prod_provision/working_path_orcid_activities</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="prepareActionSets"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="prepareActionSets" type="PrepareActionSets">
|
||||
<DESCRIPTION>prepare action sets</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="sets" required="true" type="string">
|
||||
[
|
||||
{
|
||||
'set' : 'orcidworks-no-doi',
|
||||
'jobProperty' : 'export_action_set_orcidworks_no_doi',
|
||||
'enablingProperty' : 'active_orcidworks_no_doi',
|
||||
'enabled' : 'true'
|
||||
}
|
||||
]
|
||||
</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="extractOutputPath"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="extractOutputPath" type="ExtractOutputPath">
|
||||
<DESCRIPTION>extract the hdfs output path generated in the previous node</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hdfsOutputPathParam" required="true" type="string">outputPath</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="orcidNoDoiUpdate"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="orcidNoDoiUpdate" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>prepare updates for the Orcid No Doi</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'workingPath' : 'inputPath',
|
||||
'processOutputPath' : 'processOutputPath',
|
||||
'outputPath': 'outputPath'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/orcidnodoi_actionset/oozie_app',
|
||||
'spark2GenNoDoiDatasetMaxExecutors' : '200',
|
||||
'spark2GenNoDoiDatasetExecutorMemory' : '2G'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="updateActionSets"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="updateActionSets" type="UpdateActionSets">
|
||||
<DESCRIPTION>update action sets</DESCRIPTION>
|
||||
<PARAMETERS/>
|
||||
<ARCS>
|
||||
<ARC to="success"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
</CONFIGURATION>
|
||||
<STATUS>
|
||||
<LAST_EXECUTION_ID>wf_20210713_170819_470</LAST_EXECUTION_ID>
|
||||
<LAST_EXECUTION_DATE>2021-07-13T17:28:26+00:00</LAST_EXECUTION_DATE>
|
||||
<LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
|
||||
<LAST_EXECUTION_ERROR/>
|
||||
</STATUS>
|
||||
</BODY>
|
||||
</RESOURCE_PROFILE>
|
|
@ -0,0 +1,89 @@
|
|||
<RESOURCE_PROFILE>
|
||||
<HEADER>
|
||||
<RESOURCE_IDENTIFIER value="7cf3cfed-fbfb-46ca-b4da-aa43beb58f19_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
|
||||
<RESOURCE_TYPE value="WorkflowDSResourceType"/>
|
||||
<RESOURCE_KIND value="WorkflowDSResources"/>
|
||||
<RESOURCE_URI value=""/>
|
||||
<DATE_OF_CREATION value="2021-05-14T13:51:56+00:00"/>
|
||||
</HEADER>
|
||||
<BODY>
|
||||
<WORKFLOW_NAME>Update ROR actionset</WORKFLOW_NAME>
|
||||
<WORKFLOW_TYPE>Import Infospace</WORKFLOW_TYPE>
|
||||
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
|
||||
<CONFIGURATION start="manual">
|
||||
<NODE isStart="true" name="setInputPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the path of the ROR JSON dump used as input for the actionset update</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">inputPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/ror/ror-data-2021-04-06.json</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="prepareActionSets"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="prepareActionSets" type="PrepareActionSets">
|
||||
<DESCRIPTION>prepare action sets</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="sets" required="true" type="string">
|
||||
[
|
||||
{
|
||||
'set' : 'ror',
|
||||
'jobProperty' : 'export_action_set_ror',
|
||||
'enablingProperty' : 'active_ror',
|
||||
'enabled' : 'true'
|
||||
}
|
||||
]
|
||||
</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="extractOutputPath"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="extractOutputPath" type="ExtractOutputPath">
|
||||
<DESCRIPTION>extract the hdfs output path generated in the previous node</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hdfsOutputPathParam" required="true" type="string">outputPath</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="rorUpdate"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="rorUpdate" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>update the ROR actionset</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'rorJsonInputPath' : 'inputPath',
|
||||
'rorActionSetPath': 'outputPath'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/ror/oozie_app',
|
||||
'workingDir': '/tmp/import_ror_actionset_prod'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="updateActionSets"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="updateActionSets" type="UpdateActionSets">
|
||||
<DESCRIPTION>update action sets</DESCRIPTION>
|
||||
<PARAMETERS/>
|
||||
<ARCS>
|
||||
<ARC to="success"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
</CONFIGURATION>
|
||||
<STATUS>
|
||||
<LAST_EXECUTION_ID>wf_20210518_143542_478</LAST_EXECUTION_ID>
|
||||
<LAST_EXECUTION_DATE>2021-05-18T14:37:13+00:00</LAST_EXECUTION_DATE>
|
||||
<LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
|
||||
<LAST_EXECUTION_ERROR/>
|
||||
</STATUS>
|
||||
</BODY>
|
||||
</RESOURCE_PROFILE>
|
|
@ -0,0 +1,628 @@
|
|||
<RESOURCE_PROFILE>
|
||||
<HEADER>
|
||||
<RESOURCE_IDENTIFIER value="4801c33c-66ca-4ab6-af64-aa812194ec67_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
|
||||
<RESOURCE_TYPE value="WorkflowDSResourceType"/>
|
||||
<RESOURCE_KIND value="WorkflowDSResources"/>
|
||||
<RESOURCE_URI value=""/>
|
||||
<DATE_OF_CREATION value="2021-07-30T09:42:23+00:00"/>
|
||||
</HEADER>
|
||||
<BODY>
|
||||
<WORKFLOW_NAME>Graph construction for IIS [BETA]</WORKFLOW_NAME>
|
||||
<WORKFLOW_TYPE>IIS</WORKFLOW_TYPE>
|
||||
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
|
||||
<CONFIGURATION start="manual">
|
||||
<NODE isStart="true" name="setNsPrefixBlacklist" type="SetEnvParameter">
|
||||
<DESCRIPTION>set blacklist of funder nsPrefixes</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">nsPrefixBlacklist</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">gsrt________,rcuk________</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setIdMappingPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>set the path of the map defining the relations id mappings</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">idMappingPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/maps/fct_map.json</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setMergedGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the MERGED graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">mergedGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_inference/graph/01_graph_merged</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setRawGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the RAW graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">rawGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_inference/graph/02_graph_raw</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setCleanedFirstGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the first CLEANED graph (the one produced before the duplicate scan)</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">cleanedFirstGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_inference/graph/03_graph_clean_first</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setDedupGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the DEDUPED graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">dedupGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_inference/graph/04_graph_dedup</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setConsistentGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the CONSISTENT graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">consistentGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_inference/graph/05_graph_consistent</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setCleanedGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the CLEANED graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">cleanedGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_inference/graph/06_graph_cleaned</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setDedupConfig" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the dedup orchestrator name</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">dedupConfig</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">dedup-similarity-result-decisiontree-v2</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="actionSetsRaw" type="SetEnvParameter">
|
||||
<DESCRIPTION>declares the ActionSet ids to promote in the RAW graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">actionSetIdsRawGraph</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">scholexplorer-dump,doiboost,orcidworks-no-doi,datacite</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setIsLookUpUrl" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the IS lookup service address</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">isLookUpUrl</PARAM>
|
||||
<PARAM managedBy="system" name="parameterValue" required="true" type="string">http://beta.services.openaire.eu:8280/is/services/isLookUp?wsdl</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isJoin="true" name="waitConfig">
|
||||
<DESCRIPTION>wait configurations</DESCRIPTION>
|
||||
<PARAMETERS/>
|
||||
<ARCS>
|
||||
<ARC to="reuseODFClaims_PROD"/>
|
||||
<ARC to="reuseODFClaims_BETA"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseODFClaims_PROD" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached ODF claims from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseODFClaims_PROD</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="reuseODF_hdfs_PROD"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseODF_hdfs_PROD" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached ODF records on HDFS from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseODFhdfs_PROD</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="reuseOAFClaims_PROD"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseOAFClaims_PROD" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached OAF claims from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseOAFClaims_PROD</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="reuseOAF_hdfs_PROD"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseOAF_hdfs_PROD" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached OAF records on HDFS from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseOAFhdfs_PROD</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="reuseDB_PROD"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseDB_PROD" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached DB content from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseDB_PROD</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="reuseDBOpenorgs_PROD"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseDBOpenorgs_PROD" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached OpenOrgs content from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseDBOpenorgs_PROD</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="reuseODF_PROD"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseODF_PROD" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached ODF content from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseODF_PROD</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="reuseOAF_PROD"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseOAF_PROD" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached OAF content from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseOAF_PROD</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="patchRelations_PROD"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="patchRelations_PROD" type="SetEnvParameter">
|
||||
<DESCRIPTION>should apply the relations id patching based on the provided idMapping on PROD?</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">shouldPatchRelations_PROD</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">false</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="contentPathProd"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="contentPathProd" type="SetEnvParameter">
|
||||
<DESCRIPTION>set the PROD aggregator content path</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">prodContentPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/prod_aggregator_for_beta</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="prodAggregatorGraphPath"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="prodAggregatorGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the path containing the PROD AGGREGATOR graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">prodAggregatorGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_inference/graph/00_prod_graph_aggregator</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig2"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseODFClaims_BETA" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached ODF claims from the BETA aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseODFClaims_BETA</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="reuseODF_hdfs_BETA"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseODF_hdfs_BETA" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached ODF records on HDFS from the BETA aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseODFhdfs_BETA</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="reuseOAFClaims_BETA"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseOAFClaims_BETA" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached OAF claims from the BETA aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseOAFClaims_BETA</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="reuseOAF_hdfs_BETA"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseOAF_hdfs_BETA" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached OAF records on HDFS from the BETA aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseOAFhdfs_BETA</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="reuseDB_BETA"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseDB_BETA" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached DB content from the BETA aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseDB_BETA</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="reuseDBOpenorgs_BETA"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseDBOpenorgs_BETA" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached OpenOrgs content from the BETA aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseDBOpenorgs_BETA</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="reuseODF_BETA"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseODF_BETA" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached ODF content from the BETA aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseODF_BETA</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="reuseOAF_BETA"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseOAF_BETA" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached OAF content from the BETA aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseOAF_BETA</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="patchRelations_BETA"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="patchRelations_BETA" type="SetEnvParameter">
|
||||
<DESCRIPTION>should apply the relations id patching based on the provided idMapping on BETA?</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">shouldPatchRelations_BETA</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">false</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="contentPathBeta"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="contentPathBeta" type="SetEnvParameter">
|
||||
<DESCRIPTION>set the BETA aggregator content path</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">betaContentPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_aggregator</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="betaAggregatorGraphPath"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="betaAggregatorGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the path containing the BETA AGGREGATOR graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">betaAggregatorGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_inference/graph/00_beta_graph_aggregator</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig2"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isJoin="true" name="waitConfig2">
|
||||
<DESCRIPTION>wait configurations</DESCRIPTION>
|
||||
<PARAMETERS/>
|
||||
<ARCS>
|
||||
<ARC to="betaAggregatorGraph"/>
|
||||
<ARC to="prodAggregatorGraph"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="betaAggregatorGraph" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>create the BETA AGGREGATOR graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'graphOutputPath' : 'betaAggregatorGraphPath',
|
||||
'isLookupUrl' : 'isLookUpUrl',
|
||||
'reuseODFClaims' : 'reuseODFClaims_BETA',
|
||||
'reuseOAFClaims' : 'reuseOAFClaims_BETA',
|
||||
'reuseDB' : 'reuseDB_BETA',
|
||||
'reuseDBOpenorgs' : 'reuseDBOpenorgs_BETA',
|
||||
'reuseODF' : 'reuseODF_BETA',
|
||||
'reuseODF_hdfs' : 'reuseODFhdfs_BETA',
|
||||
'reuseOAF' : 'reuseOAF_BETA',
|
||||
'reuseOAF_hdfs' : 'reuseOAFhdfs_BETA',
|
||||
'contentPath' : 'betaContentPath',
|
||||
'nsPrefixBlacklist' : 'nsPrefixBlacklist',
|
||||
'shouldPatchRelations' : 'shouldPatchRelations_BETA',
|
||||
'idMappingPath' : 'idMappingPath'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app',
|
||||
'mongoURL' : '',
|
||||
'mongoDb' : '',
|
||||
'mdstoreManagerUrl' : '',
|
||||
'postgresURL' : '',
|
||||
'postgresUser' : '',
|
||||
'postgresPassword' : '',
|
||||
'postgresOpenOrgsURL' : '',
|
||||
'postgresOpenOrgsUser' : '',
|
||||
'postgresOpenOrgsPassword' : '',
|
||||
'shouldHashId' : 'true',
|
||||
'importOpenorgs' : 'true',
|
||||
'workingDir' : '/tmp/beta_inference/working_dir/beta_aggregator'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitAggregatorGraph"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="prodAggregatorGraph" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>create the PROD AGGREGATOR graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'graphOutputPath' : 'prodAggregatorGraphPath',
|
||||
'isLookupUrl' : 'isLookUpUrl',
|
||||
'reuseODFClaims' : 'reuseODFClaims_PROD',
|
||||
'reuseOAFClaims' : 'reuseOAFClaims_PROD',
|
||||
'reuseDB' : 'reuseDB_PROD',
|
||||
'reuseDBOpenorgs' : 'reuseDBOpenorgs_PROD',
|
||||
'reuseODF' : 'reuseODF_PROD',
|
||||
'reuseODF_hdfs' : 'reuseODFhdfs_PROD',
|
||||
'reuseOAF' : 'reuseOAF_PROD',
|
||||
'reuseOAF_hdfs' : 'reuseOAFhdfs_PROD',
|
||||
'contentPath' : 'prodContentPath',
|
||||
'nsPrefixBlacklist' : 'nsPrefixBlacklist',
|
||||
'shouldPatchRelations' : 'shouldPatchRelations_PROD',
|
||||
'idMappingPath' : 'idMappingPath'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app',
|
||||
'mongoURL' : '',
|
||||
'mongoDb' : '',
|
||||
'mdstoreManagerUrl' : '',
|
||||
'postgresURL' : '',
|
||||
'postgresUser' : '',
|
||||
'postgresPassword' : '',
|
||||
'postgresOpenOrgsURL' : '',
|
||||
'postgresOpenOrgsUser' : '',
|
||||
'postgresOpenOrgsPassword' : '',
|
||||
'shouldHashId' : 'true',
|
||||
'importOpenorgs' : 'true',
|
||||
'workingDir' : '/tmp/beta_inference/working_dir/prod_aggregator'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitAggregatorGraph"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isJoin="true" name="waitAggregatorGraph">
|
||||
<DESCRIPTION>wait for the BETA and PROD aggregator graphs</DESCRIPTION>
|
||||
<PARAMETERS/>
|
||||
<ARCS>
|
||||
<ARC to="mergeAggregatorGraphs"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="mergeAggregatorGraphs" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>merge the BETA and PROD AGGREGATOR graphs into the MERGED graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'betaInputGraphPath' : 'betaAggregatorGraphPath',
|
||||
'prodInputGraphPath' : 'prodAggregatorGraphPath',
|
||||
'graphOutputPath' : 'mergedGraphPath'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/merge/oozie_app',
|
||||
'workingDir' : '/tmp/beta_inference/working_dir/merge_graph',
|
||||
'priority' : 'BETA'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="promoteActionsRaw"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="promoteActionsRaw" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>create the RAW graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'inputActionSetIds' : 'actionSetIdsRawGraph',
|
||||
'inputGraphRootPath' : 'mergedGraphPath',
|
||||
'outputGraphRootPath' : 'rawGraphPath',
|
||||
'isLookupUrl' : 'isLookUpUrl'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/BETA/actionmanager/wf/main/oozie_app',
|
||||
'sparkExecutorCores' : '3',
|
||||
'sparkExecutorMemory' : '10G',
|
||||
'activePromoteDatasetActionPayload' : 'true',
|
||||
'activePromoteDatasourceActionPayload' : 'true',
|
||||
'activePromoteOrganizationActionPayload' : 'true',
|
||||
'activePromoteOtherResearchProductActionPayload' : 'true',
|
||||
'activePromoteProjectActionPayload' : 'true',
|
||||
'activePromotePublicationActionPayload' : 'true',
|
||||
'activePromoteRelationActionPayload' : 'true',
|
||||
'activePromoteResultActionPayload' : 'true',
|
||||
'activePromoteSoftwareActionPayload' : 'true',
|
||||
'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET',
|
||||
'workingDir' : '/tmp/beta_inference/working_dir/promoteActionsRaw'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="graphCleaningFirst"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="graphCleaningFirst" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'graphInputPath' : 'rawGraphPath',
|
||||
'graphOutputPath': 'cleanedFirstGraphPath',
|
||||
'isLookupUrl': 'isLookUpUrl'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app',
|
||||
'workingDir' : '/tmp/beta_inference/working_dir/clean_first'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="duplicateScan"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="duplicateScan" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>search for duplicates in the first cleaned graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'actionSetId' : 'dedupConfig',
|
||||
'graphBasePath' : 'cleanedFirstGraphPath',
|
||||
'dedupGraphPath': 'dedupGraphPath',
|
||||
'isLookUpUrl' : 'isLookUpUrl'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/scan/oozie_app',
|
||||
'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple',
|
||||
'workingPath' : '/tmp/beta_inference/working_dir/dedup',
|
||||
'sparkExecutorCores' : '3',
|
||||
'sparkExecutorMemory' : '10G'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="dedupConsistency"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="dedupConsistency" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>mark duplicates as deleted and redistribute the relationships</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'graphBasePath' : 'dedupGraphPath',
|
||||
'graphOutputPath': 'consistentGraphPath'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/consistency/oozie_app',
|
||||
'workingPath' : '/tmp/beta_inference/working_dir/dedup'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="graphCleaning"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="graphCleaning" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'graphInputPath' : 'consistentGraphPath',
|
||||
'graphOutputPath': 'cleanedGraphPath',
|
||||
'isLookupUrl': 'isLookUpUrl'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app',
|
||||
'workingDir' : '/tmp/beta_inference/working_dir/clean'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="success"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
</CONFIGURATION>
|
||||
<STATUS>
|
||||
<LAST_EXECUTION_ID>wf_20210730_094240_462</LAST_EXECUTION_ID>
|
||||
<LAST_EXECUTION_DATE>2021-07-30T15:04:19+00:00</LAST_EXECUTION_DATE>
|
||||
<LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
|
||||
<LAST_EXECUTION_ERROR/>
|
||||
</STATUS>
|
||||
</BODY>
|
||||
</RESOURCE_PROFILE>
|
|
@ -0,0 +1,437 @@
|
|||
<RESOURCE_PROFILE>
|
||||
<HEADER>
|
||||
<RESOURCE_IDENTIFIER value="4801c33c-66ca-4ab6-af64-aa812194ec69_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
|
||||
<RESOURCE_TYPE value="WorkflowDSResourceType"/>
|
||||
<RESOURCE_KIND value="WorkflowDSResources"/>
|
||||
<RESOURCE_URI value=""/>
|
||||
<DATE_OF_CREATION value="2021-07-29T14:28:39+00:00"/>
|
||||
</HEADER>
|
||||
<BODY>
|
||||
<WORKFLOW_NAME>Graph construction for IIS [PROD NEW]</WORKFLOW_NAME>
|
||||
<WORKFLOW_TYPE>IIS</WORKFLOW_TYPE>
|
||||
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
|
||||
<CONFIGURATION start="manual">
|
||||
<NODE isStart="true" name="setNsPrefixBlacklist" type="SetEnvParameter">
|
||||
<DESCRIPTION>set blacklist of funder nsPrefixes</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">nsPrefixBlacklist</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">conicytf____,dfgf________,gsrt________,innoviris___,miur________,rif_________,rsf_________,sgov________,sfrs________</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setIdMappingPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>set the path of the map defining the relations id mappings</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">idMappingPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/maps/fct_map.json</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="aggregatorGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the path containing the PROD AGGREGATOR graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">aggregatorGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/prod_inference/graph/00_graph_aggregator</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setRawGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the RAW graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">rawGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/prod_inference/graph/01_graph_raw</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setCleanedFirstGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the first CLEANED graph (cleaning of the RAW graph, before the duplicate scan)</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">cleanedFirstGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/prod_inference/graph/02_graph_clean_first</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setDedupGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the DEDUPED graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">dedupGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/prod_inference/graph/03_graph_dedup</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setConsistentGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the CONSISTENT graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">consistentGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/prod_inference/graph/04_graph_consistent</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setCleanedGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the CLEANED graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">cleanedGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/prod_inference/graph/05_graph_cleaned</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setDedupConfig" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the dedup orchestrator name</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">dedupConfig</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">dedup-similarity-result-decisiontree-v2</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="actionSetsRaw" type="SetEnvParameter">
|
||||
<DESCRIPTION>declares the ActionSet ids to promote in the RAW graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">actionSetIdsRawGraph</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">scholexplorer-dump,doiboost,orcidworks-no-doi,datacite</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setIsLookUpUrl" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the IS lookup service address</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">isLookUpUrl</PARAM>
|
||||
<PARAM managedBy="system" name="parameterValue" required="true" type="string">http://services.openaire.eu:8280/is/services/isLookUp?wsdl</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isJoin="true" name="waitConfig">
|
||||
<DESCRIPTION>wait configurations</DESCRIPTION>
|
||||
<PARAMETERS/>
|
||||
<ARCS>
|
||||
<ARC to="reuseODFClaims"/>
|
||||
<ARC to="reuseOAFClaims"/>
|
||||
<ARC to="reuseODF_hdfs"/>
|
||||
<ARC to="reuseOAF_hdfs"/>
|
||||
<ARC to="reuseODF"/>
|
||||
<ARC to="reuseOAF"/>
|
||||
<ARC to="reuseDB"/>
|
||||
<ARC to="reuseDBOpenorgs"/>
|
||||
<ARC to="patchRelations"/>
|
||||
<ARC to="contentPath"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseODFClaims" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached ODF claims from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseODFClaims</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig2"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseOAFClaims" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached OAF claims from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseOAFClaims</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig2"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseODF_hdfs" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached ODF records on HDFS from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseODFhdfs</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig2"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseOAF_hdfs" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached OAF records on HDFS from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseOAFhdfs</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig2"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseODF" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached ODF content from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseODF</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig2"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseOAF" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached OAF content from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseOAF</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig2"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseDB" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached DB content from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseDB</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig2"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseDBOpenorgs" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached OpenOrgs content from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseDBOpenorgs</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig2"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="patchRelations" type="SetEnvParameter">
|
||||
<DESCRIPTION>whether to patch the relation ids based on the provided idMapping</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">shouldPatchRelations</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">false</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig2"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="contentPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>set the PROD aggregator content path</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">contentPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/prod_aggregator</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig2"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isJoin="true" name="waitConfig2">
|
||||
<DESCRIPTION>wait configurations</DESCRIPTION>
|
||||
<PARAMETERS/>
|
||||
<ARCS>
|
||||
<ARC to="aggregatorGraph"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="aggregatorGraph" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>create the PROD AGGREGATOR graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'graphOutputPath' : 'aggregatorGraphPath',
|
||||
'isLookupUrl' : 'isLookUpUrl',
|
||||
'reuseODFClaims' : 'reuseODFClaims',
|
||||
'reuseOAFClaims' : 'reuseOAFClaims',
|
||||
'reuseDB' : 'reuseDB',
|
||||
'reuseDBOpenorgs' : 'reuseDBOpenorgs',
|
||||
'reuseODF' : 'reuseODF',
|
||||
'reuseODF_hdfs' : 'reuseODFhdfs',
|
||||
'reuseOAF' : 'reuseOAF',
|
||||
'reuseOAF_hdfs' : 'reuseOAFhdfs',
|
||||
'contentPath' : 'contentPath',
|
||||
'nsPrefixBlacklist' : 'nsPrefixBlacklist',
|
||||
'shouldPatchRelations' : 'shouldPatchRelations',
|
||||
'idMappingPath' : 'idMappingPath'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/raw_all/oozie_app',
|
||||
'mongoURL' : '',
|
||||
'mongoDb' : '',
|
||||
'mdstoreManagerUrl' : '',
|
||||
'postgresURL' : '',
|
||||
'postgresUser' : '',
|
||||
'postgresPassword' : '',
|
||||
'postgresOpenOrgsURL' : '',
|
||||
'postgresOpenOrgsUser' : '',
|
||||
'postgresOpenOrgsPassword' : '',
|
||||
'shouldHashId' : 'true',
|
||||
'importOpenorgs' : 'true',
|
||||
'workingDir' : '/tmp/prod_inference/working_dir/prod_aggregator'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="promoteActionsRaw"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="promoteActionsRaw" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>create the RAW graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'inputActionSetIds' : 'actionSetIdsRawGraph',
|
||||
'inputGraphRootPath' : 'aggregatorGraphPath',
|
||||
'outputGraphRootPath' : 'rawGraphPath',
|
||||
'isLookupUrl' : 'isLookUpUrl'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/wf/main/oozie_app',
|
||||
'sparkExecutorCores' : '3',
|
||||
'sparkExecutorMemory' : '10G',
|
||||
'activePromoteDatasetActionPayload' : 'true',
|
||||
'activePromoteDatasourceActionPayload' : 'true',
|
||||
'activePromoteOrganizationActionPayload' : 'true',
|
||||
'activePromoteOtherResearchProductActionPayload' : 'true',
|
||||
'activePromoteProjectActionPayload' : 'true',
|
||||
'activePromotePublicationActionPayload' : 'true',
|
||||
'activePromoteRelationActionPayload' : 'true',
|
||||
'activePromoteResultActionPayload' : 'true',
|
||||
'activePromoteSoftwareActionPayload' : 'true',
|
||||
'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET',
|
||||
'workingDir' : '/tmp/prod_inference/working_dir/promoteActionsRaw'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="graphCleaningFirst"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="graphCleaningFirst" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'graphInputPath' : 'rawGraphPath',
|
||||
'graphOutputPath': 'cleanedFirstGraphPath',
|
||||
'isLookupUrl': 'isLookUpUrl'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app',
|
||||
'workingDir' : '/tmp/prod_inference/working_dir/clean_first'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="duplicateScan"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="duplicateScan" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>search for duplicates in the first cleaned graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'actionSetId' : 'dedupConfig',
|
||||
'graphBasePath' : 'cleanedFirstGraphPath',
|
||||
'dedupGraphPath': 'dedupGraphPath',
|
||||
'isLookUpUrl' : 'isLookUpUrl'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/scan/oozie_app',
|
||||
'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple',
|
||||
'workingPath' : '/tmp/prod_inference/working_dir/dedup',
|
||||
'sparkExecutorCores' : '3',
|
||||
'sparkExecutorMemory' : '10G'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="dedupConsistency"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="dedupConsistency" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>mark duplicates as deleted and redistribute the relationships</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'graphBasePath' : 'dedupGraphPath',
|
||||
'graphOutputPath': 'consistentGraphPath'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/consistency/oozie_app',
|
||||
'workingPath' : '/tmp/prod_inference/working_dir/dedup'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="graphCleaning"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="graphCleaning" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'graphInputPath' : 'consistentGraphPath',
|
||||
'graphOutputPath': 'cleanedGraphPath',
|
||||
'isLookupUrl': 'isLookUpUrl'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app',
|
||||
'workingDir' : '/tmp/prod_inference/working_dir/clean'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="success"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
</CONFIGURATION>
|
||||
<STATUS>
|
||||
<LAST_EXECUTION_ID>wf_20210719_165159_86</LAST_EXECUTION_ID>
|
||||
<LAST_EXECUTION_DATE>2021-07-19T20:45:09+00:00</LAST_EXECUTION_DATE>
|
||||
<LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
|
||||
<LAST_EXECUTION_ERROR/>
|
||||
</STATUS>
|
||||
</BODY>
|
||||
</RESOURCE_PROFILE>
|
|
@ -0,0 +1,225 @@
|
|||
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
||||
<HEADER>
|
||||
<RESOURCE_IDENTIFIER value="47340fe4-36e1-41cb-9660-b13eeeb874be_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
|
||||
<RESOURCE_TYPE value="WorkflowDSResourceType"/>
|
||||
<RESOURCE_KIND value="WorkflowDSResources"/>
|
||||
<RESOURCE_URI value=""/>
|
||||
<DATE_OF_CREATION value="2021-08-06T09:15:17+00:00"/>
|
||||
</HEADER>
|
||||
<BODY>
|
||||
<WORKFLOW_NAME>IIS main workflow V3 [PROD]</WORKFLOW_NAME>
|
||||
<WORKFLOW_TYPE>IIS</WORKFLOW_TYPE>
|
||||
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
|
||||
<CONFIGURATION start="manual">
|
||||
<NODE isStart="true" name="start">
|
||||
<DESCRIPTION>start</DESCRIPTION>
|
||||
<PARAMETERS/>
|
||||
<ARCS>
|
||||
<ARC to="setFundersBlacklist"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="setFundersBlacklist" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set a regex of funder shortnames to exclude from the project reference processing</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">referenceextraction_project_fundingclass_blacklist_regex</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">^DFG::.*$|^CONICYT::.*$|^RSF::.*$|^SGOV::.*$|^GSRT::.*$|^MIUR::.*$|^INNOVIRIS::.*$|^RIF::.*$|^SFRS::.*$</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="prepareActionSets"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="prepareActionSets" type="PrepareActionSets">
|
||||
<DESCRIPTION>prepare action sets</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="sets" required="true" type="string">
|
||||
[
|
||||
{
|
||||
'set' : 'iis-document-affiliation',
|
||||
'jobProperty' : 'export_action_set_id_matched_doc_organizations',
|
||||
'enablingProperty' : 'active_document_affiliation',
|
||||
'enabled' : 'true'
|
||||
},
|
||||
{
|
||||
'set' : 'iis-referenced-projects-main',
|
||||
'jobProperty' : 'export_action_set_id_document_referencedProjects',
|
||||
'enablingProperty' : 'active_referenceextraction_project',
|
||||
'enabled' : 'true'
|
||||
},
|
||||
{
|
||||
'set' : 'iis-referenced-datasets-main',
|
||||
'jobProperty' : 'export_action_set_id_document_referencedDatasets',
|
||||
'enablingProperty' : 'active_referenceextraction_dataset',
|
||||
'enabled' : 'true'
|
||||
},
|
||||
{
|
||||
'set' : 'iis-researchinitiative',
|
||||
'jobProperty' : 'export_action_set_id_document_research_initiative',
|
||||
'enablingProperty' : 'active_referenceextraction_researchinitiative',
|
||||
'enabled' : 'true'
|
||||
},
|
||||
{
|
||||
'set' : 'iis-document-similarities',
|
||||
'jobProperty' : 'export_action_set_id_document_similarities_standard',
|
||||
'enablingProperty' : 'active_documentssimilarity',
|
||||
'enabled' : 'true'
|
||||
},
|
||||
{
|
||||
'set' : 'iis-document-classes',
|
||||
'jobProperty' : 'export_action_set_id_document_classes',
|
||||
'enablingProperty' : 'active_documentsclassification',
|
||||
'enabled' : 'true'
|
||||
},
|
||||
{
|
||||
'set' : 'iis-document-citations',
|
||||
'jobProperty' : 'export_action_set_id_document_referencedDocuments',
|
||||
'enablingProperty' : 'active_citationmatching',
|
||||
'enabled' : 'true'
|
||||
},
|
||||
{
|
||||
'set' : 'iis-document-citations-relations',
|
||||
'jobProperty' : 'export_action_set_id_citation_relations',
|
||||
'enablingProperty' : 'active_citationmatching_relations',
|
||||
'enabled' : 'true'
|
||||
},
|
||||
{
|
||||
'set' : 'iis-referenceextraction-pdb',
|
||||
'jobProperty' : 'export_action_set_id_document_pdb',
|
||||
'enablingProperty' : 'active_referenceextraction_pdb',
|
||||
'enabled' : 'true'
|
||||
},
|
||||
{
|
||||
'set' : 'document_software_url',
|
||||
'jobProperty' : 'export_action_set_id_document_software_url',
|
||||
'enablingProperty' : 'active_referenceextraction_software_url',
|
||||
'enabled' : 'true'
|
||||
},
|
||||
{
|
||||
'set' : 'iis-entities-software',
|
||||
'jobProperty' : 'export_action_set_id_entity_software',
|
||||
'enablingProperty' : 'active_referenceextraction_software_url',
|
||||
'enabled' : 'true'
|
||||
},
|
||||
{
|
||||
'set' : 'iis-communities',
|
||||
'jobProperty' : 'export_action_set_id_document_community',
|
||||
'enablingProperty' : 'active_referenceextraction_community',
|
||||
'enabled' : 'true'
|
||||
},
|
||||
{
|
||||
'set' : 'iis-referenced-patents',
|
||||
'jobProperty' : 'export_action_set_id_document_patent',
|
||||
'enablingProperty' : 'active_referenceextraction_patent',
|
||||
'enabled' : 'true'
|
||||
},
|
||||
{
|
||||
'set' : 'iis-entities-patent',
|
||||
'jobProperty' : 'export_action_set_id_entity_patent',
|
||||
'enablingProperty' : 'active_referenceextraction_patent',
|
||||
'enabled' : 'true'
|
||||
},
|
||||
{
|
||||
'set' : 'iis-covid-19',
|
||||
'jobProperty' : 'export_action_set_id_document_covid19',
|
||||
'enablingProperty' : 'active_referenceextraction_covid19',
|
||||
'enabled' : 'true'
|
||||
}
|
||||
]
|
||||
</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="prepareParameters"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="prepareParameters" type="PrepareIisMainParamsV2">
|
||||
<DESCRIPTION>prepare parameters</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="islookupLocationParam" required="true" type="string">import_islookup_service_location</PARAM>
|
||||
<PARAM managedBy="system" name="objectStoreParam" required="true" type="string">import_content_objectstores_csv</PARAM>
|
||||
<PARAM managedBy="system" name="objectStoreLocationParam" required="true" type="string">import_content_object_store_location</PARAM>
|
||||
<PARAM managedBy="system" name="mdStoreStoreLocationParam" required="true" type="string">import_mdstore_service_location</PARAM>
|
||||
<PARAM managedBy="system" name="mdStoreDatasetParam" required="true" type="string">import_dataset_mdstore_ids_csv</PARAM>
|
||||
<PARAM managedBy="system" name="oozieWfAppPathParam" required="true" type="string">oozie.wf.application.path</PARAM>
|
||||
<PARAM managedBy="user" name="oozieWfAppPath" required="true" type="string">/lib/iis/primary/snapshots/2021-06-23</PARAM>
|
||||
<PARAM function="validValues(['IIS','DM'])" managedBy="user" name="clusterName" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="user" name="importHbaseDumpLocation" required="true" type="string">/tmp/prod_inference/graph/05_graph_cleaned</PARAM>
|
||||
<PARAM managedBy="system" name="importHbaseDumpLocationParam" required="true" type="string">import_infospace_graph_location</PARAM>
|
||||
<PARAM managedBy="user" name="objectStoreBlacklistCSV" required="false" type="string"/>
|
||||
<PARAM managedBy="system" name="importProjectConceptsContextCSVParam" required="true" type="string">import_project_concepts_context_ids_csv</PARAM>
|
||||
<PARAM managedBy="user" name="importProjectConceptsContextCSV" required="true" type="string">aginfra,beopen,clarin,covid-19,dariah,dh-ch,oa-pg,egi,elixir-gr,enermaps,epos,fam,fet-fp7,fet-h2020,gotriple,instruct,mes,ni,rda,science-innovation-policy,risis,rural-digital-europe,sdsn-gr,sobigdata</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="main"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isJoin="true" name="main" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>IIS main</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">iisMainJobV3</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'cluster' : 'cluster',
|
||||
'oozie.wf.application.path' : 'oozie.wf.application.path',
|
||||
'referenceextraction_project_fundingclass_blacklist_regex' : 'referenceextraction_project_fundingclass_blacklist_regex',
|
||||
|
||||
'active_document_affiliation' : 'active_document_affiliation',
|
||||
'active_referenceextraction_project' : 'active_referenceextraction_project',
|
||||
'active_referenceextraction_dataset' : 'active_referenceextraction_dataset',
|
||||
'active_referenceextraction_researchinitiative' : 'active_referenceextraction_researchinitiative',
|
||||
'active_documentsclassification' : 'active_documentsclassification',
|
||||
'active_documentssimilarity' : 'active_documentssimilarity',
|
||||
'active_citationmatching' : 'active_citationmatching',
|
||||
'active_citationmatching_relations' : 'active_citationmatching_relations',
|
||||
'active_referenceextraction_pdb' : 'active_referenceextraction_pdb',
|
||||
'active_referenceextraction_software_url' : 'active_referenceextraction_software_url',
|
||||
'active_referenceextraction_community' : 'active_referenceextraction_community',
|
||||
'active_referenceextraction_patent' : 'active_referenceextraction_patent',
|
||||
'active_referenceextraction_covid19' : 'active_referenceextraction_covid19',
|
||||
|
||||
'import_content_objectstores_csv' : 'import_content_objectstores_csv',
|
||||
'import_content_object_store_location' : 'import_content_object_store_location',
|
||||
'import_mdstore_service_location' : 'import_mdstore_service_location',
|
||||
'import_islookup_service_location' : 'import_islookup_service_location',
|
||||
'import_project_concepts_context_ids_csv' : 'import_project_concepts_context_ids_csv',
|
||||
'import_dataset_mdstore_ids_csv' : 'import_dataset_mdstore_ids_csv',
|
||||
'import_infospace_graph_location' : 'import_infospace_graph_location',
|
||||
|
||||
'export_action_set_id_matched_doc_organizations' : 'export_action_set_id_matched_doc_organizations',
|
||||
'export_action_set_id_document_referencedDatasets' : 'export_action_set_id_document_referencedDatasets',
|
||||
'export_action_set_id_document_referencedProjects' : 'export_action_set_id_document_referencedProjects',
|
||||
'export_action_set_id_document_research_initiative' : 'export_action_set_id_document_research_initiative',
|
||||
'export_action_set_id_document_similarities_standard' : 'export_action_set_id_document_similarities_standard',
|
||||
|
||||
'export_action_set_id_document_referencedDocuments' : 'export_action_set_id_document_referencedDocuments',
|
||||
'export_action_set_id_document_pdb' : 'export_action_set_id_document_pdb',
|
||||
'export_action_set_id_document_software_url' : 'export_action_set_id_document_software_url',
|
||||
'export_action_set_id_entity_software' : 'export_action_set_id_entity_software',
|
||||
'export_action_set_id_document_community' : 'export_action_set_id_document_community',
|
||||
'export_action_set_id_document_patent' : 'export_action_set_id_document_patent',
|
||||
'export_action_set_id_entity_patent' : 'export_action_set_id_entity_patent',
|
||||
'export_action_set_id_document_covid19' : 'export_action_set_id_document_covid19',
|
||||
'export_action_set_id_document_classes' : 'export_action_set_id_document_classes'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="user" name="simulation" required="true" type="boolean">false</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="updateActionSets"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="updateActionSets" type="UpdateActionSets">
|
||||
<DESCRIPTION>update action sets</DESCRIPTION>
|
||||
<PARAMETERS/>
|
||||
<ARCS>
|
||||
<ARC to="success"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
</CONFIGURATION>
|
||||
<STATUS>
|
||||
<LAST_EXECUTION_ID>wf_20210719_221139_780</LAST_EXECUTION_ID>
|
||||
<LAST_EXECUTION_DATE>2021-07-21T01:23:13+00:00</LAST_EXECUTION_DATE>
|
||||
<LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
|
||||
<LAST_EXECUTION_ERROR/>
|
||||
</STATUS>
|
||||
</BODY>
|
||||
</RESOURCE_PROFILE>
|
|
@ -0,0 +1,995 @@
|
|||
<RESOURCE_PROFILE>
|
||||
<HEADER>
|
||||
<RESOURCE_IDENTIFIER value="b05c97e6-69b5-497d-87fd-2137d3ff2c2e_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
|
||||
<RESOURCE_TYPE value="WorkflowDSResourceType"/>
|
||||
<RESOURCE_KIND value="WorkflowDSResources"/>
|
||||
<RESOURCE_URI value=""/>
|
||||
<DATE_OF_CREATION value="2021-08-03T13:43:44+00:00"/>
|
||||
</HEADER>
|
||||
<BODY>
|
||||
<WORKFLOW_NAME>Graph Construction [BETA]</WORKFLOW_NAME>
|
||||
<WORKFLOW_TYPE>Data Provision</WORKFLOW_TYPE>
|
||||
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
|
||||
<CONFIGURATION start="manual">
|
||||
<NODE isStart="true" name="setNsPrefixBlacklist" type="SetEnvParameter">
|
||||
<DESCRIPTION>set blacklist of funder nsPrefixes</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">nsPrefixBlacklist</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">gsrt________,rcuk________</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setIdMappingPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>set the path of the map defining the relations id mappings</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">idMappingPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/data/maps/fct_map.json</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setMergedGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the MERGED graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">mergedGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/01_graph_merged</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setRawGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the RAW graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">rawGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/02_graph_raw</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setCleanedFirstGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the cleaned consistent graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">cleanedFirstGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/03_graph_cleaned</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setDedupGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the DEDUPED graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">dedupGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/04_graph_dedup</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setInferredGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the INFERRED graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">inferredGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/05_graph_inferred</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setConsistentGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the CONSISTENT graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">consistentGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/06_graph_consistent</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setOrcidGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the ORCID enriched graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">orcidGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/07_graph_orcid</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setBulkTaggingGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the BULK TAGGED graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">bulkTaggingGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/08_graph_bulktagging</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setAffiliationGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">affiliationGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/09_graph_affiliation</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setCommunityOrganizationGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the COMMUNITY from SELECTED SOURCES graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">communityOrganizationGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/10_graph_comunity_organization</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setFundingGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the FUNDING from SEMANTIC RELATION graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">fundingGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/11_graph_funding</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setCommunitySemRelGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the COMMUNITY from SEMANTIC RELATION graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">communitySemRelGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/12_graph_comunity_sem_rel</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setCountryGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the COUNTRY enriched graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">countryGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/13_graph_country</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setCleanedGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the CLEANED graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">cleanedGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/14_graph_cleaned</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setBlacklistedGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the blacklisted graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">blacklistedGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/15_graph_blacklisted</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setBulkTaggingPathMap" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the map of paths for the Bulk Tagging</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">bulkTaggingPathMap</PARAM>
|
||||
<PARAM managedBy="system" name="parameterValue" required="true" type="string">{"author" : "$['author'][*]['fullname']", "title" : "$['title'][*]['value']", "orcid" : "$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']", "contributor" : "$['contributor'][*]['value']", "description" : "$['description'][*]['value']"}</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setPropagationOrganizationCommunityMap" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the map associating each organization with its list of communities, used for the propagation of communities to results through organizations</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">propagationOrganizationCommunityMap</PARAM>
|
||||
<PARAM managedBy="system" name="parameterValue" required="true" type="string">{"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], "20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|rcuk________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|rcuk________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], 
"20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], "20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"],
|
||||
"20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|rcuk________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], "20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"]}
|
||||
</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setDedupConfig" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the dedup orchestrator name</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">dedupConfig</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">dedup-similarity-result-decisiontree-v2</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="actionSetsRaw" type="SetEnvParameter">
|
||||
<DESCRIPTION>declares the ActionSet ids to promote in the RAW graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">actionSetIdsRawGraph</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">scholexplorer-dump,doiboost,orcidworks-no-doi,iis-entities-software,iis-entities-patent,datacite</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="actionSetsIIS" type="SetEnvParameter">
|
||||
<DESCRIPTION>declares the ActionSet ids to promote in the INFERRED graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">actionSetIdsIISGraph</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">iis-researchinitiative,iis-document-citations,iis-document-citations-relations,iis-document-affiliation,iis-document-classes,iis-document-similarities,iis-referenced-datasets-main,iis-referenced-projects-main,iis-referenceextraction-pdb,document_software_url,iis-extracted-metadata,iis-communities,iis-referenced-patents,iis-covid-19,h2020classification,bipfinder-scores</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setIsLookUpUrl" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the IS lookup service address</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">isLookUpUrl</PARAM>
|
||||
<PARAM managedBy="system" name="parameterValue" required="true" type="string">http://beta.services.openaire.eu:8280/is/services/isLookUp?wsdl</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isJoin="true" name="waitConfig">
|
||||
<DESCRIPTION>wait configurations</DESCRIPTION>
|
||||
<PARAMETERS/>
|
||||
<ARCS>
|
||||
<ARC to="reuseODFClaims_PROD"/>
|
||||
<ARC to="reuseODFClaims_BETA"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseODFClaims_PROD" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached ODF claims from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseODFClaims_PROD</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="reuseODF_hdfs_PROD"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseODF_hdfs_PROD" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached ODF records on HDFS from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseODFhdfs_PROD</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="reuseOAFClaims_PROD"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseOAFClaims_PROD" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached OAF claims from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseOAFClaims_PROD</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="reuseOAF_hdfs_PROD"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseOAF_hdfs_PROD" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached OAF records on HDFS from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseOAFhdfs_PROD</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="reuseDB_PROD"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseDB_PROD" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached DB content from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseDB_PROD</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="reuseDBOpenorgs_PROD"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseDBOpenorgs_PROD" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached OpenOrgs content from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseDBOpenorgs_PROD</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="reuseODF_PROD"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseODF_PROD" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached ODF content from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseODF_PROD</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="reuseOAF_PROD"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseOAF_PROD" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached OAF content from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseOAF_PROD</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="patchRelations_PROD"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="patchRelations_PROD" type="SetEnvParameter">
|
||||
<DESCRIPTION>should apply the relations id patching based on the provided idMapping on PROD?</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">shouldPatchRelations_PROD</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="contentPathProd"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="contentPathProd" type="SetEnvParameter">
|
||||
<DESCRIPTION>set the PROD aggregator content path</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">prodContentPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/prod_aggregator_for_beta</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="prodAggregatorGraphPath"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="prodAggregatorGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the path containing the PROD AGGREGATOR graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">prodAggregatorGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/00_prod_graph_aggregator</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig2"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseODFClaims_BETA" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached ODF claims from the BETA aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseODFClaims_BETA</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="reuseODF_hdfs_BETA"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseODF_hdfs_BETA" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached ODF records on HDFS from the BETA aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseODFhdfs_BETA</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="reuseOAFClaims_BETA"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseOAFClaims_BETA" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached OAF claims from the BETA aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseOAFClaims_BETA</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="reuseOAF_hdfs_BETA"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseOAF_hdfs_BETA" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached OAF records on HDFS from the BETA aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseOAFhdfs_BETA</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="reuseDB_BETA"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseDB_BETA" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached DB content from the BETA aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseDB_BETA</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="reuseDBOpenorgs_BETA"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseDBOpenorgs_BETA" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached OpenOrgs content from the BETA aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseDBOpenorgs_BETA</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="reuseODF_BETA"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseODF_BETA" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached ODF content from the BETA aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseODF_BETA</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="reuseOAF_BETA"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseOAF_BETA" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached OAF content from the BETA aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseOAF_BETA</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="patchRelations_BETA"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="patchRelations_BETA" type="SetEnvParameter">
|
||||
<DESCRIPTION>should apply the relations id patching based on the provided idMapping on BETA?</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">shouldPatchRelations_BETA</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="contentPathBeta"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="contentPathBeta" type="SetEnvParameter">
|
||||
<DESCRIPTION>set the BETA aggregator content path</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">betaContentPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_aggregator</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="betaAggregatorGraphPath"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="betaAggregatorGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the path containing the BETA AGGREGATOR graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">betaAggregatorGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/00_beta_graph_aggregator</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig2"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isJoin="true" name="waitConfig2">
|
||||
<DESCRIPTION>wait configurations</DESCRIPTION>
|
||||
<PARAMETERS/>
|
||||
<ARCS>
|
||||
<ARC to="betaAggregatorGraph"/>
|
||||
<ARC to="prodAggregatorGraph"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="betaAggregatorGraph" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>create the BETA AGGREGATOR graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'graphOutputPath' : 'betaAggregatorGraphPath',
|
||||
'isLookupUrl' : 'isLookUpUrl',
|
||||
'reuseODFClaims' : 'reuseODFClaims_BETA',
|
||||
'reuseOAFClaims' : 'reuseOAFClaims_BETA',
|
||||
'reuseDB' : 'reuseDB_BETA',
|
||||
'reuseDBOpenorgs' : 'reuseDBOpenorgs_BETA',
|
||||
'reuseODF' : 'reuseODF_BETA',
|
||||
'reuseODF_hdfs' : 'reuseODFhdfs_BETA',
|
||||
'reuseOAF' : 'reuseOAF_BETA',
|
||||
'reuseOAF_hdfs' : 'reuseOAFhdfs_BETA',
|
||||
'contentPath' : 'betaContentPath',
|
||||
'nsPrefixBlacklist' : 'nsPrefixBlacklist',
|
||||
'shouldPatchRelations' : 'shouldPatchRelations_BETA',
|
||||
'idMappingPath' : 'idMappingPath'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app',
|
||||
'mongoURL' : '',
|
||||
'mongoDb' : '',
|
||||
'mdstoreManagerUrl' : '',
|
||||
'postgresURL' : '',
|
||||
'postgresUser' : '',
|
||||
'postgresPassword' : '',
|
||||
'postgresOpenOrgsURL' : '',
|
||||
'postgresOpenOrgsUser' : '',
|
||||
'postgresOpenOrgsPassword' : '',
|
||||
'shouldHashId' : 'true',
|
||||
'importOpenorgs' : 'true',
|
||||
'workingDir' : '/tmp/beta_provision/working_dir/beta_aggregator'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitAggregatorGraph"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="prodAggregatorGraph" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>create the PROD AGGREGATOR graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'graphOutputPath' : 'prodAggregatorGraphPath',
|
||||
'isLookupUrl' : 'isLookUpUrl',
|
||||
'reuseODFClaims' : 'reuseODFClaims_PROD',
|
||||
'reuseOAFClaims' : 'reuseOAFClaims_PROD',
|
||||
'reuseDB' : 'reuseDB_PROD',
|
||||
'reuseDBOpenorgs' : 'reuseDBOpenorgs_PROD',
|
||||
'reuseODF' : 'reuseODF_PROD',
|
||||
'reuseODF_hdfs' : 'reuseODFhdfs_PROD',
|
||||
'reuseOAF' : 'reuseOAF_PROD',
|
||||
'reuseOAF_hdfs' : 'reuseOAFhdfs_PROD',
|
||||
'contentPath' : 'prodContentPath',
|
||||
'nsPrefixBlacklist' : 'nsPrefixBlacklist',
|
||||
'shouldPatchRelations' : 'shouldPatchRelations_PROD',
|
||||
'idMappingPath' : 'idMappingPath'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app',
|
||||
'mongoURL' : '',
|
||||
'mongoDb' : '',
|
||||
'mdstoreManagerUrl' : '',
|
||||
'postgresURL' : '',
|
||||
'postgresUser' : '',
|
||||
'postgresPassword' : '',
|
||||
'postgresOpenOrgsURL' : '',
|
||||
'postgresOpenOrgsUser' : '',
|
||||
'postgresOpenOrgsPassword' : '',
|
||||
'shouldHashId' : 'true',
|
||||
'importOpenorgs' : 'true',
|
||||
'workingDir' : '/tmp/beta_provision/working_dir/prod_aggregator'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitAggregatorGraph"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isJoin="true" name="waitAggregatorGraph">
|
||||
<DESCRIPTION>wait configurations</DESCRIPTION>
|
||||
<PARAMETERS/>
|
||||
<ARCS>
|
||||
<ARC to="mergeAggregatorGraphs"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="mergeAggregatorGraphs" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>create the AGGREGATOR graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'betaInputGraphPath' : 'betaAggregatorGraphPath',
|
||||
'prodInputGraphPath' : 'prodAggregatorGraphPath',
|
||||
'graphOutputPath' : 'mergedGraphPath'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/merge/oozie_app',
|
||||
'workingDir' : '/tmp/beta_provision/working_dir/merge_graph',
|
||||
'priority' : 'BETA'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="promoteActionsRaw"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="promoteActionsRaw" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>create the RAW graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'inputActionSetIds' : 'actionSetIdsRawGraph',
|
||||
'inputGraphRootPath' : 'mergedGraphPath',
|
||||
'outputGraphRootPath' : 'rawGraphPath',
|
||||
'isLookupUrl' : 'isLookUpUrl'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/BETA/actionmanager/wf/main/oozie_app',
|
||||
'sparkExecutorCores' : '3',
|
||||
'sparkExecutorMemory' : '10G',
|
||||
'activePromoteDatasetActionPayload' : 'true',
|
||||
'activePromoteDatasourceActionPayload' : 'true',
|
||||
'activePromoteOrganizationActionPayload' : 'true',
|
||||
'activePromoteOtherResearchProductActionPayload' : 'true',
|
||||
'activePromoteProjectActionPayload' : 'true',
|
||||
'activePromotePublicationActionPayload' : 'true',
|
||||
'activePromoteRelationActionPayload' : 'true',
|
||||
'activePromoteResultActionPayload' : 'true',
|
||||
'activePromoteSoftwareActionPayload' : 'true',
|
||||
'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET',
|
||||
'workingDir' : '/tmp/beta_provision/working_dir/promoteActionsRaw'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="graphCleaningFirst"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="graphCleaningFirst" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'graphInputPath' : 'rawGraphPath',
|
||||
'graphOutputPath': 'cleanedFirstGraphPath',
|
||||
'isLookupUrl': 'isLookUpUrl'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app',
|
||||
'workingDir' : '/tmp/beta_provision/working_dir/clean'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="duplicateScan"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="duplicateScan" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>search for duplicates in the raw graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'actionSetId' : 'dedupConfig',
|
||||
'graphBasePath' : 'cleanedFirstGraphPath',
|
||||
'dedupGraphPath': 'dedupGraphPath',
|
||||
'isLookUpUrl' : 'isLookUpUrl'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/scan/oozie_app',
|
||||
'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple',
|
||||
'workingPath' : '/tmp/beta_provision/working_dir/dedup',
|
||||
'sparkExecutorCores' : '3',
|
||||
'sparkExecutorMemory' : '10G'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="promoteActionsIIS"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="promoteActionsIIS" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>create the INFERRED graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'inputActionSetIds' : 'actionSetIdsIISGraph',
|
||||
'inputGraphRootPath' : 'dedupGraphPath',
|
||||
'outputGraphRootPath' : 'inferredGraphPath',
|
||||
'isLookupUrl' : 'isLookUpUrl'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/BETA/actionmanager/wf/main/oozie_app',
|
||||
'sparkExecutorCores' : '3',
|
||||
'sparkExecutorMemory' : '10G',
|
||||
'activePromoteDatasetActionPayload' : 'true',
|
||||
'activePromoteDatasourceActionPayload' : 'true',
|
||||
'activePromoteOrganizationActionPayload' : 'true',
|
||||
'activePromoteOtherResearchProductActionPayload' : 'true',
|
||||
'activePromoteProjectActionPayload' : 'true',
|
||||
'activePromotePublicationActionPayload' : 'true',
|
||||
'activePromoteRelationActionPayload' : 'true',
|
||||
'activePromoteResultActionPayload' : 'true',
|
||||
'activePromoteSoftwareActionPayload' : 'true',
|
||||
'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET',
|
||||
'workingDir' : '/tmp/beta_provision/working_dir/promoteActionsIIS'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="dedupConsistency"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="dedupConsistency" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>mark duplicates as deleted and redistribute the relationships</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'graphBasePath' : 'inferredGraphPath',
|
||||
'graphOutputPath': 'consistentGraphPath'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/consistency/oozie_app',
|
||||
'workingPath' : '/tmp/beta_provision/working_dir/dedup'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="orcidPropagation"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
|
||||
<NODE name="orcidPropagation" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>propagates ORCID among results linked by allowedsemrels semantic relationships</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'sourcePath' : 'consistentGraphPath',
|
||||
'outputPath': 'orcidGraphPath'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/orcidtoresultfromsemrel/oozie_app',
|
||||
'workingDir' : '/tmp/beta_provision/working_dir/orcid',
|
||||
'allowedsemrels' : 'IsSupplementedBy;IsSupplementTo;isSupplementedBy;isSupplementTo',
|
||||
'saveGraph' : 'true',
|
||||
'sparkExecutorCores' : '3',
|
||||
'sparkExecutorMemory' : '10G'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="bulkTagging"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="bulkTagging" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>mark results respecting some rules as belonging to communities</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'sourcePath' : 'orcidGraphPath',
|
||||
'outputPath': 'bulkTaggingGraphPath',
|
||||
'isLookUpUrl' : 'isLookUpUrl',
|
||||
'pathMap' : 'bulkTaggingPathMap'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/bulktag/oozie_app',
|
||||
'workingDir' : '/tmp/beta_provision/working_dir/bulktag'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="affiliationPropagation"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="affiliationPropagation" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>creates relationships between results and organizations when the organizations are associated to institutional repositories</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'sourcePath' : 'bulkTaggingGraphPath',
|
||||
'outputPath': 'affiliationGraphPath'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/affiliation/oozie_app',
|
||||
'workingDir' : '/tmp/beta_provision/working_dir/affiliation',
|
||||
'saveGraph' : 'true',
|
||||
'blacklist' : 'empty'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="communityOrganizationPropagation"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="communityOrganizationPropagation" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>marks as belonging to communities the result collected from datasources related to the organizations specified in the organizationCommunityMap</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'sourcePath' : 'affiliationGraphPath',
|
||||
'outputPath': 'communityOrganizationGraphPath',
|
||||
'organizationtoresultcommunitymap': 'propagationOrganizationCommunityMap'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/community_organization/oozie_app',
|
||||
'workingDir' : '/tmp/beta_provision/working_dir/community_organization',
|
||||
'saveGraph' : 'true'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="resultProjectPropagation"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="resultProjectPropagation" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>creates relations between projects and results linked to other results through allowedsemrels semantic relations linked to projects</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'sourcePath' : 'communityOrganizationGraphPath',
|
||||
'outputPath': 'fundingGraphPath'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/funding/oozie_app',
|
||||
'workingDir' : '/tmp/beta_provision/working_dir/funding',
|
||||
'allowedsemrels' : 'IsSupplementedBy;IsSupplementTo',
|
||||
'saveGraph' : 'true'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="communitySemrelPropagation"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="communitySemrelPropagation" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>tags as belonging to communities the results in an allowedsemrels relation with other results already linked to communities </DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'sourcePath' : 'fundingGraphPath',
|
||||
'outputPath': 'communitySemRelGraphPath',
|
||||
'isLookUpUrl' : 'isLookUpUrl'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/community_semrel/oozie_app',
|
||||
'workingDir' : '/tmp/beta_provision/working_dir/community_semrel',
|
||||
'allowedsemrels' : 'IsSupplementedBy;IsSupplementTo',
|
||||
'saveGraph' : 'true'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="countryPropagation"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="countryPropagation" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>associates to results collected from allowedtypes and those in the whitelist the country of the organization(s) handling the datasource they are collected from </DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'sourcePath' : 'communitySemRelGraphPath',
|
||||
'outputPath': 'countryGraphPath'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/country/oozie_app',
|
||||
'sparkExecutorCores' : '3',
|
||||
'sparkExecutorMemory' : '10G',
|
||||
'workingDir' : '/tmp/beta_provision/working_dir/country',
|
||||
'allowedtypes' : 'pubsrepository::institutional',
|
||||
'whitelist' : '10|openaire____::e783372970a1dc066ce99c673090ff88;10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0',
|
||||
'saveGraph' : 'true'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="graphCleaning"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="graphCleaning" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'graphInputPath' : 'countryGraphPath',
|
||||
'graphOutputPath': 'cleanedGraphPath',
|
||||
'isLookupUrl': 'isLookUpUrl'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app',
|
||||
'workingDir' : '/tmp/beta_provision/working_dir/clean'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="blacklistRelations"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="blacklistRelations" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>removes blacklisted relations </DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'sourcePath' : 'cleanedGraphPath',
|
||||
'outputPath': 'blacklistedGraphPath'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/blacklist/oozie_app',
|
||||
'workingDir' : '/tmp/beta_provision/working_dir/blacklist',
|
||||
'postgresURL' : '',
|
||||
'postgresUser' : '',
|
||||
'postgresPassword' : ''
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="success"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
</CONFIGURATION>
|
||||
<STATUS>
|
||||
<LAST_EXECUTION_ID>wf_20210803_134357_367</LAST_EXECUTION_ID>
|
||||
<LAST_EXECUTION_DATE>2021-08-03T17:08:11+00:00</LAST_EXECUTION_DATE>
|
||||
<LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
|
||||
<LAST_EXECUTION_ERROR/>
|
||||
</STATUS>
|
||||
</BODY>
|
||||
</RESOURCE_PROFILE>
|
|
@ -0,0 +1,778 @@
|
|||
<RESOURCE_PROFILE>
|
||||
<HEADER>
|
||||
<RESOURCE_IDENTIFIER value="74d90d54-bea4-4a79-82d9-adddcc89e661_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
|
||||
<RESOURCE_TYPE value="WorkflowDSResourceType"/>
|
||||
<RESOURCE_KIND value="WorkflowDSResources"/>
|
||||
<RESOURCE_URI value=""/>
|
||||
<DATE_OF_CREATION value="2021-08-06T09:18:40+00:00"/>
|
||||
</HEADER>
|
||||
<BODY>
|
||||
<WORKFLOW_NAME>Graph construction [PROD NEW]</WORKFLOW_NAME>
|
||||
<WORKFLOW_TYPE>Data Provision</WORKFLOW_TYPE>
|
||||
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
|
||||
<CONFIGURATION start="manual">
|
||||
<NODE isStart="true" name="setNsPrefixBlacklist" type="SetEnvParameter">
|
||||
<DESCRIPTION>set blacklist of funder nsPrefixes</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">nsPrefixBlacklist</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">conicytf____,dfgf________,gsrt________,innoviris___,miur________,rif_________,rsf_________,sgov________,sfrs________</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="aggregatorGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the path containing the PROD AGGREGATOR graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">aggregatorGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/prod_provision/graph/00_prod_graph_aggregator</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setRawGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the RAW graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">rawGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/prod_provision/graph/01_graph_raw</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setCleanedFirstGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the consistent graph cleaned</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">cleanedFirstGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/prod_provision/graph/02_graph_cleaned</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setDedupGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the DEDUPED graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">dedupGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/prod_provision/graph/03_graph_dedup</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setInferredGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the INFERRED graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">inferredGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/prod_provision/graph/04_graph_inferred</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setConsistentGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the CONSISTENCY graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">consistentGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/prod_provision/graph/05_graph_consistent</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setOrcidGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the ORCID enriched graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">orcidGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/prod_provision/graph/06_graph_orcid</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setBulkTaggingGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the BULK TAGGED graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">bulkTaggingGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/prod_provision/graph/07_graph_bulktagging</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setAffiliationGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">affiliationGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/prod_provision/graph/08_graph_affiliation</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setCommunityOrganizationGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the COMMUNITY from SELECTED SOURCES graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">communityOrganizationGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/prod_provision/graph/09_graph_comunity_organization</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setFundingGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the FUNDING from SEMANTIC RELATION graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">fundingGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/prod_provision/graph/10_graph_funding</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setCommunitySemRelGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the COMMUNITY from SEMANTIC RELATION graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">communitySemRelGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/prod_provision/graph/11_graph_comunity_sem_rel</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setCountryGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the COUNTRY enriched graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">countryGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/prod_provision/graph/12_graph_country</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setCleanedGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the CLEANED graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">cleanedGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/prod_provision/graph/13_graph_cleaned</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setBlacklistedGraphPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the target path to store the blacklisted graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">blacklistedGraphPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/prod_provision/graph/14_graph_blacklisted</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setBulkTaggingPathMap" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the map of paths for the Bulk Tagging</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">bulkTaggingPathMap</PARAM>
|
||||
<PARAM managedBy="system" name="parameterValue" required="true" type="string">{"author" : "$['author'][*]['fullname']", "title" : "$['title'][*]['value']", "orcid" : "$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']", "contributor" : "$['contributor'][*]['value']", "description" : "$['description'][*]['value']"}</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setPropagationOrganizationCommunityMap" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the map of associations organization, community list for the propagation of community to result through organization</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">propagationOrganizationCommunityMap</PARAM>
|
||||
<PARAM managedBy="system" name="parameterValue" required="true" type="string">{"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], "20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|rcuk________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|rcuk________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], 
"20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], "20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"],
|
||||
"20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|rcuk________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], "20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"]}
|
||||
</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setDedupConfig" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the dedup orchestrator name</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">dedupConfig</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">dedup-similarity-result-decisiontree-v2</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="actionSetsRaw" type="SetEnvParameter">
|
||||
<DESCRIPTION>declares the ActionSet ids to promote in the RAW graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">actionSetIdsRawGraph</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">scholexplorer-dump,doiboost,orcidworks-no-doi,iis-entities-software,iis-entities-patent,datacite</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="actionSetsIIS" type="SetEnvParameter">
|
||||
<DESCRIPTION>declares the ActionSet ids to promote in the INFERRED graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">actionSetIdsIISGraph</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">iis-researchinitiative,iis-document-citations,iis-document-citations-relations,iis-document-affiliation,iis-document-classes,iis-document-similarities,iis-referenced-datasets-main,iis-referenced-projects-main,iis-referenceextraction-pdb,document_software_url,iis-extracted-metadata,iis-communities,iis-referenced-patents,iis-covid-19,h2020classification,bipfinder-scores</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setIsLookUpUrl" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the IS lookup service address</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">isLookUpUrl</PARAM>
|
||||
<PARAM managedBy="system" name="parameterValue" required="true" type="string">http://services.openaire.eu:8280/is/services/isLookUp?wsdl</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isJoin="true" name="waitConfig">
|
||||
<DESCRIPTION>wait configurations</DESCRIPTION>
|
||||
<PARAMETERS/>
|
||||
<ARCS>
|
||||
<ARC to="reuseODFClaims"/>
|
||||
<ARC to="reuseOAFClaims"/>
|
||||
<ARC to="reuseODF_hdfs"/>
|
||||
<ARC to="reuseOAF_hdfs"/>
|
||||
<ARC to="reuseODF"/>
|
||||
<ARC to="reuseOAF"/>
|
||||
<ARC to="reuseDB"/>
|
||||
<ARC to="reuseDBOpenorgs"/>
|
||||
<ARC to="contentPath"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseODFClaims" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached ODF claims from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseODFClaims</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig2"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseODF_hdfs" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached ODF records on HDFS from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseODFhdfs</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig2"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseOAFClaims" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached OAF claims from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseOAFClaims</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig2"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseOAF_hdfs" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached OAF records on HDFS from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseOAFhdfs</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig2"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseDB" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached DB content from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseDB</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig2"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseDBOpenorgs" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached OpenOrgs content from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseDBOpenorgs</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig2"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseODF" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached ODF content from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseODF</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig2"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="reuseOAF" type="SetEnvParameter">
|
||||
<DESCRIPTION>reuse cached OAF content from the PROD aggregation system</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseOAF</PARAM>
|
||||
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig2"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="contentPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>set the PROD aggregator content path</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">contentPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/prod_aggregator</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig2"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isJoin="true" name="waitConfig2">
|
||||
<DESCRIPTION>wait configurations</DESCRIPTION>
|
||||
<PARAMETERS/>
|
||||
<ARCS>
|
||||
<ARC to="aggregatorGraph"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="aggregatorGraph" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>create the PROD AGGREGATOR graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'graphOutputPath' : 'aggregatorGraphPath',
|
||||
'isLookupUrl' : 'isLookUpUrl',
|
||||
'reuseODFClaims' : 'reuseODFClaims',
|
||||
'reuseOAFClaims' : 'reuseOAFClaims',
|
||||
'reuseDB' : 'reuseDB',
|
||||
'reuseDBOpenorgs' : 'reuseDBOpenorgs',
|
||||
'reuseODF' : 'reuseODF',
|
||||
'reuseODF_hdfs' : 'reuseODFhdfs',
|
||||
'reuseOAF' : 'reuseOAF',
|
||||
'reuseOAF_hdfs' : 'reuseOAFhdfs',
|
||||
'contentPath' : 'contentPath',
|
||||
'nsPrefixBlacklist' : 'nsPrefixBlacklist'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/raw_all/oozie_app',
|
||||
'mongoURL' : '',
|
||||
'mongoDb' : '',
|
||||
'mdstoreManagerUrl' : '',
|
||||
'postgresURL' : '',
|
||||
'postgresUser' : '',
|
||||
'postgresPassword' : '',
|
||||
'postgresOpenOrgsURL' : '',
|
||||
'postgresOpenOrgsUser' : '',
|
||||
'postgresOpenOrgsPassword' : '',
|
||||
'shouldHashId' : 'true',
|
||||
'importOpenorgs' : 'true',
|
||||
'workingDir' : '/tmp/prod_provision/working_dir/prod_aggregator'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="promoteActionsRaw"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="promoteActionsRaw" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>create the RAW graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'inputActionSetIds' : 'actionSetIdsRawGraph',
|
||||
'inputGraphRootPath' : 'aggregatorGraphPath',
|
||||
'outputGraphRootPath' : 'rawGraphPath',
|
||||
'isLookupUrl' : 'isLookUpUrl'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/wf/main/oozie_app',
|
||||
'sparkExecutorCores' : '3',
|
||||
'sparkExecutorMemory' : '10G',
|
||||
'activePromoteDatasetActionPayload' : 'true',
|
||||
'activePromoteDatasourceActionPayload' : 'true',
|
||||
'activePromoteOrganizationActionPayload' : 'true',
|
||||
'activePromoteOtherResearchProductActionPayload' : 'true',
|
||||
'activePromoteProjectActionPayload' : 'true',
|
||||
'activePromotePublicationActionPayload' : 'true',
|
||||
'activePromoteRelationActionPayload' : 'true',
|
||||
'activePromoteResultActionPayload' : 'true',
|
||||
'activePromoteSoftwareActionPayload' : 'true',
|
||||
'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET',
|
||||
'workingDir' : '/tmp/prod_provision/working_dir/promoteActionsRaw'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="graphCleaningFirst"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="graphCleaningFirst" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'graphInputPath' : 'rawGraphPath',
|
||||
'graphOutputPath': 'cleanedFirstGraphPath',
|
||||
'isLookupUrl': 'isLookUpUrl'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app',
|
||||
'workingDir' : '/tmp/prod_provision/working_dir/clean'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="duplicateScan"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="duplicateScan" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>search for duplicates in the raw graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'actionSetId' : 'dedupConfig',
|
||||
'graphBasePath' : 'cleanedFirstGraphPath',
|
||||
'dedupGraphPath': 'dedupGraphPath',
|
||||
'isLookUpUrl' : 'isLookUpUrl'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/scan/oozie_app',
|
||||
'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple',
|
||||
'workingPath' : '/tmp/prod_provision/working_dir/dedup',
|
||||
'sparkExecutorCores' : '3',
|
||||
'sparkExecutorMemory' : '10G'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="promoteActionsIIS"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="promoteActionsIIS" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>create the INFERRED graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'inputActionSetIds' : 'actionSetIdsIISGraph',
|
||||
'inputGraphRootPath' : 'dedupGraphPath',
|
||||
'outputGraphRootPath' : 'inferredGraphPath',
|
||||
'isLookupUrl' : 'isLookUpUrl'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/wf/main/oozie_app',
|
||||
'sparkExecutorCores' : '3',
|
||||
'sparkExecutorMemory' : '10G',
|
||||
'activePromoteDatasetActionPayload' : 'true',
|
||||
'activePromoteDatasourceActionPayload' : 'true',
|
||||
'activePromoteOrganizationActionPayload' : 'true',
|
||||
'activePromoteOtherResearchProductActionPayload' : 'true',
|
||||
'activePromoteProjectActionPayload' : 'true',
|
||||
'activePromotePublicationActionPayload' : 'true',
|
||||
'activePromoteRelationActionPayload' : 'true',
|
||||
'activePromoteResultActionPayload' : 'true',
|
||||
'activePromoteSoftwareActionPayload' : 'true',
|
||||
'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET',
|
||||
'workingDir' : '/tmp/prod_provision/working_dir/promoteActionsIIS'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="dedupConsistency"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="dedupConsistency" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>mark duplicates as deleted and redistribute the relationships</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'graphBasePath' : 'inferredGraphPath',
|
||||
'graphOutputPath': 'consistentGraphPath'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/consistency/oozie_app',
|
||||
'workingPath' : '/tmp/prod_provision/working_dir/dedup'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="orcidPropagation"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="orcidPropagation" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>propagates ORCID among results linked by allowedsemrels semantic relationships</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'sourcePath' : 'consistentGraphPath',
|
||||
'outputPath': 'orcidGraphPath'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/orcidtoresultfromsemrel/oozie_app',
|
||||
'workingDir' : '/tmp/prod_provision/working_dir/orcid',
|
||||
'allowedsemrels' : 'isSupplementedBy;isSupplementTo',
|
||||
'saveGraph' : 'true'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="bulkTagging"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="bulkTagging" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>mark results respecting some rules as belonging to communities</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'sourcePath' : 'orcidGraphPath',
|
||||
'outputPath': 'bulkTaggingGraphPath',
|
||||
'isLookUpUrl' : 'isLookUpUrl',
|
||||
'pathMap' : 'bulkTaggingPathMap'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/bulktag/oozie_app',
|
||||
'workingDir' : '/tmp/prod_provision/working_dir/bulktag'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="affiliationPropagation"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="affiliationPropagation" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>creates relationships between results and organizations when the organizations are associated with institutional repositories</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'sourcePath' : 'bulkTaggingGraphPath',
|
||||
'outputPath': 'affiliationGraphPath'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/affiliation/oozie_app',
|
||||
'workingDir' : '/tmp/prod_provision/working_dir/affiliation',
|
||||
'saveGraph' : 'true',
|
||||
'blacklist' : 'empty'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="communityOrganizationPropagation"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="communityOrganizationPropagation" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>marks as belonging to communities the results collected from datasources related to the organizations specified in the organizationCommunityMap</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'sourcePath' : 'affiliationGraphPath',
|
||||
'outputPath': 'communityOrganizationGraphPath',
|
||||
'organizationtoresultcommunitymap': 'propagationOrganizationCommunityMap'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/community_organization/oozie_app',
|
||||
'workingDir' : '/tmp/prod_provision/working_dir/community_organization',
|
||||
'saveGraph' : 'true'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="resultProjectPropagation"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="resultProjectPropagation" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>creates relations between projects and results that are linked, through allowedsemrels semantic relations, to other results already linked to projects</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'sourcePath' : 'communityOrganizationGraphPath',
|
||||
'outputPath': 'fundingGraphPath'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/funding/oozie_app',
|
||||
'workingDir' : '/tmp/prod_provision/working_dir/funding',
|
||||
'allowedsemrels' : 'isSupplementedBy;isSupplementTo',
|
||||
'saveGraph' : 'true'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="communitySemrelPropagation"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="communitySemrelPropagation" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>tags as belonging to communities the results that are in an allowedsemrels relation with other results already linked to communities</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'sourcePath' : 'fundingGraphPath',
|
||||
'outputPath': 'communitySemRelGraphPath',
|
||||
'isLookUpUrl' : 'isLookUpUrl'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/community_semrel/oozie_app',
|
||||
'workingDir' : '/tmp/prod_provision/working_dir/community_semrel',
|
||||
'allowedsemrels' : 'isSupplementedBy;isSupplementTo',
|
||||
'saveGraph' : 'true'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="countryPropagation"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="countryPropagation" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>associates to the results collected from datasources of the allowedtypes, and from those in the whitelist, the country of the organization(s) handling the datasource they are collected from</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'sourcePath' : 'communitySemRelGraphPath',
|
||||
'outputPath': 'countryGraphPath'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/country/oozie_app',
|
||||
'sparkExecutorCores' : '3',
|
||||
'sparkExecutorMemory' : '10G',
|
||||
'workingDir' : '/tmp/prod_provision/working_dir/country',
|
||||
'allowedtypes' : 'pubsrepository::institutional',
|
||||
'whitelist' : '10|openaire____::e783372970a1dc066ce99c673090ff88;10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0',
|
||||
'saveGraph' : 'true'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="graphCleaning"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="graphCleaning" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'graphInputPath' : 'countryGraphPath',
|
||||
'graphOutputPath': 'cleanedGraphPath',
|
||||
'isLookupUrl': 'isLookUpUrl'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app',
|
||||
'workingDir' : '/tmp/prod_provision/working_dir/clean'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="blacklistRelations"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="blacklistRelations" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>removes blacklisted relations </DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'sourcePath' : 'cleanedGraphPath',
|
||||
'outputPath': 'blacklistedGraphPath'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/blacklist/oozie_app',
|
||||
'workingDir' : '/tmp/prod_provision/working_dir/blacklist',
|
||||
'postgresURL' : '',
|
||||
'postgresUser' : '',
|
||||
'postgresPassword' : ''
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="success"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
</CONFIGURATION>
|
||||
<STATUS>
|
||||
<LAST_EXECUTION_ID>wf_20210723_171026_279</LAST_EXECUTION_ID>
|
||||
<LAST_EXECUTION_DATE>2021-07-24T00:00:39+00:00</LAST_EXECUTION_DATE>
|
||||
<LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
|
||||
<LAST_EXECUTION_ERROR/>
|
||||
</STATUS>
|
||||
</BODY>
|
||||
</RESOURCE_PROFILE>
|
|
@ -0,0 +1,74 @@
|
|||
<RESOURCE_PROFILE>
|
||||
<HEADER>
|
||||
<RESOURCE_IDENTIFIER value="5d750977-bec2-47f4-97bb-1b7500e4704e_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
|
||||
<RESOURCE_TYPE value="WorkflowDSResourceType"/>
|
||||
<RESOURCE_KIND value="WorkflowDSResources"/>
|
||||
<RESOURCE_URI value=""/>
|
||||
<DATE_OF_CREATION value="2021-07-28T07:49:37+00:00"/>
|
||||
</HEADER>
|
||||
<BODY>
|
||||
<WORKFLOW_NAME>Graph to HiveDB [PROD]</WORKFLOW_NAME>
|
||||
<WORKFLOW_TYPE>Data Provision</WORKFLOW_TYPE>
|
||||
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
|
||||
<CONFIGURATION start="manual">
|
||||
<NODE isStart="true" name="setInputPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the path containing the AGGREGATOR graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">inputPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string"></PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setHiveDbName" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the name of the target Hive DB</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">hiveDbName</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string"></PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isJoin="true" name="waitConfig">
|
||||
<DESCRIPTION>wait configurations</DESCRIPTION>
|
||||
<PARAMETERS/>
|
||||
<ARCS>
|
||||
<ARC to="graph2hive"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="graph2hive" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>import the graph into the Hive DB</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'inputPath' : 'inputPath',
|
||||
'hiveDbName' : 'hiveDbName'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/hive/oozie_app',
|
||||
'sparkDriverMemory' : '4G',
|
||||
'sparkExecutorMemory' : '10G',
|
||||
'sparkExecutorCores' : '3'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="success"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
</CONFIGURATION>
|
||||
<STATUS>
|
||||
<LAST_EXECUTION_ID>wf_20210728_075001_400</LAST_EXECUTION_ID>
|
||||
<LAST_EXECUTION_DATE>2021-07-28T08:04:00+00:00</LAST_EXECUTION_DATE>
|
||||
<LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
|
||||
<LAST_EXECUTION_ERROR/>
|
||||
</STATUS>
|
||||
</BODY>
|
||||
</RESOURCE_PROFILE>
|
|
@ -0,0 +1,99 @@
|
|||
<RESOURCE_PROFILE>
|
||||
<HEADER>
|
||||
<RESOURCE_IDENTIFIER value="8d36cc94-5b82-413c-923f-e7b3953e41ba_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
|
||||
<RESOURCE_TYPE value="WorkflowDSResourceType"/>
|
||||
<RESOURCE_KIND value="WorkflowDSResources"/>
|
||||
<RESOURCE_URI value=""/>
|
||||
<DATE_OF_CREATION value="2021-08-06T13:48:17+00:00"/>
|
||||
</HEADER>
|
||||
<BODY>
|
||||
<WORKFLOW_NAME>Update Solr [PROD]</WORKFLOW_NAME>
|
||||
<WORKFLOW_TYPE>Data Provision</WORKFLOW_TYPE>
|
||||
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
|
||||
<CONFIGURATION start="manual">
|
||||
<NODE isStart="true" name="setInputPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the path containing the GRAPH to index</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">inputGraphRootPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/prod_provision/graph/14_graph_blacklisted</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setCollection" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the format (valid values: TMF, DMF)</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">format</PARAM>
|
||||
<PARAM function="validValues(['TMF', 'DMF'])" managedBy="user" name="parameterValue" required="true" type="string">DMF</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setIsLookUpUrl" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the lookup address</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">isLookupUrl</PARAM>
|
||||
<PARAM managedBy="system" name="parameterValue" required="true" type="string">http://services.openaire.eu:8280/is/services/isLookUp?wsdl</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isJoin="true" name="waitConfig">
|
||||
<DESCRIPTION>wait configurations</DESCRIPTION>
|
||||
<PARAMETERS/>
|
||||
<ARCS>
|
||||
<ARC to="updateSolr"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="updateSolr" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>update the Solr index from the input graph</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'inputGraphRootPath' : 'inputGraphRootPath',
|
||||
'isLookupUrl' : 'isLookupUrl',
|
||||
'format' : 'format'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/provision/oozie_app',
|
||||
'sourceMaxRelations' : '1000',
|
||||
'targetMaxRelations' : '10000000',
|
||||
'relPartitions' : '3000',
|
||||
'batchSize' : '2000',
|
||||
'relationFilter' : 'isAuthorInstitutionOf,produces,hasAmongTopNSimilarDocuments,cites,isCitedBy',
|
||||
'otherDsTypeId' : 'scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource',
|
||||
'resumeFrom' : 'prepare_relations',
|
||||
'shouldIndex' : 'true',
|
||||
'outputFormat' : 'SOLR',
|
||||
'sparkDriverMemoryForJoining' : '3G',
|
||||
'sparkExecutorMemoryForJoining' : '7G',
|
||||
'sparkExecutorCoresForJoining' : '4',
|
||||
'sparkDriverMemoryForIndexing' : '2G',
|
||||
'sparkExecutorMemoryForIndexing' : '2G',
|
||||
'sparkExecutorCoresForIndexing' : '64',
|
||||
'sparkNetworkTimeout' : '600',
|
||||
'workingDir' : '/tmp/prod_provision/working_dir/update_solr'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="success"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
</CONFIGURATION>
|
||||
<STATUS>
|
||||
<LAST_EXECUTION_ID>wf_20210724_062705_620</LAST_EXECUTION_ID>
|
||||
<LAST_EXECUTION_DATE>2021-07-25T13:25:37+00:00</LAST_EXECUTION_DATE>
|
||||
<LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
|
||||
<LAST_EXECUTION_ERROR/>
|
||||
</STATUS>
|
||||
</BODY>
|
||||
</RESOURCE_PROFILE>
|
|
@ -0,0 +1,100 @@
|
|||
<RESOURCE_PROFILE>
|
||||
<HEADER>
|
||||
<RESOURCE_IDENTIFIER value="65ca9122-f8fe-4aa6-9fb2-bc1e1ffb2dda_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
|
||||
<RESOURCE_TYPE value="WorkflowDSResourceType"/>
|
||||
<RESOURCE_KIND value="WorkflowDSResources"/>
|
||||
<RESOURCE_URI value=""/>
|
||||
<DATE_OF_CREATION value="2021-07-24T17:42:40+00:00"/>
|
||||
</HEADER>
|
||||
<BODY>
|
||||
<WORKFLOW_NAME>Update Stats [PROD]</WORKFLOW_NAME>
|
||||
<WORKFLOW_TYPE>Data Provision</WORKFLOW_TYPE>
|
||||
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
|
||||
<CONFIGURATION start="manual">
|
||||
<NODE isStart="true" name="setGraphDbName" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the OpenAIRE graph DB name</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">openaire_db_name</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">openaire_prod_yyyyMMdd</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setStatsDbName" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the STATS DB name</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">stats_db_name</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">openaire_prod_stats_yyyyMMdd</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="statsMonitorDbName" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the STATS MONITOR DB name</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">monitor_db_name</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">openaire_prod_stats_monitor_yyyyMMdd</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="statsObservatoryDbName" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the STATS OBSERVATORY DB name</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">observatory_db_name</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">openaire_prod_stats_observatory_yyyyMMdd</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isJoin="true" name="waitConfig">
|
||||
<DESCRIPTION>wait configurations</DESCRIPTION>
|
||||
<PARAMETERS/>
|
||||
<ARCS>
|
||||
<ARC to="updateStatsDB"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="updateStatsDB" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>update the content in the stats DB</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'openaire_db_name' : 'openaire_db_name',
|
||||
'stats_db_name' : 'stats_db_name',
|
||||
'monitor_db_name' : 'monitor_db_name',
|
||||
'observatory_db_name' : 'observatory_db_name'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/stats_update/oozie_app',
|
||||
'hive_timeout' : '15000',
|
||||
'stats_tool_api_url' : 'https://services.openaire.eu/stats-tool',
|
||||
'stats_db_shadow_name' : 'openaire_prod_stats_shadow',
|
||||
'external_stats_db_name' : 'stats_ext',
|
||||
'monitor_db_shadow_name' : 'openaire_prod_stats_monitor_shadow',
|
||||
'observatory_db_shadow_name' : 'openaire_prod_stats_observatory_shadow',
|
||||
'context_api_url' : 'https://services.openaire.eu/openaire'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="success"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
</CONFIGURATION>
|
||||
<STATUS>
|
||||
<LAST_EXECUTION_ID>wf_20210725_065608_71</LAST_EXECUTION_ID>
|
||||
<LAST_EXECUTION_DATE>2021-07-26T07:35:55+00:00</LAST_EXECUTION_DATE>
|
||||
<LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
|
||||
<LAST_EXECUTION_ERROR/>
|
||||
</STATUS>
|
||||
</BODY>
|
||||
</RESOURCE_PROFILE>
|
|
@ -0,0 +1,87 @@
|
|||
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
||||
<HEADER>
|
||||
<RESOURCE_IDENTIFIER value="546f8ba1-2ca2-4e29-86ea-c9489ab9b859_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
|
||||
<RESOURCE_TYPE value="WorkflowDSResourceType"/>
|
||||
<RESOURCE_KIND value="WorkflowDSResources"/>
|
||||
<RESOURCE_URI value=""/>
|
||||
<DATE_OF_CREATION value="2021-07-27T16:07:05+00:00"/>
|
||||
</HEADER>
|
||||
<BODY>
|
||||
<WORKFLOW_NAME>Publish Stats [PROD]</WORKFLOW_NAME>
|
||||
<WORKFLOW_TYPE>Content Publishing</WORKFLOW_TYPE>
|
||||
<WORKFLOW_PRIORITY>35</WORKFLOW_PRIORITY>
|
||||
<CONFIGURATION start="manual">
|
||||
<NODE isStart="true" name="setStatsDbName" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the STATS DB name</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">stats_db_name</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">openaire_prod_stats_yyyyMMdd</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="statsMonitorDbName" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the STATS MONITOR DB name</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">monitor_db_name</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">openaire_prod_stats_monitor_yyyyMMdd</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="statsObservatoryDbName" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the STATS OBSERVATORY DB name</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">observatory_db_name</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">openaire_prod_stats_observatory_yyyyMMdd</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isJoin="true" name="waitConfig">
|
||||
<DESCRIPTION>wait configurations</DESCRIPTION>
|
||||
<PARAMETERS/>
|
||||
<ARCS>
|
||||
<ARC to="publishStatsDB"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="publishStatsDB" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>publishes the stats DB to the public schema</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'stats_db_name' : 'stats_db_name',
|
||||
'monitor_db_name' : 'monitor_db_name',
|
||||
'observatory_db_name' : 'observatory_db_name'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/stats_promote/oozie_app',
|
||||
'hive_timeout' : '150000',
|
||||
'stats_tool_api_url' : 'https://services.openaire.eu/stats-tool',
|
||||
'stats_db_production_name' : 'openaire_prod_stats',
|
||||
'monitor_db_production_name' : 'openaire_prod_stats_monitor',
|
||||
'observatory_db_production_name' : 'openaire_prod_stats_observatory'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="success"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
</CONFIGURATION>
|
||||
<STATUS>
|
||||
<LAST_EXECUTION_ID>wf_20210727_160728_625</LAST_EXECUTION_ID>
|
||||
<LAST_EXECUTION_DATE>2021-07-27T16:53:01+00:00</LAST_EXECUTION_DATE>
|
||||
<LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
|
||||
<LAST_EXECUTION_ERROR/>
|
||||
</STATUS>
|
||||
</BODY>
|
||||
</RESOURCE_PROFILE>
|
|
@ -0,0 +1,131 @@
|
|||
<RESOURCE_PROFILE>
|
||||
<HEADER>
|
||||
<RESOURCE_IDENTIFIER value="8d36cc94-5b82-413c-923f-e7b3953e41bb_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
|
||||
<RESOURCE_TYPE value="WorkflowDSResourceType"/>
|
||||
<RESOURCE_KIND value="WorkflowDSResources"/>
|
||||
<RESOURCE_URI value=""/>
|
||||
<DATE_OF_CREATION value="2021-02-15T09:52:39+00:00"/>
|
||||
</HEADER>
|
||||
<BODY>
|
||||
<WORKFLOW_NAME>Update Broker events [PROD OCEAN]</WORKFLOW_NAME>
|
||||
<WORKFLOW_TYPE>Data Provision</WORKFLOW_TYPE>
|
||||
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
|
||||
<CONFIGURATION start="manual">
|
||||
<NODE isStart="true" name="setGraphInputPath" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the path containing the GRAPH to scan</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">graphInputPath</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string"></PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setDatasourceIdWhitelist" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the datasource Ids Whitelist</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">datasourceIdWhitelist</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">openaire____::9ecafa3655143cbc4bc75853035cd432,opendoar____::dc6e224a8d74ce03bf301152d6e33e97,openaire____::09da65eaaa6deac2f785df1e0ae95a06,openaire____::3db634fc5446f389d0b826ea400a5da6,openaire____::5a38cb462ac487bf26bdb86009fe3e74,openaire____::3c29379cc184f66861e858bc7aa9615b,openaire____::4657147e48a1f32637bfe3743bce76c6,openaire____::c3267ea1c3f378c456209b6df241624e,opendoar____::358aee4cc897452c00244351e4d91f69,re3data_____::7b0ad08687b2c960d5aeef06f811d5e6,opendoar____::798ed7d4ee7138d49b8828958048130a,opendoar____::6f4922f45568161a8cdf4ad2299f6d23,opendoar____::4aa0e93b918848be0b7728b4b1568d8a,openaire____::02b55e4f52388520bfe11f959f836e68</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setDatasourceTypeWhitelist" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the datasource type Whitelist</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">datasourceTypeWhitelist</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">pubsrepository::unknown,pubsrepository::institutional,pubsrepository::thematic,datarepository::unknown,orprepository,softwarerepository</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setDatasourceIdBlacklist" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the datasource Id Blacklist</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">datasourceIdBlacklist</PARAM>
|
||||
<PARAM managedBy="system" name="parameterValue" required="true" type="string">-</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setTopicWhitelist" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the TOPIC whitelist (* = all topics)</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">topicWhitelist</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">ENRICH/MISSING/SUBJECT/DDC,ENRICH/MISSING/SUBJECT/JEL,ENRICH/MISSING/SUBJECT/MESHEUROPMC,ENRICH/MISSING/PUBLICATION_DATE,ENRICH/MISSING/PID,ENRICH/MISSING/PROJECT,ENRICH/MISSING/SUBJECT/ACM,ENRICH/MISSING/SUBJECT/ARXIV,ENRICH/MISSING/OPENACCESS_VERSION,ENRICH/MISSING/AUTHOR/ORCID,ENRICH/MISSING/ABSTRACT,ENRICH/MORE/SUBJECT/ACM,ENRICH/MORE/SUBJECT/ARXIV,ENRICH/MORE/SUBJECT/DDC,ENRICH/MORE/SUBJECT/JEL,ENRICH/MORE/OPENACCESS_VERSION,ENRICH/MORE/SUBJECT/MESHEUROPMC,ENRICH/MORE/PID</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isStart="true" name="setOutputDir" type="SetEnvParameter">
|
||||
<DESCRIPTION>Set the output path to store the Event records</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">outputDir</PARAM>
|
||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/var/lib/dnet/broker_PROD/events</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="waitConfig"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE isJoin="true" name="waitConfig">
|
||||
<DESCRIPTION>wait configurations</DESCRIPTION>
|
||||
<PARAMETERS/>
|
||||
<ARCS>
|
||||
<ARC to="updateBrokerEvents"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
<NODE name="updateBrokerEvents" type="SubmitHadoopJob">
|
||||
<DESCRIPTION>update the BROKER events</DESCRIPTION>
|
||||
<PARAMETERS>
|
||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||
{
|
||||
'graphInputPath' : 'graphInputPath',
|
||||
'datasourceIdWhitelist' : 'datasourceIdWhitelist',
|
||||
'datasourceTypeWhitelist' : 'datasourceTypeWhitelist',
|
||||
'datasourceIdBlacklist' : 'datasourceIdBlacklist',
|
||||
'topicWhitelist' : 'topicWhitelist',
|
||||
'outputDir' : 'outputDir'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||
{
|
||||
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/broker/generate_events/oozie_app',
|
||||
'esEventIndexName' : '',
|
||||
'esNotificationsIndexName' : '',
|
||||
'esIndexHost' : '',
|
||||
'maxIndexedEventsForDsAndTopic' : '100',
|
||||
'esBatchWriteRetryCount' : '8',
|
||||
'esBatchWriteRetryWait' : '60s',
|
||||
'esBatchSizeEntries' : '200',
|
||||
'esNodesWanOnly' : 'true',
|
||||
'brokerApiBaseUrl' : '',
|
||||
'brokerDbUrl' : '',
|
||||
'brokerDbUser' : '',
|
||||
'brokerDbPassword' : '',
|
||||
'sparkDriverMemory' : '3G',
|
||||
'sparkExecutorMemory' : '7G',
|
||||
'sparkExecutorCores' : '6',
|
||||
'workingDir' : '/tmp/prod_provision/working_dir/broker_events'
|
||||
}
|
||||
</PARAM>
|
||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||
</PARAMETERS>
|
||||
<ARCS>
|
||||
<ARC to="success"/>
|
||||
</ARCS>
|
||||
</NODE>
|
||||
</CONFIGURATION>
|
||||
<STATUS>
|
||||
<LAST_EXECUTION_ID>wf_20210709_073839_206</LAST_EXECUTION_ID>
|
||||
<LAST_EXECUTION_DATE>2021-07-09T11:01:01+00:00</LAST_EXECUTION_DATE>
|
||||
<LAST_EXECUTION_STATUS>FAILURE</LAST_EXECUTION_STATUS>
|
||||
<LAST_EXECUTION_ERROR></LAST_EXECUTION_ERROR>
|
||||
</STATUS>
|
||||
</BODY>
|
||||
</RESOURCE_PROFILE>
|
|
@ -24,8 +24,6 @@
|
|||
<module>dhp-dedup-openaire</module>
|
||||
<module>dhp-enrichment</module>
|
||||
<module>dhp-graph-provision</module>
|
||||
<!-- <module>dhp-dedup-scholexplorer</module>-->
|
||||
<!-- <module>dhp-graph-provision-scholexplorer</module>-->
|
||||
<module>dhp-blacklist</module>
|
||||
<module>dhp-stats-update</module>
|
||||
<module>dhp-stats-promote</module>
|
||||
|
|
Loading…
Reference in New Issue