forked from D-Net/dnet-hadoop
Merge branch 'master' of code-repo.d4science.org:D-Net/dnet-hadoop
This commit is contained in:
commit
a44b9b36b9
|
@ -45,6 +45,16 @@ public class EntityMergerTest implements Serializable {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void softwareMergerTest() throws InstantiationException, IllegalAccessException {
|
||||||
|
List<Tuple2<String, Software>> softwares = readSample(testEntityBasePath + "/software_merge.json", Software.class);
|
||||||
|
|
||||||
|
Software merged = DedupRecordFactory
|
||||||
|
.entityMerger(dedupId, softwares.iterator(), 0, dataInfo, Software.class);
|
||||||
|
|
||||||
|
System.out.println(merged.getBestaccessright().getClassid());
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void publicationMergerTest() throws InstantiationException, IllegalAccessException {
|
public void publicationMergerTest() throws InstantiationException, IllegalAccessException {
|
||||||
|
|
||||||
|
|
File diff suppressed because one or more lines are too long
|
@ -8,6 +8,7 @@ import java.util.Objects;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
|
@ -97,7 +98,7 @@ public class CleanGraphSparkJob {
|
||||||
.json(outputPath);
|
.json(outputPath);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <T extends Oaf> T fixDefaults(T value) {
|
protected static <T extends Oaf> T fixDefaults(T value) {
|
||||||
if (value instanceof Datasource) {
|
if (value instanceof Datasource) {
|
||||||
// nothing to clean here
|
// nothing to clean here
|
||||||
} else if (value instanceof Project) {
|
} else if (value instanceof Project) {
|
||||||
|
@ -134,11 +135,6 @@ public class CleanGraphSparkJob {
|
||||||
.setResourcetype(
|
.setResourcetype(
|
||||||
qualifier("UNKNOWN", "Unknown", ModelConstants.DNET_DATA_CITE_RESOURCE));
|
qualifier("UNKNOWN", "Unknown", ModelConstants.DNET_DATA_CITE_RESOURCE));
|
||||||
}
|
}
|
||||||
if (Objects.isNull(r.getBestaccessright()) || StringUtils.isBlank(r.getBestaccessright().getClassid())) {
|
|
||||||
r
|
|
||||||
.setBestaccessright(
|
|
||||||
qualifier("UNKNOWN", "not available", ModelConstants.DNET_ACCESS_MODES));
|
|
||||||
}
|
|
||||||
if (Objects.nonNull(r.getInstance())) {
|
if (Objects.nonNull(r.getInstance())) {
|
||||||
for (Instance i : r.getInstance()) {
|
for (Instance i : r.getInstance()) {
|
||||||
if (Objects.isNull(i.getAccessright()) || StringUtils.isBlank(i.getAccessright().getClassid())) {
|
if (Objects.isNull(i.getAccessright()) || StringUtils.isBlank(i.getAccessright().getClassid())) {
|
||||||
|
@ -152,6 +148,15 @@ public class CleanGraphSparkJob {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (Objects.isNull(r.getBestaccessright()) || StringUtils.isBlank(r.getBestaccessright().getClassid())) {
|
||||||
|
Qualifier bestaccessrights = AbstractMdRecordToOafMapper.createBestAccessRights(r.getInstance());
|
||||||
|
if (Objects.isNull(bestaccessrights)) {
|
||||||
|
r.setBestaccessright(
|
||||||
|
qualifier("UNKNOWN", "not available", ModelConstants.DNET_ACCESS_MODES));
|
||||||
|
} else {
|
||||||
|
r.setBestaccessright(bestaccessrights);
|
||||||
|
}
|
||||||
|
}
|
||||||
if (Objects.nonNull(r.getAuthor())) {
|
if (Objects.nonNull(r.getAuthor())) {
|
||||||
boolean nullRank = r
|
boolean nullRank = r
|
||||||
.getAuthor()
|
.getAuthor()
|
||||||
|
|
|
@ -378,6 +378,10 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
protected abstract Field<String> prepareDatasetStorageDate(Document doc, DataInfo info);
|
protected abstract Field<String> prepareDatasetStorageDate(Document doc, DataInfo info);
|
||||||
|
|
||||||
|
public static Qualifier createBestAccessRights(final List<Instance> instanceList) {
|
||||||
|
return getBestAccessRights(instanceList);
|
||||||
|
}
|
||||||
|
|
||||||
protected static Qualifier getBestAccessRights(final List<Instance> instanceList) {
|
protected static Qualifier getBestAccessRights(final List<Instance> instanceList) {
|
||||||
if (instanceList != null) {
|
if (instanceList != null) {
|
||||||
final Optional<Qualifier> min = instanceList
|
final Optional<Qualifier> min = instanceList
|
||||||
|
|
|
@ -57,6 +57,8 @@ public class CleaningFunctionTest {
|
||||||
String json = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/result.json"));
|
String json = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/result.json"));
|
||||||
Publication p_in = MAPPER.readValue(json, Publication.class);
|
Publication p_in = MAPPER.readValue(json, Publication.class);
|
||||||
|
|
||||||
|
assertNull(p_in.getBestaccessright());
|
||||||
|
|
||||||
assertTrue(p_in instanceof Result);
|
assertTrue(p_in instanceof Result);
|
||||||
assertTrue(p_in instanceof Publication);
|
assertTrue(p_in instanceof Publication);
|
||||||
|
|
||||||
|
@ -84,6 +86,9 @@ public class CleaningFunctionTest {
|
||||||
.map(p -> p.getQualifier())
|
.map(p -> p.getQualifier())
|
||||||
.allMatch(q -> pidTerms.contains(q.getClassid())));
|
.allMatch(q -> pidTerms.contains(q.getClassid())));
|
||||||
|
|
||||||
|
Publication p_defaults = CleanGraphSparkJob.fixDefaults(p_out);
|
||||||
|
assertEquals("CLOSED", p_defaults.getBestaccessright().getClassid());
|
||||||
|
|
||||||
// TODO add more assertions to verity the cleaned values
|
// TODO add more assertions to verity the cleaned values
|
||||||
System.out.println(MAPPER.writeValueAsString(p_out));
|
System.out.println(MAPPER.writeValueAsString(p_out));
|
||||||
|
|
||||||
|
|
|
@ -185,12 +185,7 @@
|
||||||
"surname": ""
|
"surname": ""
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"bestaccessright": {
|
"bestaccessright": null,
|
||||||
"classid": "CLOSED",
|
|
||||||
"classname": "Closed Access",
|
|
||||||
"schemeid": "dnet:access_modes",
|
|
||||||
"schemename": "dnet:access_modes"
|
|
||||||
},
|
|
||||||
"collectedfrom": [
|
"collectedfrom": [
|
||||||
{
|
{
|
||||||
"key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747",
|
"key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747",
|
||||||
|
|
|
@ -11,6 +11,29 @@
|
||||||
<WORKFLOW_TYPE>Data Provision</WORKFLOW_TYPE>
|
<WORKFLOW_TYPE>Data Provision</WORKFLOW_TYPE>
|
||||||
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
|
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
|
||||||
<CONFIGURATION start="manual">
|
<CONFIGURATION start="manual">
|
||||||
|
|
||||||
|
<NODE isStart="true" name="setReuseContent" type="SetEnvParameter">
|
||||||
|
<DESCRIPTION>reuse cached content from the aggregation system</DESCRIPTION>
|
||||||
|
<PARAMETERS>
|
||||||
|
<PARAM managedBy="system" name="parameterName" required="true" type="string">reuseContent</PARAM>
|
||||||
|
<PARAM function="validValues(['true', 'false'])" managedBy="user" name="parameterValue" required="true" type="string">true</PARAM>
|
||||||
|
</PARAMETERS>
|
||||||
|
<ARCS>
|
||||||
|
<ARC to="waitConfig"/>
|
||||||
|
</ARCS>
|
||||||
|
</NODE>
|
||||||
|
|
||||||
|
<NODE isStart="true" name="setContentPath" type="SetEnvParameter">
|
||||||
|
<DESCRIPTION>set the aggregator content path</DESCRIPTION>
|
||||||
|
<PARAMETERS>
|
||||||
|
<PARAM managedBy="system" name="parameterName" required="true" type="string">contentPath</PARAM>
|
||||||
|
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_aggregator</PARAM>
|
||||||
|
</PARAMETERS>
|
||||||
|
<ARCS>
|
||||||
|
<ARC to="waitConfig"/>
|
||||||
|
</ARCS>
|
||||||
|
</NODE>
|
||||||
|
|
||||||
<NODE isStart="true" name="setAggregatorGraphPath" type="SetEnvParameter">
|
<NODE isStart="true" name="setAggregatorGraphPath" type="SetEnvParameter">
|
||||||
<DESCRIPTION>Set the path containing the AGGREGATOR graph</DESCRIPTION>
|
<DESCRIPTION>Set the path containing the AGGREGATOR graph</DESCRIPTION>
|
||||||
<PARAMETERS>
|
<PARAMETERS>
|
||||||
|
@ -62,87 +85,94 @@
|
||||||
</ARCS>
|
</ARCS>
|
||||||
</NODE>
|
</NODE>
|
||||||
|
|
||||||
<NODE isStart="true" name="setCleanedGraphPath" type="SetEnvParameter">
|
<NODE isStart="true" name="setOrcidGraphPath" type="SetEnvParameter">
|
||||||
<DESCRIPTION>Set the target path to store the CLEANED graph</DESCRIPTION>
|
<DESCRIPTION>Set the target path to store the ORCID enriched graph</DESCRIPTION>
|
||||||
<PARAMETERS>
|
<PARAMETERS>
|
||||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">cleanedGraphPath</PARAM>
|
<PARAM managedBy="system" name="parameterName" required="true" type="string">orcidGraphPath</PARAM>
|
||||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/05_graph_cleaned</PARAM>
|
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/05_graph_orcid</PARAM>
|
||||||
</PARAMETERS>
|
</PARAMETERS>
|
||||||
<ARCS>
|
<ARCS>
|
||||||
<ARC to="waitConfig"/>
|
<ARC to="waitConfig"/>
|
||||||
</ARCS>
|
</ARCS>
|
||||||
</NODE>
|
</NODE>
|
||||||
|
|
||||||
<NODE isStart="true" name="setOrcidGraphPath" type="SetEnvParameter">
|
|
||||||
<DESCRIPTION>Set the target path to store the ORCID enriched graph</DESCRIPTION>
|
|
||||||
<PARAMETERS>
|
|
||||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">orcidGraphPath</PARAM>
|
|
||||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/06_graph_orcid</PARAM>
|
|
||||||
</PARAMETERS>
|
|
||||||
<ARCS>
|
|
||||||
<ARC to="waitConfig"/>
|
|
||||||
</ARCS>
|
|
||||||
</NODE>
|
|
||||||
<NODE isStart="true" name="setBulkTaggingGraphPath" type="SetEnvParameter">
|
<NODE isStart="true" name="setBulkTaggingGraphPath" type="SetEnvParameter">
|
||||||
<DESCRIPTION>Set the target path to store the BULK TAGGED graph</DESCRIPTION>
|
<DESCRIPTION>Set the target path to store the BULK TAGGED graph</DESCRIPTION>
|
||||||
<PARAMETERS>
|
<PARAMETERS>
|
||||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">bulkTaggingGraphPath</PARAM>
|
<PARAM managedBy="system" name="parameterName" required="true" type="string">bulkTaggingGraphPath</PARAM>
|
||||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/07_graph_bulktagging</PARAM>
|
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/06_graph_bulktagging</PARAM>
|
||||||
</PARAMETERS>
|
</PARAMETERS>
|
||||||
<ARCS>
|
<ARCS>
|
||||||
<ARC to="waitConfig"/>
|
<ARC to="waitConfig"/>
|
||||||
</ARCS>
|
</ARCS>
|
||||||
</NODE>
|
</NODE>
|
||||||
|
|
||||||
<NODE isStart="true" name="setAffiliationGraphPath" type="SetEnvParameter">
|
<NODE isStart="true" name="setAffiliationGraphPath" type="SetEnvParameter">
|
||||||
<DESCRIPTION>Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph</DESCRIPTION>
|
<DESCRIPTION>Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph</DESCRIPTION>
|
||||||
<PARAMETERS>
|
<PARAMETERS>
|
||||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">affiliationGraphPath</PARAM>
|
<PARAM managedBy="system" name="parameterName" required="true" type="string">affiliationGraphPath</PARAM>
|
||||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/08_graph_affiliation</PARAM>
|
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/07_graph_affiliation</PARAM>
|
||||||
</PARAMETERS>
|
</PARAMETERS>
|
||||||
<ARCS>
|
<ARCS>
|
||||||
<ARC to="waitConfig"/>
|
<ARC to="waitConfig"/>
|
||||||
</ARCS>
|
</ARCS>
|
||||||
</NODE>
|
</NODE>
|
||||||
|
|
||||||
<NODE isStart="true" name="setCommunityOrganizationGraphPath" type="SetEnvParameter">
|
<NODE isStart="true" name="setCommunityOrganizationGraphPath" type="SetEnvParameter">
|
||||||
<DESCRIPTION>Set the target path to store the COMMUNITY from SELECTED SOURCES graph</DESCRIPTION>
|
<DESCRIPTION>Set the target path to store the COMMUNITY from SELECTED SOURCES graph</DESCRIPTION>
|
||||||
<PARAMETERS>
|
<PARAMETERS>
|
||||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">communityOrganizationGraphPath</PARAM>
|
<PARAM managedBy="system" name="parameterName" required="true" type="string">communityOrganizationGraphPath</PARAM>
|
||||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/09_graph_comunity_organization</PARAM>
|
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/08_graph_comunity_organization</PARAM>
|
||||||
</PARAMETERS>
|
</PARAMETERS>
|
||||||
<ARCS>
|
<ARCS>
|
||||||
<ARC to="waitConfig"/>
|
<ARC to="waitConfig"/>
|
||||||
</ARCS>
|
</ARCS>
|
||||||
</NODE>
|
</NODE>
|
||||||
|
|
||||||
<NODE isStart="true" name="setFundingGraphPath" type="SetEnvParameter">
|
<NODE isStart="true" name="setFundingGraphPath" type="SetEnvParameter">
|
||||||
<DESCRIPTION>Set the target path to store the FUNDING from SEMANTIC RELATION graph</DESCRIPTION>
|
<DESCRIPTION>Set the target path to store the FUNDING from SEMANTIC RELATION graph</DESCRIPTION>
|
||||||
<PARAMETERS>
|
<PARAMETERS>
|
||||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">fundingGraphPath</PARAM>
|
<PARAM managedBy="system" name="parameterName" required="true" type="string">fundingGraphPath</PARAM>
|
||||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/10_graph_funding</PARAM>
|
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/09_graph_funding</PARAM>
|
||||||
</PARAMETERS>
|
</PARAMETERS>
|
||||||
<ARCS>
|
<ARCS>
|
||||||
<ARC to="waitConfig"/>
|
<ARC to="waitConfig"/>
|
||||||
</ARCS>
|
</ARCS>
|
||||||
</NODE>
|
</NODE>
|
||||||
|
|
||||||
<NODE isStart="true" name="setCommunitySemRelGraphPath" type="SetEnvParameter">
|
<NODE isStart="true" name="setCommunitySemRelGraphPath" type="SetEnvParameter">
|
||||||
<DESCRIPTION>Set the target path to store the COMMUNITY from SEMANTIC RELATION graph</DESCRIPTION>
|
<DESCRIPTION>Set the target path to store the COMMUNITY from SEMANTIC RELATION graph</DESCRIPTION>
|
||||||
<PARAMETERS>
|
<PARAMETERS>
|
||||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">communitySemRelGraphPath</PARAM>
|
<PARAM managedBy="system" name="parameterName" required="true" type="string">communitySemRelGraphPath</PARAM>
|
||||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/11_graph_comunity_sem_rel</PARAM>
|
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/10_graph_comunity_sem_rel</PARAM>
|
||||||
</PARAMETERS>
|
</PARAMETERS>
|
||||||
<ARCS>
|
<ARCS>
|
||||||
<ARC to="waitConfig"/>
|
<ARC to="waitConfig"/>
|
||||||
</ARCS>
|
</ARCS>
|
||||||
</NODE>
|
</NODE>
|
||||||
|
|
||||||
<NODE isStart="true" name="setCountryGraphPath" type="SetEnvParameter">
|
<NODE isStart="true" name="setCountryGraphPath" type="SetEnvParameter">
|
||||||
<DESCRIPTION>Set the target path to store the COUNTRY enriched graph</DESCRIPTION>
|
<DESCRIPTION>Set the target path to store the COUNTRY enriched graph</DESCRIPTION>
|
||||||
<PARAMETERS>
|
<PARAMETERS>
|
||||||
<PARAM managedBy="system" name="parameterName" required="true" type="string">countryGraphPath</PARAM>
|
<PARAM managedBy="system" name="parameterName" required="true" type="string">countryGraphPath</PARAM>
|
||||||
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/12_graph_country</PARAM>
|
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/11_graph_country</PARAM>
|
||||||
</PARAMETERS>
|
</PARAMETERS>
|
||||||
<ARCS>
|
<ARCS>
|
||||||
<ARC to="waitConfig"/>
|
<ARC to="waitConfig"/>
|
||||||
</ARCS>
|
</ARCS>
|
||||||
</NODE>
|
</NODE>
|
||||||
|
|
||||||
|
<NODE isStart="true" name="setCleanedGraphPath" type="SetEnvParameter">
|
||||||
|
<DESCRIPTION>Set the target path to store the CLEANED graph</DESCRIPTION>
|
||||||
|
<PARAMETERS>
|
||||||
|
<PARAM managedBy="system" name="parameterName" required="true" type="string">cleanedGraphPath</PARAM>
|
||||||
|
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/beta_provision/graph/12_graph_cleaned</PARAM>
|
||||||
|
</PARAMETERS>
|
||||||
|
<ARCS>
|
||||||
|
<ARC to="waitConfig"/>
|
||||||
|
</ARCS>
|
||||||
|
</NODE>
|
||||||
|
|
||||||
<NODE isStart="true" name="setBlacklistedGraphPath" type="SetEnvParameter">
|
<NODE isStart="true" name="setBlacklistedGraphPath" type="SetEnvParameter">
|
||||||
<DESCRIPTION>Set the target path to store the blacklisted graph</DESCRIPTION>
|
<DESCRIPTION>Set the target path to store the blacklisted graph</DESCRIPTION>
|
||||||
<PARAMETERS>
|
<PARAMETERS>
|
||||||
|
@ -153,6 +183,7 @@
|
||||||
<ARC to="waitConfig"/>
|
<ARC to="waitConfig"/>
|
||||||
</ARCS>
|
</ARCS>
|
||||||
</NODE>
|
</NODE>
|
||||||
|
|
||||||
<NODE isStart="true" name="setIsLookUpUrl" type="SetEnvParameter">
|
<NODE isStart="true" name="setIsLookUpUrl" type="SetEnvParameter">
|
||||||
<DESCRIPTION>Set the lookup address</DESCRIPTION>
|
<DESCRIPTION>Set the lookup address</DESCRIPTION>
|
||||||
<PARAMETERS>
|
<PARAMETERS>
|
||||||
|
@ -163,6 +194,7 @@
|
||||||
<ARC to="waitConfig"/>
|
<ARC to="waitConfig"/>
|
||||||
</ARCS>
|
</ARCS>
|
||||||
</NODE>
|
</NODE>
|
||||||
|
|
||||||
<NODE isStart="true" name="setBulkTaggingPathMap" type="SetEnvParameter">
|
<NODE isStart="true" name="setBulkTaggingPathMap" type="SetEnvParameter">
|
||||||
<DESCRIPTION>Set the map of paths for the Bulk Tagging</DESCRIPTION>
|
<DESCRIPTION>Set the map of paths for the Bulk Tagging</DESCRIPTION>
|
||||||
<PARAMETERS>
|
<PARAMETERS>
|
||||||
|
@ -173,6 +205,7 @@
|
||||||
<ARC to="waitConfig"/>
|
<ARC to="waitConfig"/>
|
||||||
</ARCS>
|
</ARCS>
|
||||||
</NODE>
|
</NODE>
|
||||||
|
|
||||||
<NODE isStart="true" name="setPropagationOrganizationCommunityMap" type="SetEnvParameter">
|
<NODE isStart="true" name="setPropagationOrganizationCommunityMap" type="SetEnvParameter">
|
||||||
<DESCRIPTION>Set the map of associations organization, community list for the propagation of community to result through organization</DESCRIPTION>
|
<DESCRIPTION>Set the map of associations organization, community list for the propagation of community to result through organization</DESCRIPTION>
|
||||||
<PARAMETERS>
|
<PARAMETERS>
|
||||||
|
@ -185,6 +218,7 @@
|
||||||
<ARC to="waitConfig"/>
|
<ARC to="waitConfig"/>
|
||||||
</ARCS>
|
</ARCS>
|
||||||
</NODE>
|
</NODE>
|
||||||
|
|
||||||
<NODE isStart="true" name="setDedupConfig" type="SetEnvParameter">
|
<NODE isStart="true" name="setDedupConfig" type="SetEnvParameter">
|
||||||
<DESCRIPTION>Set the dedup orchestrator name</DESCRIPTION>
|
<DESCRIPTION>Set the dedup orchestrator name</DESCRIPTION>
|
||||||
<PARAMETERS>
|
<PARAMETERS>
|
||||||
|
@ -195,6 +229,7 @@
|
||||||
<ARC to="waitConfig"/>
|
<ARC to="waitConfig"/>
|
||||||
</ARCS>
|
</ARCS>
|
||||||
</NODE>
|
</NODE>
|
||||||
|
|
||||||
<NODE isStart="true" name="actionSetsRaw" type="SetEnvParameter">
|
<NODE isStart="true" name="actionSetsRaw" type="SetEnvParameter">
|
||||||
<DESCRIPTION>declares the ActionSet ids to promote in the RAW graph</DESCRIPTION>
|
<DESCRIPTION>declares the ActionSet ids to promote in the RAW graph</DESCRIPTION>
|
||||||
<PARAMETERS>
|
<PARAMETERS>
|
||||||
|
@ -205,6 +240,7 @@
|
||||||
<ARC to="waitConfig"/>
|
<ARC to="waitConfig"/>
|
||||||
</ARCS>
|
</ARCS>
|
||||||
</NODE>
|
</NODE>
|
||||||
|
|
||||||
<NODE isStart="true" name="actionSetsIIS" type="SetEnvParameter">
|
<NODE isStart="true" name="actionSetsIIS" type="SetEnvParameter">
|
||||||
<DESCRIPTION>declares the ActionSet ids to promote in the INFERRED graph</DESCRIPTION>
|
<DESCRIPTION>declares the ActionSet ids to promote in the INFERRED graph</DESCRIPTION>
|
||||||
<PARAMETERS>
|
<PARAMETERS>
|
||||||
|
@ -215,6 +251,7 @@
|
||||||
<ARC to="waitConfig"/>
|
<ARC to="waitConfig"/>
|
||||||
</ARCS>
|
</ARCS>
|
||||||
</NODE>
|
</NODE>
|
||||||
|
|
||||||
<NODE isJoin="true" name="waitConfig">
|
<NODE isJoin="true" name="waitConfig">
|
||||||
<DESCRIPTION>wait configurations</DESCRIPTION>
|
<DESCRIPTION>wait configurations</DESCRIPTION>
|
||||||
<PARAMETERS/>
|
<PARAMETERS/>
|
||||||
|
@ -222,6 +259,7 @@
|
||||||
<ARC to="aggregatorGraph"/>
|
<ARC to="aggregatorGraph"/>
|
||||||
</ARCS>
|
</ARCS>
|
||||||
</NODE>
|
</NODE>
|
||||||
|
|
||||||
<NODE name="aggregatorGraph" type="SubmitHadoopJob">
|
<NODE name="aggregatorGraph" type="SubmitHadoopJob">
|
||||||
<DESCRIPTION>create the AGGREGATOR graph</DESCRIPTION>
|
<DESCRIPTION>create the AGGREGATOR graph</DESCRIPTION>
|
||||||
<PARAMETERS>
|
<PARAMETERS>
|
||||||
|
@ -230,7 +268,9 @@
|
||||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||||
{
|
{
|
||||||
'graphOutputPath' : 'aggregatorGraphPath',
|
'graphOutputPath' : 'aggregatorGraphPath',
|
||||||
'isLookupUrl' : 'isLookUpUrl'
|
'isLookupUrl' : 'isLookUpUrl',
|
||||||
|
'reuseContent' : 'reuseContent',
|
||||||
|
'contentPath' : 'contentPath'
|
||||||
}
|
}
|
||||||
</PARAM>
|
</PARAM>
|
||||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||||
|
@ -241,8 +281,6 @@
|
||||||
'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus',
|
'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus',
|
||||||
'postgresUser' : 'dnet',
|
'postgresUser' : 'dnet',
|
||||||
'postgresPassword' : '',
|
'postgresPassword' : '',
|
||||||
'reuseContent' : 'false',
|
|
||||||
'contentPath' : '/tmp/beta_provision/aggregator',
|
|
||||||
'workingDir' : '/tmp/beta_provision/working_dir/aggregator'
|
'workingDir' : '/tmp/beta_provision/working_dir/aggregator'
|
||||||
}
|
}
|
||||||
</PARAM>
|
</PARAM>
|
||||||
|
@ -252,6 +290,7 @@
|
||||||
<ARC to="promoteActionsRaw"/>
|
<ARC to="promoteActionsRaw"/>
|
||||||
</ARCS>
|
</ARCS>
|
||||||
</NODE>
|
</NODE>
|
||||||
|
|
||||||
<NODE name="promoteActionsRaw" type="SubmitHadoopJob">
|
<NODE name="promoteActionsRaw" type="SubmitHadoopJob">
|
||||||
<DESCRIPTION>create the RAW graph</DESCRIPTION>
|
<DESCRIPTION>create the RAW graph</DESCRIPTION>
|
||||||
<PARAMETERS>
|
<PARAMETERS>
|
||||||
|
@ -289,6 +328,7 @@
|
||||||
<ARC to="duplicateScan"/>
|
<ARC to="duplicateScan"/>
|
||||||
</ARCS>
|
</ARCS>
|
||||||
</NODE>
|
</NODE>
|
||||||
|
|
||||||
<NODE name="duplicateScan" type="SubmitHadoopJob">
|
<NODE name="duplicateScan" type="SubmitHadoopJob">
|
||||||
<DESCRIPTION>search for duplicates in the raw graph</DESCRIPTION>
|
<DESCRIPTION>search for duplicates in the raw graph</DESCRIPTION>
|
||||||
<PARAMETERS>
|
<PARAMETERS>
|
||||||
|
@ -314,6 +354,7 @@
|
||||||
<ARC to="promoteActionsIIS"/>
|
<ARC to="promoteActionsIIS"/>
|
||||||
</ARCS>
|
</ARCS>
|
||||||
</NODE>
|
</NODE>
|
||||||
|
|
||||||
<NODE name="promoteActionsIIS" type="SubmitHadoopJob">
|
<NODE name="promoteActionsIIS" type="SubmitHadoopJob">
|
||||||
<DESCRIPTION>create the INFERRED graph</DESCRIPTION>
|
<DESCRIPTION>create the INFERRED graph</DESCRIPTION>
|
||||||
<PARAMETERS>
|
<PARAMETERS>
|
||||||
|
@ -351,6 +392,7 @@
|
||||||
<ARC to="dedupConsistency"/>
|
<ARC to="dedupConsistency"/>
|
||||||
</ARCS>
|
</ARCS>
|
||||||
</NODE>
|
</NODE>
|
||||||
|
|
||||||
<NODE name="dedupConsistency" type="SubmitHadoopJob">
|
<NODE name="dedupConsistency" type="SubmitHadoopJob">
|
||||||
<DESCRIPTION>mark duplicates as deleted and redistribute the relationships</DESCRIPTION>
|
<DESCRIPTION>mark duplicates as deleted and redistribute the relationships</DESCRIPTION>
|
||||||
<PARAMETERS>
|
<PARAMETERS>
|
||||||
|
@ -375,41 +417,6 @@
|
||||||
</ARCS>
|
</ARCS>
|
||||||
</NODE>
|
</NODE>
|
||||||
|
|
||||||
<NODE name="graphCleaning" type="SubmitHadoopJob">
|
|
||||||
<DESCRIPTION>clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid</DESCRIPTION>
|
|
||||||
<PARAMETERS>
|
|
||||||
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
|
||||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
|
||||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
|
||||||
{
|
|
||||||
'graphInputPath' : 'consistentGraphPath',
|
|
||||||
'graphOutputPath': 'cleanedGraphPath',
|
|
||||||
'isLookupUrl': 'isLookUpUrl'
|
|
||||||
}
|
|
||||||
</PARAM>
|
|
||||||
<PARAM managedBy="system" name="params" required="true" type="string">
|
|
||||||
{
|
|
||||||
'oozie.wf.application.path' : '/lib/dnet/oa/graph/clean/oozie_app',
|
|
||||||
'workingPath' : '/tmp/beta_provision/working_dir/clean'
|
|
||||||
}
|
|
||||||
</PARAM>
|
|
||||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
|
||||||
</PARAMETERS>
|
|
||||||
<ARCS>
|
|
||||||
<ARC to="SKIP_ENRICHMENT"/>
|
|
||||||
</ARCS>
|
|
||||||
</NODE>
|
|
||||||
|
|
||||||
<NODE name="SKIP_ENRICHMENT" type="Selection">
|
|
||||||
<DESCRIPTION>Do we skip the graph enrichment steps? (Yes to prepare the graph for the IIS)</DESCRIPTION>
|
|
||||||
<PARAMETERS>
|
|
||||||
<PARAM function="validValues(['YES', 'NO'])" managedBy="user" name="selection" required="true" type="string">NO</PARAM>
|
|
||||||
</PARAMETERS>
|
|
||||||
<ARCS>
|
|
||||||
<ARC name="YES" to="success"/>
|
|
||||||
<ARC name="NO" to="orcidPropagation"/>
|
|
||||||
</ARCS>
|
|
||||||
</NODE>
|
|
||||||
<NODE name="orcidPropagation" type="SubmitHadoopJob">
|
<NODE name="orcidPropagation" type="SubmitHadoopJob">
|
||||||
<DESCRIPTION>propagates ORCID among results linked by allowedsemrels semantic relationships</DESCRIPTION>
|
<DESCRIPTION>propagates ORCID among results linked by allowedsemrels semantic relationships</DESCRIPTION>
|
||||||
<PARAMETERS>
|
<PARAMETERS>
|
||||||
|
@ -417,7 +424,7 @@
|
||||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||||
{
|
{
|
||||||
'sourcePath' : 'cleanedGraphPath',
|
'sourcePath' : 'consistentGraphPath',
|
||||||
'outputPath': 'orcidGraphPath'
|
'outputPath': 'orcidGraphPath'
|
||||||
}
|
}
|
||||||
</PARAM>
|
</PARAM>
|
||||||
|
@ -435,6 +442,7 @@
|
||||||
<ARC to="bulkTagging"/>
|
<ARC to="bulkTagging"/>
|
||||||
</ARCS>
|
</ARCS>
|
||||||
</NODE>
|
</NODE>
|
||||||
|
|
||||||
<NODE name="bulkTagging" type="SubmitHadoopJob">
|
<NODE name="bulkTagging" type="SubmitHadoopJob">
|
||||||
<DESCRIPTION>mark results respecting some rules as belonging to communities</DESCRIPTION>
|
<DESCRIPTION>mark results respecting some rules as belonging to communities</DESCRIPTION>
|
||||||
<PARAMETERS>
|
<PARAMETERS>
|
||||||
|
@ -460,6 +468,7 @@
|
||||||
<ARC to="affiliationPropagation"/>
|
<ARC to="affiliationPropagation"/>
|
||||||
</ARCS>
|
</ARCS>
|
||||||
</NODE>
|
</NODE>
|
||||||
|
|
||||||
<NODE name="affiliationPropagation" type="SubmitHadoopJob">
|
<NODE name="affiliationPropagation" type="SubmitHadoopJob">
|
||||||
<DESCRIPTION>creates relashionships between results and organizations when the organizations are associated to institutional repositories</DESCRIPTION>
|
<DESCRIPTION>creates relashionships between results and organizations when the organizations are associated to institutional repositories</DESCRIPTION>
|
||||||
<PARAMETERS>
|
<PARAMETERS>
|
||||||
|
@ -484,6 +493,7 @@
|
||||||
<ARC to="communityOrganizationPropagation"/>
|
<ARC to="communityOrganizationPropagation"/>
|
||||||
</ARCS>
|
</ARCS>
|
||||||
</NODE>
|
</NODE>
|
||||||
|
|
||||||
<NODE name="communityOrganizationPropagation" type="SubmitHadoopJob">
|
<NODE name="communityOrganizationPropagation" type="SubmitHadoopJob">
|
||||||
<DESCRIPTION>marks as belonging to communities the result collected from datasources related to the organizations specified in the organizationCommunityMap</DESCRIPTION>
|
<DESCRIPTION>marks as belonging to communities the result collected from datasources related to the organizations specified in the organizationCommunityMap</DESCRIPTION>
|
||||||
<PARAMETERS>
|
<PARAMETERS>
|
||||||
|
@ -509,6 +519,7 @@
|
||||||
<ARC to="resultProjectPropagation"/>
|
<ARC to="resultProjectPropagation"/>
|
||||||
</ARCS>
|
</ARCS>
|
||||||
</NODE>
|
</NODE>
|
||||||
|
|
||||||
<NODE name="resultProjectPropagation" type="SubmitHadoopJob">
|
<NODE name="resultProjectPropagation" type="SubmitHadoopJob">
|
||||||
<DESCRIPTION>created relation between projects and results linked to other results trough allowedsemrel semantic relations linked to projects</DESCRIPTION>
|
<DESCRIPTION>created relation between projects and results linked to other results trough allowedsemrel semantic relations linked to projects</DESCRIPTION>
|
||||||
<PARAMETERS>
|
<PARAMETERS>
|
||||||
|
@ -534,6 +545,7 @@
|
||||||
<ARC to="communitySemrelPropagation"/>
|
<ARC to="communitySemrelPropagation"/>
|
||||||
</ARCS>
|
</ARCS>
|
||||||
</NODE>
|
</NODE>
|
||||||
|
|
||||||
<NODE name="communitySemrelPropagation" type="SubmitHadoopJob">
|
<NODE name="communitySemrelPropagation" type="SubmitHadoopJob">
|
||||||
<DESCRIPTION>tag as belonging to communitites result in in allowedsemrels relation with other result already linked to communities </DESCRIPTION>
|
<DESCRIPTION>tag as belonging to communitites result in in allowedsemrels relation with other result already linked to communities </DESCRIPTION>
|
||||||
<PARAMETERS>
|
<PARAMETERS>
|
||||||
|
@ -560,6 +572,7 @@
|
||||||
<ARC to="countryPropagation"/>
|
<ARC to="countryPropagation"/>
|
||||||
</ARCS>
|
</ARCS>
|
||||||
</NODE>
|
</NODE>
|
||||||
|
|
||||||
<NODE name="countryPropagation" type="SubmitHadoopJob">
|
<NODE name="countryPropagation" type="SubmitHadoopJob">
|
||||||
<DESCRIPTION>associated to results colleced from allowedtypes and those in the whithelist the country of the organization(s) handling the datasource it is collected from </DESCRIPTION>
|
<DESCRIPTION>associated to results colleced from allowedtypes and those in the whithelist the country of the organization(s) handling the datasource it is collected from </DESCRIPTION>
|
||||||
<PARAMETERS>
|
<PARAMETERS>
|
||||||
|
@ -584,10 +597,36 @@
|
||||||
</PARAM>
|
</PARAM>
|
||||||
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||||
</PARAMETERS>
|
</PARAMETERS>
|
||||||
|
<ARCS>
|
||||||
|
<ARC to="graphCleaning"/>
|
||||||
|
</ARCS>
|
||||||
|
</NODE>
|
||||||
|
|
||||||
|
<NODE name="graphCleaning" type="SubmitHadoopJob">
|
||||||
|
<DESCRIPTION>clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid</DESCRIPTION>
|
||||||
|
<PARAMETERS>
|
||||||
|
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
||||||
|
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||||
|
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||||
|
{
|
||||||
|
'graphInputPath' : 'countryGraphPath',
|
||||||
|
'graphOutputPath': 'cleanedGraphPath',
|
||||||
|
'isLookupUrl': 'isLookUpUrl'
|
||||||
|
}
|
||||||
|
</PARAM>
|
||||||
|
<PARAM managedBy="system" name="params" required="true" type="string">
|
||||||
|
{
|
||||||
|
'oozie.wf.application.path' : '/lib/dnet/oa/graph/clean/oozie_app',
|
||||||
|
'workingPath' : '/tmp/beta_provision/working_dir/clean'
|
||||||
|
}
|
||||||
|
</PARAM>
|
||||||
|
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
|
||||||
|
</PARAMETERS>
|
||||||
<ARCS>
|
<ARCS>
|
||||||
<ARC to="blacklistRelations"/>
|
<ARC to="blacklistRelations"/>
|
||||||
</ARCS>
|
</ARCS>
|
||||||
</NODE>
|
</NODE>
|
||||||
|
|
||||||
<NODE name="blacklistRelations" type="SubmitHadoopJob">
|
<NODE name="blacklistRelations" type="SubmitHadoopJob">
|
||||||
<DESCRIPTION>removes blacklisted relations </DESCRIPTION>
|
<DESCRIPTION>removes blacklisted relations </DESCRIPTION>
|
||||||
<PARAMETERS>
|
<PARAMETERS>
|
||||||
|
@ -595,7 +634,7 @@
|
||||||
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
|
||||||
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
||||||
{
|
{
|
||||||
'sourcePath' : 'countryGraphPath',
|
'sourcePath' : 'cleanedGraphPath',
|
||||||
'outputPath': 'blacklistedGraphPath'
|
'outputPath': 'blacklistedGraphPath'
|
||||||
}
|
}
|
||||||
</PARAM>
|
</PARAM>
|
||||||
|
|
Loading…
Reference in New Issue