diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java index c5feb8d3ff..3db584c761 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java @@ -5,9 +5,10 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.List; -import com.google.gson.Gson; import org.apache.avro.generic.GenericData; +import com.google.gson.Gson; + /** Created by miriam on 01/08/2018. */ public class Community implements Serializable { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java index 8e34ccebfb..66407a4b37 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java @@ -77,7 +77,7 @@ public class CommunityConfiguration implements Serializable { if (zenodocommunityMap == null) { zenodocommunityMap = Maps.newHashMap(); } - if(selectionConstraintsMap == null){ + if (selectionConstraintsMap == null) { selectionConstraintsMap = Maps.newHashMap(); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java index 7a43ca476a..52ca606fc7 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java @@ -91,10 +91,11 @@ public class CommunityConfigurationFactory { private static SelectionConstraints parseConstrains(Node node) { Node aconstraints = node.selectSingleNode("./advancedConstraints"); - if(aconstraints == null){ + if (aconstraints == null) { return null; } - SelectionConstraints selectionConstraints = new Gson().fromJson(aconstraints.getText(), SelectionConstraints.class); + SelectionConstraints selectionConstraints = new Gson() + .fromJson(aconstraints.getText(), SelectionConstraints.class); selectionConstraints.setSelection(resolver); return selectionConstraints; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java index 9803c5e631..f06c0d47a7 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java @@ -18,8 +18,8 @@ public class QueryInformationSystem { + " let $datasources := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::contentproviders')]/concept " + " let $organizations := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::resultorganizations')]/concept " + " let $communities := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::zenodocommunities')]/concept " - + " let $fos := $x//CONFIGURATION/context/param[./@name='fos']/text() " - + " let $sdg := $x//CONFIGURATION/context/param[./@name='sdg']/text() " + + " let $fos := $x//CONFIGURATION/context/param[./@name='fos']/text() " + + " let $sdg := $x//CONFIGURATION/context/param[./@name='sdg']/text() " + "let $zenodo := $x//param[./@name='zenodoCommunity']/text() " + " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] and $x//context/param[./@name = 'status']/text() != 'hidden' " @@ -30,12 +30,12 @@ public class QueryInformationSystem { + " {for $y in tokenize($subj,',') " + " return " + " {$y}} " - + " {for $y in tokenize($fos,',') " - + " return " - + " {$y}} " - + " {for $y in tokenize($sdg,',') " - + " return " - + " {$y}} " + + " {for $y in tokenize($fos,',') " + + " return " + + " {$y}} " + + " {for $y in tokenize($sdg,',') " + + " return " + + " {$y}} " + " " + " " + " {for $d in $datasources " @@ -69,9 +69,9 @@ public class QueryInformationSystem { + " " + " } " + " " - + "" - +"{$x//CONFIGURATION/context/param[./@name='advancedConstraint']/text()} " - + "" + + "" + + "{$x//CONFIGURATION/context/param[./@name='advancedConstraint']/text()} " + + "" + " "; public static CommunityConfiguration getCommunityConfiguration(final String isLookupUrl) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java index ccb69a97d0..ee75bf955a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java @@ -9,16 +9,16 @@ import java.util.*; import java.util.stream.Collectors; import java.util.stream.Stream; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import org.apache.commons.lang3.StringUtils; import com.google.gson.Gson; import com.jayway.jsonpath.DocumentContext; import com.jayway.jsonpath.JsonPath; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; /** Created by miriam on 02/08/2018. */ public class ResultTagger implements Serializable { @@ -134,13 +134,19 @@ public class ResultTagger implements Serializable { /* Tagging for Advanced Constraints */ final Set aconstraints = new HashSet<>(); - conf.getSelectionConstraintsMap().keySet() - .forEach(communityId -> { - if(conf.getSelectionConstraintsMap().get(communityId) != null && - conf.getSelectionConstraintsMap().get(communityId) - .getCriteria().stream().anyMatch(crit -> crit.verifyCriteria(param))) - aconstraints.add(communityId); - }); + conf + .getSelectionConstraintsMap() + .keySet() + .forEach(communityId -> { + if (conf.getSelectionConstraintsMap().get(communityId) != null && + conf + .getSelectionConstraintsMap() + .get(communityId) + .getCriteria() + .stream() + .anyMatch(crit -> crit.verifyCriteria(param))) + aconstraints.add(communityId); + }); communities.addAll(aconstraints); @@ -163,21 +169,48 @@ public class ResultTagger implements Serializable { } if (subjects.contains(c)) dataInfoList - .add(OafMapperUtils.dataInfo(false, BULKTAG_DATA_INFO_TYPE, true, false, - OafMapperUtils.qualifier(CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), TAGGING_TRUST)); + .add( + OafMapperUtils + .dataInfo( + false, BULKTAG_DATA_INFO_TYPE, true, false, + OafMapperUtils + .qualifier( + CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT, DNET_PROVENANCE_ACTIONS, + DNET_PROVENANCE_ACTIONS), + TAGGING_TRUST)); if (datasources.contains(c)) dataInfoList - .add(OafMapperUtils.dataInfo(false, BULKTAG_DATA_INFO_TYPE, true, false, - OafMapperUtils.qualifier(CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), TAGGING_TRUST)); + .add( + OafMapperUtils + .dataInfo( + false, BULKTAG_DATA_INFO_TYPE, true, false, + OafMapperUtils + .qualifier( + CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE, DNET_PROVENANCE_ACTIONS, + DNET_PROVENANCE_ACTIONS), + TAGGING_TRUST)); if (czenodo.contains(c)) dataInfoList - .add(OafMapperUtils.dataInfo(false, BULKTAG_DATA_INFO_TYPE, true, false, - OafMapperUtils.qualifier(CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), TAGGING_TRUST)); + .add( + OafMapperUtils + .dataInfo( + false, BULKTAG_DATA_INFO_TYPE, true, false, + OafMapperUtils + .qualifier( + CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS, + DNET_PROVENANCE_ACTIONS), + TAGGING_TRUST)); if (aconstraints.contains(c)) dataInfoList - .add( - OafMapperUtils.dataInfo(false, BULKTAG_DATA_INFO_TYPE, true, false, - OafMapperUtils.qualifier(CLASS_ID_ADVANCED_CONSTRAINT, CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), TAGGING_TRUST)); + .add( + OafMapperUtils + .dataInfo( + false, BULKTAG_DATA_INFO_TYPE, true, false, + OafMapperUtils + .qualifier( + CLASS_ID_ADVANCED_CONSTRAINT, CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT, + DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), + TAGGING_TRUST)); } }); @@ -198,21 +231,48 @@ public class ResultTagger implements Serializable { List dataInfoList = new ArrayList<>(); if (subjects.contains(c)) dataInfoList - .add(OafMapperUtils.dataInfo(false, BULKTAG_DATA_INFO_TYPE, true, false, - OafMapperUtils.qualifier(CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), TAGGING_TRUST)); + .add( + OafMapperUtils + .dataInfo( + false, BULKTAG_DATA_INFO_TYPE, true, false, + OafMapperUtils + .qualifier( + CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT, DNET_PROVENANCE_ACTIONS, + DNET_PROVENANCE_ACTIONS), + TAGGING_TRUST)); if (datasources.contains(c)) dataInfoList - .add(OafMapperUtils.dataInfo(false, BULKTAG_DATA_INFO_TYPE, true, false, - OafMapperUtils.qualifier(CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), TAGGING_TRUST)); + .add( + OafMapperUtils + .dataInfo( + false, BULKTAG_DATA_INFO_TYPE, true, false, + OafMapperUtils + .qualifier( + CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE, + DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), + TAGGING_TRUST)); if (czenodo.contains(c)) dataInfoList - .add(OafMapperUtils.dataInfo(false, BULKTAG_DATA_INFO_TYPE, true, false, - OafMapperUtils.qualifier(CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), TAGGING_TRUST)); + .add( + OafMapperUtils + .dataInfo( + false, BULKTAG_DATA_INFO_TYPE, true, false, + OafMapperUtils + .qualifier( + CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS, + DNET_PROVENANCE_ACTIONS), + TAGGING_TRUST)); if (aconstraints.contains(c)) dataInfoList - .add( - OafMapperUtils.dataInfo(false, BULKTAG_DATA_INFO_TYPE, true, false, - OafMapperUtils.qualifier(CLASS_ID_ADVANCED_CONSTRAINT, CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), TAGGING_TRUST)); + .add( + OafMapperUtils + .dataInfo( + false, BULKTAG_DATA_INFO_TYPE, true, false, + OafMapperUtils + .qualifier( + CLASS_ID_ADVANCED_CONSTRAINT, CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT, + DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), + TAGGING_TRUST)); context.setDataInfo(dataInfoList); return context; diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java index 0212f4a49d..def524156b 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java @@ -11,11 +11,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import com.google.gson.Gson; -import com.jayway.jsonpath.DocumentContext; -import com.jayway.jsonpath.JsonPath; -import eu.dnetlib.dhp.bulktag.community.ProtoMap; -import eu.dnetlib.dhp.schema.oaf.*; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; @@ -32,6 +27,12 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.gson.Gson; +import com.jayway.jsonpath.DocumentContext; +import com.jayway.jsonpath.JsonPath; + +import eu.dnetlib.dhp.bulktag.community.ProtoMap; +import eu.dnetlib.dhp.schema.oaf.*; public class BulkTagJobTest { @@ -44,7 +45,7 @@ public class BulkTagJobTest { + " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\"," + " \"contributor\" : \"$['contributor'][*]['value']\"," + " \"description\" : \"$['description'][*]['value']\", " - +" \"subject\" :\"$['subject'][*]['value']\" }"; + + " \"subject\" :\"$['subject'][*]['value']\" }"; private static SparkSession spark; @@ -774,8 +775,8 @@ public class BulkTagJobTest { .assertEquals( 3, idExplodeCommunity.filter("provenance = 'community:datasource'").count()); Assertions - .assertEquals( - 1, idExplodeCommunity.filter("provenance = 'community:advconstraint'").count()); + .assertEquals( + 1, idExplodeCommunity.filter("provenance = 'community:advconstraint'").count()); } // @Test diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/CleanCountrySparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/CleanCountrySparkJob.java index 30991e9d7a..c150c63df1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/CleanCountrySparkJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/CleanCountrySparkJob.java @@ -10,7 +10,6 @@ import java.util.stream.Collectors; import javax.swing.text.html.Option; -import eu.dnetlib.dhp.schema.oaf.utils.PidType; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; @@ -33,6 +32,7 @@ import eu.dnetlib.dhp.oa.graph.clean.CleanContextSparkJob; import eu.dnetlib.dhp.schema.oaf.Country; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import eu.dnetlib.dhp.schema.oaf.utils.PidType; public class CleanCountrySparkJob implements Serializable { private static final Logger log = LoggerFactory.getLogger(CleanCountrySparkJob.class); @@ -113,7 +113,10 @@ public class CleanCountrySparkJob implements Serializable { if (r .getPid() .stream() - .anyMatch(p -> p.getQualifier().getClassid() + .anyMatch( + p -> p + .getQualifier() + .getClassid() .equals(PidType.doi) && pidInParam(p.getValue(), verifyParam))) { r .setCountry( diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index 2b1c257ad4..c69a7a6ffa 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -422,7 +422,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i final Relation r2 = OafMapperUtils .getRelation( - orgId, dsId, DATASOURCE_ORGANIZATION, PROVISION, PROVIDES, collectedFrom, info, lastUpdateTimestamp); + orgId, dsId, DATASOURCE_ORGANIZATION, PROVISION, PROVIDES, collectedFrom, info, + lastUpdateTimestamp); return Arrays.asList(r1, r2); } catch (final Exception e) {