From 438abdf96fcdc340dfdbe18fa51c56c8a3ce5657 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 20 Jul 2022 18:07:54 +0200 Subject: [PATCH] [EOSC TAG] adding eosc interoperability guidelines in the specific element in the result. Removed from subjects. Removed also the deletion of EOSC Jupyter Notebook from subject since now the criteria are searched for in a different place --- .../eu/dnetlib/dhp/bulktag/SparkEoscTag.java | 82 ++++++------------- pom.xml | 2 +- 2 files changed, 27 insertions(+), 57 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkEoscTag.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkEoscTag.java index b9de5dd11..d31934081 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkEoscTag.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkEoscTag.java @@ -28,28 +28,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; public class SparkEoscTag { private static final Logger log = LoggerFactory.getLogger(SparkEoscTag.class); public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static final Qualifier EOSC_QUALIFIER = OafMapperUtils - .qualifier( - "EOSC", - "European Open Science Cloud", - ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES); - public static final DataInfo EOSC_DATAINFO = OafMapperUtils - .dataInfo( - false, "propagation", true, false, - OafMapperUtils - .qualifier( - "propagation:subject", "Inferred by OpenAIRE", - ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS), - "0.9"); - public final static StructuredProperty EOSC_NOTEBOOK = OafMapperUtils - .structuredProperty( - "EOSC::Jupyter Notebook", EOSC_QUALIFIER, EOSC_DATAINFO); - public final static StructuredProperty EOSC_GALAXY = OafMapperUtils - .structuredProperty( - "EOSC::Galaxy Workflow", EOSC_QUALIFIER, EOSC_DATAINFO); - public final static 
StructuredProperty EOSC_TWITTER = OafMapperUtils - .structuredProperty( - "EOSC::Twitter Data", EOSC_QUALIFIER, EOSC_DATAINFO); + public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils @@ -84,29 +63,30 @@ public class SparkEoscTag { }); } + public static EoscIfGuidelines newInstance(String code, String label, String url, String semantics){ + EoscIfGuidelines eig = new EoscIfGuidelines(); + eig.setCode( code); + eig.setLabel(label); + eig.setUrl(url); + eig.setSemanticRelation(semantics); + return eig; + + } private static void execEoscTag(SparkSession spark, String inputPath, String workingPath) { readPath(spark, inputPath + "/software", Software.class) .map((MapFunction) s -> { - List sbject; - if (!Optional.ofNullable(s.getSubject()).isPresent()) - s.setSubject(new ArrayList<>()); - sbject = s.getSubject(); if (containsCriteriaNotebook(s)) { - sbject.add(EOSC_NOTEBOOK); - if (sbject.stream().anyMatch(sb -> sb.getValue().equals("EOSC Jupyter Notebook"))) { - sbject = sbject.stream().map(sb -> { - if (sb.getValue().equals("EOSC Jupyter Notebook")) { - return null; - } - return sb; - }).filter(Objects::nonNull).collect(Collectors.toList()); - s.setSubject(sbject); - } + if (!Optional.ofNullable(s.getEoscifguidelines()).isPresent()) + s.setEoscifguidelines(new ArrayList<>()); + s.getEoscifguidelines().add(newInstance("EOSC::Jupyter Notebook", "EOSC::Jupyter Notebook", "", "compliesWith")); } if (containsCriteriaGalaxy(s)) { - sbject.add(EOSC_GALAXY); + if (!Optional.ofNullable(s.getEoscifguidelines()).isPresent()) + s.setEoscifguidelines(new ArrayList<>()); + + s.getEoscifguidelines().add(newInstance("EOSC::Galaxy Workflow", "EOSC::Galaxy Workflow", "", "compliesWith")); } return s; }, Encoders.bean(Software.class)) @@ -124,14 +104,14 @@ public class SparkEoscTag { readPath(spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class) .map((MapFunction) orp -> { List sbject; - if 
(!Optional.ofNullable(orp.getSubject()).isPresent()) - orp.setSubject(new ArrayList<>()); - sbject = orp.getSubject(); + if (!Optional.ofNullable(orp.getEoscifguidelines()).isPresent()) + orp.setEoscifguidelines(new ArrayList<>()); + if (containsCriteriaGalaxy(orp)) { - sbject.add(EOSC_GALAXY); + orp.getEoscifguidelines().add(newInstance("EOSC::Galaxy Workflow", "EOSC::Galaxy Workflow", "", "compliesWith")); } if (containscriteriaTwitter(orp)) { - sbject.add(EOSC_TWITTER); + orp.getEoscifguidelines().add(newInstance("EOSC::Twitter Data", "EOSC::Twitter Data", "", "compliesWith")); } return orp; }, Encoders.bean(OtherResearchProduct.class)) @@ -149,11 +129,10 @@ public class SparkEoscTag { readPath(spark, inputPath + "/dataset", Dataset.class) .map((MapFunction) d -> { List sbject; - if (!Optional.ofNullable(d.getSubject()).isPresent()) - d.setSubject(new ArrayList<>()); - sbject = d.getSubject(); + if (!Optional.ofNullable(d.getEoscifguidelines()).isPresent()) + d.setEoscifguidelines(new ArrayList<>()); if (containscriteriaTwitter(d)) { - sbject.add(EOSC_TWITTER); + d.getEoscifguidelines().add(newInstance("EOSC::Twitter Data", "EOSC::Twitter Data", "", "compliesWith")); } return d; }, Encoders.bean(Dataset.class)) @@ -212,13 +191,6 @@ public class SparkEoscTag { return false; } - private static Set getSubjects(List s) { - Set subjects = new HashSet<>(); - s.stream().forEach(sbj -> subjects.addAll(Arrays.asList(sbj.getValue().toLowerCase().split(" ")))); - s.stream().forEach(sbj -> subjects.add(sbj.getValue().toLowerCase())); - return subjects; - } - private static Set getWordsSP(List elem) { Set words = new HashSet<>(); Optional @@ -242,9 +214,7 @@ public class SparkEoscTag { t -> words .addAll( Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" "))))); -// elem -// .forEach( -// t -> words.addAll(Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))); + return words; } diff --git a/pom.xml b/pom.xml index 
54070f654..973bc3773 100644 --- a/pom.xml +++ b/pom.xml @@ -801,7 +801,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.12.0] + [2.12.2-SNAPSHOT] [4.0.3] [6.0.5] [3.1.6]