[EOSC TAG] Adding EOSC interoperability guidelines in the specific element of the result. Removed from subjects. Also removed the deletion of EOSC Jupyter Notebook from subjects, since the criteria are now searched for in a different place

This commit is contained in:
Miriam Baglioni 2022-07-20 18:07:54 +02:00
parent 18b505d6a3
commit 438abdf96f
2 changed files with 27 additions and 57 deletions

View File

@ -28,28 +28,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
public class SparkEoscTag { public class SparkEoscTag {
private static final Logger log = LoggerFactory.getLogger(SparkEoscTag.class); private static final Logger log = LoggerFactory.getLogger(SparkEoscTag.class);
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
// Qualifier built with the code "EOSC" and the label "European Open Science Cloud";
// ModelConstants.DNET_SUBJECT_TYPOLOGIES is passed for both remaining arguments.
public static final Qualifier EOSC_QUALIFIER = OafMapperUtils
.qualifier(
"EOSC",
"European Open Science Cloud",
ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES);
// DataInfo shared by the three EOSC subject constants below. Its nested qualifier uses
// "propagation:subject" / "Inferred by OpenAIRE" with ModelConstants.DNET_PROVENANCE_ACTIONS.
// NOTE(review): the meaning of the positional boolean flags and of "0.9" is defined by
// OafMapperUtils.dataInfo, which is not visible here - confirm against that helper.
public static final DataInfo EOSC_DATAINFO = OafMapperUtils
.dataInfo(
false, "propagation", true, false,
OafMapperUtils
.qualifier(
"propagation:subject", "Inferred by OpenAIRE",
ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS),
"0.9");
// Subject entry with value "EOSC::Jupyter Notebook", using EOSC_QUALIFIER and EOSC_DATAINFO.
public final static StructuredProperty EOSC_NOTEBOOK = OafMapperUtils
.structuredProperty(
"EOSC::Jupyter Notebook", EOSC_QUALIFIER, EOSC_DATAINFO);
// Subject entry with value "EOSC::Galaxy Workflow", using EOSC_QUALIFIER and EOSC_DATAINFO.
public final static StructuredProperty EOSC_GALAXY = OafMapperUtils
.structuredProperty(
"EOSC::Galaxy Workflow", EOSC_QUALIFIER, EOSC_DATAINFO);
// Subject entry with value "EOSC::Twitter Data", using EOSC_QUALIFIER and EOSC_DATAINFO.
public final static StructuredProperty EOSC_TWITTER = OafMapperUtils
.structuredProperty(
"EOSC::Twitter Data", EOSC_QUALIFIER, EOSC_DATAINFO);
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils String jsonConfiguration = IOUtils
@ -84,29 +63,30 @@ public class SparkEoscTag {
}); });
} }
/**
 * Creates an {@link EoscIfGuidelines} object populated with the supplied values.
 *
 * @param code value passed to {@code setCode}
 * @param label value passed to {@code setLabel}
 * @param url value passed to {@code setUrl}
 * @param semantics value passed to {@code setSemanticRelation}
 * @return a new instance carrying the four given values
 */
public static EoscIfGuidelines newInstance(String code, String label, String url, String semantics) {
	final EoscIfGuidelines guideline = new EoscIfGuidelines();

	// Setters are invoked in the same order as before: code, label, url, semantic relation.
	guideline.setCode(code);
	guideline.setLabel(label);
	guideline.setUrl(url);
	guideline.setSemanticRelation(semantics);

	return guideline;
}
private static void execEoscTag(SparkSession spark, String inputPath, String workingPath) { private static void execEoscTag(SparkSession spark, String inputPath, String workingPath) {
readPath(spark, inputPath + "/software", Software.class) readPath(spark, inputPath + "/software", Software.class)
.map((MapFunction<Software, Software>) s -> { .map((MapFunction<Software, Software>) s -> {
List<StructuredProperty> sbject;
if (!Optional.ofNullable(s.getSubject()).isPresent())
s.setSubject(new ArrayList<>());
sbject = s.getSubject();
if (containsCriteriaNotebook(s)) { if (containsCriteriaNotebook(s)) {
sbject.add(EOSC_NOTEBOOK); if (!Optional.ofNullable(s.getEoscifguidelines()).isPresent())
if (sbject.stream().anyMatch(sb -> sb.getValue().equals("EOSC Jupyter Notebook"))) { s.setEoscifguidelines(new ArrayList<>());
sbject = sbject.stream().map(sb -> { s.getEoscifguidelines().add(newInstance("EOSC::Jupyter Notebook", "EOSC::Jupyter Notebook", "", "compliesWith"));
if (sb.getValue().equals("EOSC Jupyter Notebook")) {
return null;
}
return sb;
}).filter(Objects::nonNull).collect(Collectors.toList());
s.setSubject(sbject);
}
} }
if (containsCriteriaGalaxy(s)) { if (containsCriteriaGalaxy(s)) {
sbject.add(EOSC_GALAXY); if (!Optional.ofNullable(s.getEoscifguidelines()).isPresent())
s.setEoscifguidelines(new ArrayList<>());
s.getEoscifguidelines().add(newInstance("EOSC::Galaxy Workflow", "EOSC::Galaxy Workflow", "", "compliesWith"));
} }
return s; return s;
}, Encoders.bean(Software.class)) }, Encoders.bean(Software.class))
@ -124,14 +104,14 @@ public class SparkEoscTag {
readPath(spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class) readPath(spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class)
.map((MapFunction<OtherResearchProduct, OtherResearchProduct>) orp -> { .map((MapFunction<OtherResearchProduct, OtherResearchProduct>) orp -> {
List<StructuredProperty> sbject; List<StructuredProperty> sbject;
if (!Optional.ofNullable(orp.getSubject()).isPresent()) if (!Optional.ofNullable(orp.getEoscifguidelines()).isPresent())
orp.setSubject(new ArrayList<>()); orp.setEoscifguidelines(new ArrayList<>());
sbject = orp.getSubject();
if (containsCriteriaGalaxy(orp)) { if (containsCriteriaGalaxy(orp)) {
sbject.add(EOSC_GALAXY); orp.getEoscifguidelines().add(newInstance("EOSC::Galaxy Workflow", "EOSC::Galaxy Workflow", "", "compliesWith"));
} }
if (containscriteriaTwitter(orp)) { if (containscriteriaTwitter(orp)) {
sbject.add(EOSC_TWITTER); orp.getEoscifguidelines().add(newInstance("EOSC::Twitter Data", "EOSC::Twitter Data", "", "compliesWith"));
} }
return orp; return orp;
}, Encoders.bean(OtherResearchProduct.class)) }, Encoders.bean(OtherResearchProduct.class))
@ -149,11 +129,10 @@ public class SparkEoscTag {
readPath(spark, inputPath + "/dataset", Dataset.class) readPath(spark, inputPath + "/dataset", Dataset.class)
.map((MapFunction<Dataset, Dataset>) d -> { .map((MapFunction<Dataset, Dataset>) d -> {
List<StructuredProperty> sbject; List<StructuredProperty> sbject;
if (!Optional.ofNullable(d.getSubject()).isPresent()) if (!Optional.ofNullable(d.getEoscifguidelines()).isPresent())
d.setSubject(new ArrayList<>()); d.setEoscifguidelines(new ArrayList<>());
sbject = d.getSubject();
if (containscriteriaTwitter(d)) { if (containscriteriaTwitter(d)) {
sbject.add(EOSC_TWITTER); d.getEoscifguidelines().add(newInstance("EOSC::Twitter Data", "EOSC::Twitter Data", "", "compliesWith"));
} }
return d; return d;
}, Encoders.bean(Dataset.class)) }, Encoders.bean(Dataset.class))
@ -212,13 +191,6 @@ public class SparkEoscTag {
return false; return false;
} }
/**
 * Gathers the lower-cased values of the given subjects, both as whole strings and
 * split on single space characters.
 *
 * @param s the subjects whose values are read
 * @return a set containing every lower-cased value and each of its space-separated tokens
 */
private static Set<String> getSubjects(List<StructuredProperty> s) {
	final Set<String> collected = new HashSet<>();
	for (StructuredProperty subject : s) {
		final String lowered = subject.getValue().toLowerCase();
		// individual tokens first, then the complete value
		collected.addAll(Arrays.asList(lowered.split(" ")));
		collected.add(lowered);
	}
	return collected;
}
private static Set<String> getWordsSP(List<StructuredProperty> elem) { private static Set<String> getWordsSP(List<StructuredProperty> elem) {
Set<String> words = new HashSet<>(); Set<String> words = new HashSet<>();
Optional Optional
@ -242,9 +214,7 @@ public class SparkEoscTag {
t -> words t -> words
.addAll( .addAll(
Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" "))))); Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
// elem
// .forEach(
// t -> words.addAll(Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" "))));
return words; return words;
} }

View File

@ -801,7 +801,7 @@
<mockito-core.version>3.3.3</mockito-core.version> <mockito-core.version>3.3.3</mockito-core.version>
<mongodb.driver.version>3.4.2</mongodb.driver.version> <mongodb.driver.version>3.4.2</mongodb.driver.version>
<vtd.version>[2.12,3.0)</vtd.version> <vtd.version>[2.12,3.0)</vtd.version>
<dhp-schemas.version>[2.12.0]</dhp-schemas.version> <dhp-schemas.version>[2.12.2-SNAPSHOT]</dhp-schemas.version>
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version> <dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version> <dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version> <dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>