From 27a91841e7fa2a1b615b4d1e161d606db5bead96 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 4 Aug 2022 11:39:39 +0200 Subject: [PATCH] WIP: cleaning of subjects --- .../oaf/utils/GraphCleaningFunctions.java | 5 ++ .../dhp/schema/oaf/utils/OafMapperUtils.java | 25 ++++++++++ .../dnetlib/dhp/actionmanager/Constants.java | 13 ++--- .../PrepareFOSSparkJob.java | 3 +- .../PrepareSDGSparkJob.java | 3 +- .../DataciteToOAFTransformation.scala | 6 +-- .../eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala | 2 +- .../dhp/sx/bio/pubmed/PubMedToOaf.scala | 4 +- .../createunresolvedentities/ProduceTest.java | 10 ++-- .../dhp/broker/oa/util/ConversionUtils.java | 33 +++++++------ .../doiboost/DoiBoostMappingUtil.scala | 22 +++++++++ .../doiboost/crossref/Crossref2Oaf.scala | 2 +- .../dnetlib/doiboost/mag/MagDataModel.scala | 12 ++--- .../dnetlib/dhp/bulktag/EOSCTagJobTest.java | 2 +- .../dhp/oa/graph/clean/CleaningRuleMap.java | 1 + .../dhp/oa/graph/dump/ResultMapper.java | 11 ++--- .../raw/AbstractMdRecordToOafMapper.java | 49 +++++++------------ .../dhp/oa/graph/raw/OafToOafMapper.java | 4 +- .../dhp/oa/graph/raw/OdfToOafMapper.java | 4 +- .../resolution/ResolveEntitiesTest.scala | 4 +- pom.xml | 2 +- 21 files changed, 132 insertions(+), 85 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index 351bd2dd5f..151c536853 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -520,6 +520,11 @@ public class GraphCleaningFunctions extends CleaningFunctions { return s; } + protected static Subject cleanValue(Subject s) { + s.setValue(s.getValue().replaceAll(CLEANING_REGEX, " ")); + return s; + } + protected static Field cleanValue(Field s) { s.setValue(s.getValue().replaceAll(CLEANING_REGEX, " ")); return s; diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java index 6f452e846f..d58b354abd 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java @@ -189,6 +189,17 @@ public class OafMapperUtils { return q; } + public static Subject subject( + final String value, + final String classid, + final String classname, + final String schemeid, + final String schemename, + final DataInfo dataInfo) { + + return subject(value, qualifier(classid, classname, schemeid, schemename), dataInfo); + } + public static StructuredProperty structuredProperty( final String value, final String classid, @@ -200,6 +211,20 @@ public class OafMapperUtils { return structuredProperty(value, qualifier(classid, classname, schemeid, schemename), dataInfo); } + public static Subject subject( + final String value, + final Qualifier qualifier, + final DataInfo dataInfo) { + if (value == null) { + return null; + } + final Subject s = new Subject(); + s.setValue(value); + s.setQualifier(qualifier); + s.setDataInfo(dataInfo); + return s; + } + public static StructuredProperty structuredProperty( final String value, final Qualifier qualifier, diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java index aa25ca6335..bd223d7c96 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java @@ -13,6 +13,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import eu.dnetlib.dhp.schema.oaf.Subject; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; public class Constants { @@ -58,13 +59,13 @@ public class Constants { .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); } - public static StructuredProperty getSubject(String sbj, String classid, String classname, + public static Subject getSubject(String sbj, String classid, String classname, String diqualifierclassid) { if (sbj.equals(NULL)) return null; - StructuredProperty sp = new StructuredProperty(); - sp.setValue(sbj); - sp + Subject s = new Subject(); + s.setValue(sbj); + s .setQualifier( OafMapperUtils .qualifier( @@ -72,7 +73,7 @@ public class Constants { classname, ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES)); - sp + s .setDataInfo( OafMapperUtils .dataInfo( @@ -88,7 +89,7 @@ public class Constants { ModelConstants.DNET_PROVENANCE_ACTIONS), "")); - return sp; + return s; } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java index 55e3919320..4d2d252157 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java @@ -24,6 +24,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import eu.dnetlib.dhp.schema.oaf.Subject; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.utils.DHPUtils; @@ -79,7 +80,7 @@ public class PrepareFOSSparkJob implements Serializable { HashSet level3 = new HashSet<>(); addLevels(level1, level2, level3, first); it.forEachRemaining(v -> addLevels(level1, level2, level3, v)); - List sbjs = new ArrayList<>(); + List sbjs = new ArrayList<>(); level1.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID))); level2.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID))); level3.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID))); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java index a31e380fe9..bfdf142342 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java @@ -24,6 +24,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import eu.dnetlib.dhp.schema.oaf.Subject; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.utils.DHPUtils; @@ -73,7 +74,7 @@ public class PrepareSDGSparkJob implements Serializable { Result r = new Result(); r.setId(DHPUtils.generateUnresolvedIdentifier(k, DOI)); SDGDataModel first = it.next(); - List sbjs = new ArrayList<>(); + List sbjs = new ArrayList<>(); sbjs.add(getSubject(first.getSbj(), SDG_CLASS_ID, SDG_CLASS_NAME, UPDATE_SUBJECT_SDG_CLASS_ID)); it .forEachRemaining( diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala index 088a07427d..c29614d33e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala @@ -19,7 +19,7 @@ import java.time.chrono.ThaiBuddhistDate import java.time.format.DateTimeFormatter import java.util.{Date, Locale} import scala.collection.JavaConverters._ -import scala.io.{Codec, Source} +import scala.io.Source object DataciteToOAFTransformation { @@ -252,7 +252,7 @@ object DataciteToOAFTransformation { .exists(i => i.getHostedby != null && "figshare".equalsIgnoreCase(i.getHostedby.getValue)) if (hosted_by_figshare) { r.getInstance().asScala.foreach(i => i.setAccessright(ModelConstants.OPEN_ACCESS_RIGHT())) - val l: List[StructuredProperty] = List() + val l: List[Subject] = List() r.setSubject(l.asJava) } } @@ -492,7 +492,7 @@ object DataciteToOAFTransformation { subjects .filter(s => s.subject.nonEmpty) .map(s => - OafMapperUtils.structuredProperty( + OafMapperUtils.subject( s.subject.get, SUBJ_CLASS, SUBJ_CLASS, diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala index ffdab1799a..6703235984 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala @@ -281,7 +281,7 @@ object BioDBToOAF { d.setSubject( subjects .map(s => - OafMapperUtils.structuredProperty( + OafMapperUtils.subject( s, SUBJ_CLASS, SUBJ_CLASS, diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala index 42bafc93eb..410686f972 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala @@ -265,8 +265,8 @@ object PubMedToOaf { result.setLanguage(term) } - val subjects: List[StructuredProperty] = article.getSubjects.asScala.map(s => - OafMapperUtils.structuredProperty( + val subjects: List[Subject] = article.getSubjects.asScala.map(s => + OafMapperUtils.subject( s.getValue, SUBJ_CLASS, SUBJ_CLASS, diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java index a5ecaeabff..c3c110f091 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java @@ -72,7 +72,7 @@ public class ProduceTest { JavaRDD tmp = getResultJavaRDD(); - List sbjs = tmp + List sbjs = tmp .filter(row -> row.getSubject() != null && row.getSubject().size() > 0) .flatMap(row -> row.getSubject().iterator()) .collect(); @@ -169,7 +169,7 @@ public class ProduceTest { .getSubject() .size()); - List sbjs = tmp + List sbjs = tmp .filter(row -> row.getId().equals(doi)) .flatMap(row -> row.getSubject().iterator()) .collect(); @@ -396,7 +396,7 @@ public class ProduceTest { .getSubject() .size()); - List sbjs = tmp + List sbjs = tmp .filter(row -> row.getId().equals(doi)) .flatMap(row -> row.getSubject().iterator()) .collect(); @@ -508,7 +508,7 @@ public class ProduceTest { .getSubject() .size()); - List sbjs = tmp + List sbjs = tmp .filter(row -> row.getId().equals(doi)) .flatMap(row -> row.getSubject().iterator()) .collect(); @@ -537,7 +537,7 @@ public class ProduceTest { JavaRDD tmp = getResultJavaRDDPlusSDG(); - List sbjs_sdg = tmp + List sbjs_sdg = tmp .filter(row -> row.getSubject() != null && row.getSubject().size() > 0) .flatMap(row -> row.getSubject().iterator()) .filter(sbj -> sbj.getQualifier().getClassid().equals(Constants.SDG_CLASS_ID)) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java index bc37203d3c..5e7adec794 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java @@ -26,20 +26,7 @@ import eu.dnetlib.broker.objects.OaBrokerRelatedPublication; import eu.dnetlib.broker.objects.OaBrokerRelatedSoftware; import eu.dnetlib.broker.objects.OaBrokerTypedValue; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.Author; -import eu.dnetlib.dhp.schema.oaf.Dataset; -import eu.dnetlib.dhp.schema.oaf.Datasource; -import eu.dnetlib.dhp.schema.oaf.ExternalReference; -import eu.dnetlib.dhp.schema.oaf.Field; -import eu.dnetlib.dhp.schema.oaf.Instance; -import eu.dnetlib.dhp.schema.oaf.Journal; -import eu.dnetlib.dhp.schema.oaf.KeyValue; -import eu.dnetlib.dhp.schema.oaf.Project; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.Qualifier; -import eu.dnetlib.dhp.schema.oaf.Result; -import eu.dnetlib.dhp.schema.oaf.Software; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import eu.dnetlib.dhp.schema.oaf.*; public class ConversionUtils { @@ -71,6 +58,10 @@ public class ConversionUtils { return sp != null ? new OaBrokerTypedValue(classId(sp.getQualifier()), sp.getValue()) : null; } + public static OaBrokerTypedValue oafSubjectToBrokerTypedValue(final Subject sp) { + return sp != null ? new OaBrokerTypedValue(classId(sp.getQualifier()), sp.getValue()) : null; + } + public static OaBrokerRelatedDataset oafDatasetToBrokerDataset(final Dataset d) { if (d == null) { return null; @@ -115,7 +106,7 @@ public class ConversionUtils { res.setTitles(structPropList(result.getTitle())); res.setAbstracts(fieldList(result.getDescription())); res.setLanguage(classId(result.getLanguage())); - res.setSubjects(structPropTypedList(result.getSubject())); + res.setSubjects(subjectList(result.getSubject())); res.setCreators(mappedList(result.getAuthor(), ConversionUtils::oafAuthorToBrokerAuthor)); res.setPublicationdate(fieldValue(result.getDateofacceptance())); res.setPublisher(fieldValue(result.getPublisher())); @@ -304,6 +295,18 @@ public class ConversionUtils { .collect(Collectors.toList()); } + private static List subjectList(final List list) { + if (list == null) { + return new ArrayList<>(); + } + + return list + .stream() + .map(ConversionUtils::oafSubjectToBrokerTypedValue) + .filter(Objects::nonNull) + .collect(Collectors.toList()); + } + private static List mappedList(final List list, final Function func) { if (list == null) { return new ArrayList<>(); diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index 20471973a1..4789093cda 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -391,6 +391,28 @@ object DoiBoostMappingUtil { di } + def createSubject(value: String, classId: String, schemeId: String): Subject = { + val s = new Subject + s.setQualifier(OafMapperUtils.qualifier(classId, classId, schemeId, schemeId)) + s.setValue(value) + s + + } + + def createSubject( + value: String, + classId: String, + className: String, + schemeId: String, + schemeName: String + ): Subject = { + val s = new Subject + s.setQualifier(OafMapperUtils.qualifier(classId, className, schemeId, schemeName)) + s.setValue(value) + s + + } + def createSP( value: String, classId: String, diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index 9eec9e7590..7fb10863f6 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -201,7 +201,7 @@ case object Crossref2Oaf { if (subjectList.nonEmpty) { result.setSubject( - subjectList.map(s => createSP(s, "keywords", ModelConstants.DNET_SUBJECT_TYPOLOGIES)).asJava + subjectList.map(s => createSubject(s, "keyword", ModelConstants.DNET_SUBJECT_TYPOLOGIES)).asJava ) } diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala index 18ba864ce9..9a0b0d8455 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/mag/MagDataModel.scala @@ -2,7 +2,7 @@ package eu.dnetlib.doiboost.mag import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory -import eu.dnetlib.dhp.schema.oaf.{Instance, Journal, Publication, StructuredProperty} +import eu.dnetlib.dhp.schema.oaf.{Instance, Journal, Publication, StructuredProperty, Subject} import eu.dnetlib.doiboost.DoiBoostMappingUtil import eu.dnetlib.doiboost.DoiBoostMappingUtil._ import org.json4s @@ -210,8 +210,8 @@ case object ConversionUtil { val className = "Microsoft Academic Graph classification" val classid = "MAG" - val p: List[StructuredProperty] = fieldOfStudy.subjects.flatMap(s => { - val s1 = createSP( + val p: List[Subject] = fieldOfStudy.subjects.flatMap(s => { + val s1 = createSubject( s.DisplayName, classid, className, @@ -219,10 +219,10 @@ case object ConversionUtil { ModelConstants.DNET_SUBJECT_TYPOLOGIES ) val di = DoiBoostMappingUtil.generateDataInfo(s.Score.toString) - var resList: List[StructuredProperty] = List(s1) + var resList: List[Subject] = List(s1) if (s.MainType.isDefined) { val maintp = s.MainType.get - val s2 = createSP( + val s2 = createSubject( s.MainType.get, classid, className, @@ -232,7 +232,7 @@ case object ConversionUtil { s2.setDataInfo(di) resList = resList ::: List(s2) if (maintp.contains(".")) { - val s3 = createSP( + val s3 = createSubject( maintp.split("\\.").head, classid, className, diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/EOSCTagJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/EOSCTagJobTest.java index bfe4f64482..a640e20099 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/EOSCTagJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/EOSCTagJobTest.java @@ -290,7 +290,7 @@ public class EOSCTagJobTest { .stream() .anyMatch(s -> s.getCode().equals("EOSC::Jupyter Notebook"))); - List subjects = tmp + List subjects = tmp .filter(sw -> sw.getId().equals("50|od______1582::6e7a9b21a2feef45673890432af34244")) .collect() .get(0) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java index 7a35832892..6c156edb75 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java @@ -27,6 +27,7 @@ public class CleaningRuleMap extends HashMap, SerializableConsumer cleanQualifier(vocabularies, (AccessRight) o)); mapping.put(Country.class, o -> cleanCountry(vocabularies, (Country) o)); mapping.put(Relation.class, o -> cleanRelation(vocabularies, (Relation) o)); + return mapping; } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index aad3a87062..510f9c3a69 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -17,6 +17,7 @@ import eu.dnetlib.dhp.schema.dump.oaf.Instance; import eu.dnetlib.dhp.schema.dump.oaf.Measure; import eu.dnetlib.dhp.schema.dump.oaf.OpenAccessRoute; import eu.dnetlib.dhp.schema.dump.oaf.Result; +import eu.dnetlib.dhp.schema.dump.oaf.Subject; import eu.dnetlib.dhp.schema.dump.oaf.community.CfHbKeyValue; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityInstance; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; @@ -66,7 +67,7 @@ public class ResultMapper implements Serializable { final List contributorList = new ArrayList<>(); Optional .ofNullable(input.getContributor()) - .ifPresent(value -> value.stream().forEach(c -> contributorList.add(c.getValue()))); + .ifPresent(value -> value.forEach(c -> contributorList.add(c.getValue()))); out.setContributor(contributorList); Optional @@ -103,7 +104,7 @@ public class ResultMapper implements Serializable { final List coverageList = new ArrayList<>(); Optional .ofNullable(input.getCoverage()) - .ifPresent(value -> value.stream().forEach(c -> coverageList.add(c.getValue()))); + .ifPresent(value -> value.forEach(c -> coverageList.add(c.getValue()))); out.setCoverage(coverageList); out.setDateofcollection(input.getDateofcollection()); @@ -114,14 +115,12 @@ public class ResultMapper implements Serializable { .ifPresent(value -> value.forEach(d -> descriptionList.add(d.getValue()))); out.setDescription(descriptionList); Optional> oStr = Optional.ofNullable(input.getEmbargoenddate()); - if (oStr.isPresent()) { - out.setEmbargoenddate(oStr.get().getValue()); - } + oStr.ifPresent(stringField -> out.setEmbargoenddate(stringField.getValue())); final List formatList = new ArrayList<>(); Optional .ofNullable(input.getFormat()) - .ifPresent(value -> value.stream().forEach(f -> formatList.add(f.getValue()))); + .ifPresent(value -> value.forEach(f -> formatList.add(f.getValue()))); out.setFormat(formatList); out.setId(input.getId()); out.setOriginalId(new ArrayList<>()); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index e06327b2a2..da1c764e85 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -8,15 +8,7 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES; import static eu.dnetlib.dhp.schema.common.ModelConstants.REPOSITORY_PROVENANCE_ACTIONS; import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT; import static eu.dnetlib.dhp.schema.common.ModelConstants.UNKNOWN; -import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.createOpenaireId; -import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.dataInfo; -import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.field; -import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.journal; -import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.keyValue; -import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listFields; -import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.oaiIProvenance; -import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier; -import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; import java.util.*; @@ -29,26 +21,7 @@ import com.google.common.collect.Sets; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.AccessRight; -import eu.dnetlib.dhp.schema.oaf.Author; -import eu.dnetlib.dhp.schema.oaf.Context; -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.Dataset; -import eu.dnetlib.dhp.schema.oaf.Field; -import eu.dnetlib.dhp.schema.oaf.GeoLocation; -import eu.dnetlib.dhp.schema.oaf.Instance; -import eu.dnetlib.dhp.schema.oaf.Journal; -import eu.dnetlib.dhp.schema.oaf.KeyValue; -import eu.dnetlib.dhp.schema.oaf.OAIProvenance; -import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.OafEntity; -import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.Qualifier; -import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.schema.oaf.Result; -import eu.dnetlib.dhp.schema.oaf.Software; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; @@ -411,7 +384,7 @@ public abstract class AbstractMdRecordToOafMapper { protected abstract List prepareTitles(Document doc, DataInfo info); - protected abstract List prepareSubjects(Document doc, DataInfo info); + protected abstract List prepareSubjects(Document doc, DataInfo info); protected abstract Qualifier prepareLanguages(Document doc); @@ -559,6 +532,22 @@ public abstract class AbstractMdRecordToOafMapper { return res; } + protected List prepareSubjectList( + final Node node, + final String xpath, + final DataInfo info) { + final List res = new ArrayList<>(); + for (final Object o : node.selectNodes(xpath)) { + final Node n = (Node) o; + res + .add( + subject( + n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"), + n.valueOf("@schemename"), info)); + } + return res; + } + protected OAIProvenance prepareOAIprovenance(final Document doc) { final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']"); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index b7afd35950..9225e174d3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -84,8 +84,8 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { } @Override - protected List prepareSubjects(final Document doc, final DataInfo info) { - return prepareListStructProps(doc, "//dc:subject", info); + protected List prepareSubjects(final Document doc, final DataInfo info) { + return prepareSubjectList(doc, "//dc:subject", info); } @Override diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 1bbeac9fb8..101f746076 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -249,8 +249,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { } @Override - protected List prepareSubjects(final Document doc, final DataInfo info) { - return prepareListStructProps(doc, "//*[local-name()='subject']", info); + protected List prepareSubjects(final Document doc, final DataInfo info) { + return prepareSubjectList(doc, "//*[local-name()='subject']", info); } @Override diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala index c8e41743fa..e333da1aa0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala @@ -59,7 +59,7 @@ class ResolveEntitiesTest extends Serializable { r.setId(id.toLowerCase.trim) r.setSubject( List( - OafMapperUtils.structuredProperty( + OafMapperUtils.subject( FAKE_SUBJECT, OafMapperUtils.qualifier("fos", "fosCS", "fossSchema", "fossiFIgo"), null @@ -250,7 +250,7 @@ class ResolveEntitiesTest extends Serializable { val r = new Result r.setSubject( List( - OafMapperUtils.structuredProperty( + OafMapperUtils.subject( FAKE_SUBJECT, OafMapperUtils.qualifier("fos", "fosCS", "fossSchema", "fossiFIgo"), null diff --git a/pom.xml b/pom.xml index 973bc37732..632ca99b83 100644 --- a/pom.xml +++ b/pom.xml @@ -801,7 +801,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.12.2-SNAPSHOT] + [2.13.2-SNAPSHOT] [4.0.3] [6.0.5] [3.1.6]