From 359b8ebda81abb5fd82fa26028b65dd0fa7bead0 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 25 Jul 2024 15:22:29 +0200 Subject: [PATCH 1/2] [graph provision] include only FoS L1..L2 in the record serialization --- .../model/ProvisionModelSupport.java | 11 ++++++++ .../oa/provision/utils/XmlRecordFactory.java | 6 ++-- .../utils/XmlSerializationUtils.java | 4 --- .../oa/provision/XmlRecordFactoryTest.java | 2 ++ .../dnetlib/dhp/oa/provision/publication.json | 28 +++++++++++++++++-- 5 files changed, 40 insertions(+), 11 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java index 1a75deafc..277d0deb6 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java @@ -702,6 +702,7 @@ public class ProvisionModelSupport { .stream() .filter(s -> Objects.nonNull(s.getQualifier())) .filter(s -> Objects.nonNull(s.getQualifier().getClassname())) + .filter(ProvisionModelSupport::filterFosL1L2) .map( s -> Subject .newInstance(s.getValue(), s.getQualifier().getClassid(), s.getQualifier().getClassname())) @@ -709,6 +710,16 @@ public class ProvisionModelSupport { .orElse(null); } + public static boolean filterFosL1L2(StructuredProperty s) { + final String subjectType = Optional.ofNullable(s.getQualifier()).map(Qualifier::getClassid).orElse(""); + if (ModelConstants.DNET_SUBJECT_FOS_CLASSID.equals(subjectType)) { + String code = StringUtils.substringBefore(s.getValue(), " "); + return code.matches("^\\d{2}$|^\\d{4}$"); + } + + return true; + } + private static Country asCountry(eu.dnetlib.dhp.schema.oaf.Qualifier country) { return Optional .ofNullable(country) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 899dad221..44004faf3 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -20,6 +20,7 @@ import javax.xml.transform.*; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; +import eu.dnetlib.dhp.oa.provision.model.*; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; @@ -41,10 +42,6 @@ import com.google.common.collect.Sets; import com.mycila.xmltool.XMLDoc; import com.mycila.xmltool.XMLTag; -import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; -import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; -import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; -import eu.dnetlib.dhp.oa.provision.model.XmlInstance; import eu.dnetlib.dhp.schema.common.*; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.Result; @@ -389,6 +386,7 @@ public class XmlRecordFactory implements Serializable { .getSubject() .stream() .filter(Objects::nonNull) + .filter(ProvisionModelSupport::filterFosL1L2) .map(s -> XmlSerializationUtils.mapStructuredProperty("subject", s)) .collect(Collectors.toList())); } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java index fbd647ae4..b4517002c 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java @@ -5,11 +5,7 @@ import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.removePrefix; import static org.apache.commons.lang3.StringUtils.isBlank; import static org.apache.commons.lang3.StringUtils.isNotBlank; -import java.util.HashSet; import java.util.List; -import java.util.Optional; -import java.util.Set; -import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index ab4301f9a..dcd021db1 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -97,6 +97,8 @@ public class XmlRecordFactoryTest { assertEquals("bronze", doc.valueOf("//*[local-name() = 'result']/openaccesscolor/text()")); assertEquals("true", doc.valueOf("//*[local-name() = 'result']/isindiamondjournal/text()")); assertEquals("true", doc.valueOf("//*[local-name() = 'result']/publiclyfunded/text()")); + + assertEquals(15, doc.selectNodes("//*[local-name() = 'result']/*[local-name() = 'subject']").size()); } @Test diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json index a89ec62d5..a073fbebd 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json @@ -1886,12 +1886,34 @@ "trust": "" }, "qualifier": { - "classid": "keyword", - "classname": "keyword", + "classid": "FOS", + "classname": "Fields of Science and Technology classification", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies" }, - "value": "Thermal conductivity" + "value": "0101 mathematics" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "FOS", + "classname": "Fields of Science and Technology classification", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "value": "010101 applied mathematics" } ], "title": [ -- 2.17.1 From a81c555fe6bfa23b7c4108eac2d0415d78c8a630 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 25 Jul 2024 15:26:47 +0200 Subject: [PATCH 2/2] [graph provision] include only FoS L1..L2 in the record serialization --- .../dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java index 277d0deb6..4a2326453 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java @@ -687,6 +687,7 @@ public class ProvisionModelSupport { .stream() .filter(s -> Objects.nonNull(s.getQualifier())) .filter(s -> Objects.nonNull(s.getQualifier().getClassname())) + .filter(ProvisionModelSupport::filterFosL1L2) .map( s -> Subject .newInstance(s.getValue(), s.getQualifier().getClassid(), s.getQualifier().getClassname())) @@ -702,7 +703,6 @@ public class ProvisionModelSupport { .stream() .filter(s -> Objects.nonNull(s.getQualifier())) .filter(s -> Objects.nonNull(s.getQualifier().getClassname())) - .filter(ProvisionModelSupport::filterFosL1L2) .map( s -> Subject .newInstance(s.getValue(), s.getQualifier().getClassid(), s.getQualifier().getClassname())) -- 2.17.1