From 4eaa063b1f03f3b855ddc6687c905f6b269b1756 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 5 Aug 2022 16:56:09 +0200 Subject: [PATCH] cleaning of subjects --- .../dhp/oa/graph/clean/CleaningRuleMap.java | 32 +++++++++++-------- .../clean/GraphCleaningFunctionsTest.java | 14 ++++++++ 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java index 894d5d0590..c650400afc 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java @@ -3,13 +3,12 @@ package eu.dnetlib.dhp.oa.graph.clean; import java.io.Serializable; import java.util.HashMap; +import java.util.concurrent.atomic.AtomicReference; -import javax.jws.WebParam; - +import org.apache.commons.lang3.SerializationUtils; import org.apache.commons.lang3.StringUtils; import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableConsumer; -import eu.dnetlib.dhp.common.vocabulary.Vocabulary; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; @@ -31,23 +30,30 @@ public class CleaningRuleMap extends HashMap, SerializableConsumer modified = new AtomicReference<>(false); vocabularies.find(vocabularyId).ifPresent(vocabulary -> { - if (!ModelConstants.DNET_SUBJECT_KEYWORD.equalsIgnoreCase(s.getQualifier().getClassid())) { + if (!ModelConstants.DNET_SUBJECT_KEYWORD.equalsIgnoreCase(subject.getQualifier().getClassid())) { return; } - Qualifier newValue = vocabulary.lookup(s.getValue()); - if (!s.getValue().equals(newValue.getClassid())) { - s.setValue(newValue.getClassid()); - s.getQualifier().setClassid(vocabularyId); - s.getQualifier().setClassname(vocabulary.getName()); + Qualifier newValue = vocabulary.lookup(subject.getValue()); + if (!subject.getValue().equals(newValue.getClassid())) { + subject.setValue(newValue.getClassid()); + subject.getQualifier().setClassid(vocabularyId); + subject.getQualifier().setClassname(vocabulary.getName()); + modified.set(true); } }); + return modified.get(); } private static void cleanRelation(VocabularyGroup vocabularies, Relation r) { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java index f4c4581b16..0bfee11517 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java @@ -262,6 +262,20 @@ public class GraphCleaningFunctionsTest { assertNotNull(fos_subjects); assertEquals(2, fos_subjects.size()); + assertTrue( + fos_subjects + .stream() + .anyMatch( + s -> "0101 mathematics".equals(s.getValue()) & + ModelConstants.DNET_SUBJECT_FOS_CLASSID.equals(s.getQualifier().getClassid()))); + + assertTrue( + fos_subjects + .stream() + .anyMatch( + s -> "0102 computer and information sciences".equals(s.getValue()) & + ModelConstants.DNET_SUBJECT_FOS_CLASSID.equals(s.getQualifier().getClassid()))); + // TODO add more assertions to verity the cleaned values System.out.println(MAPPER.writeValueAsString(p_cleaned)); }