From b78889a0ce27a79c7ab2d8da05b118ee4f1bcb36 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 5 Aug 2022 09:11:37 +0200 Subject: [PATCH] WIP: cleaning of subjects --- .../dhp/common/vocabulary/Vocabulary.java | 6 ++++ .../common/vocabulary/VocabularyGroup.java | 7 +++++ .../dhp/oa/graph/clean/CleaningRuleMap.java | 29 +++++++++++++++---- 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java index b3eb98d4f..24a30500d 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java @@ -83,4 +83,10 @@ public class Vocabulary implements Serializable { .orElse(null); } + public Qualifier lookup(String id) { + return Optional + .ofNullable(getSynonymAsQualifier(id)) + .orElse(getTermAsQualifier(id)); + } + } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java index 1c129ff9c..8435b8bf3 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java @@ -81,6 +81,13 @@ public class VocabularyGroup implements Serializable { vocs.put(id.toLowerCase(), new Vocabulary(id, name)); } + public Optional find(final String vocId) { + return Optional + .ofNullable(vocId) + .map(String::toLowerCase) + .map(vocs::get); + } + public void addTerm(final String vocId, final String id, final String name) { if (vocabularyExists(vocId)) { vocs.get(vocId.toLowerCase()).addTerm(id, name); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java index 6c156edb7..2a4183f1b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java @@ -4,15 +4,15 @@ package eu.dnetlib.dhp.oa.graph.clean; import java.io.Serializable; import java.util.HashMap; +import eu.dnetlib.dhp.common.vocabulary.Vocabulary; +import eu.dnetlib.dhp.schema.oaf.*; import org.apache.commons.lang3.StringUtils; import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableConsumer; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.AccessRight; -import eu.dnetlib.dhp.schema.oaf.Country; -import eu.dnetlib.dhp.schema.oaf.Qualifier; -import eu.dnetlib.dhp.schema.oaf.Relation; + +import javax.jws.WebParam; public class CleaningRuleMap extends HashMap, SerializableConsumer> implements Serializable { @@ -27,10 +27,29 @@ public class CleaningRuleMap extends HashMap, SerializableConsumer cleanQualifier(vocabularies, (AccessRight) o)); mapping.put(Country.class, o -> cleanCountry(vocabularies, (Country) o)); mapping.put(Relation.class, o -> cleanRelation(vocabularies, (Relation) o)); - + mapping.put(Subject.class, o -> cleanSubject(vocabularies, (Subject) o)); return mapping; } + private static void cleanSubject(VocabularyGroup vocabularies, Subject s) { + // TODO cleaning based on different subject vocabs can be added here + cleanSubjectForVocabulary(ModelConstants.DNET_SUBJECT_FOS_CLASSID, vocabularies, s); + } + + private static void cleanSubjectForVocabulary(String vocabularyId, VocabularyGroup vocabularies, Subject s) { + vocabularies.find(vocabularyId).ifPresent(vocabulary -> { + if (!ModelConstants.DNET_SUBJECT_KEYWORD.equalsIgnoreCase(s.getQualifier().getClassid())) { + return; + } + Qualifier newValue = vocabulary.lookup(s.getValue()); + if (!s.getValue().equals(newValue.getClassid())) { + s.setValue(newValue.getClassid()); + s.getQualifier().setClassid(vocabularyId); + s.getQualifier().setClassname(vocabulary.getName()); + } + }); + } + private static void cleanRelation(VocabularyGroup vocabularies, Relation r) { if (vocabularies.vocabularyExists(ModelConstants.DNET_RELATION_SUBRELTYPE)) { Qualifier newValue = vocabularies.lookup(ModelConstants.DNET_RELATION_SUBRELTYPE, r.getSubRelType());