WIP: cleaning of subjects

This commit is contained in:
Claudio Atzori 2022-08-05 09:11:37 +02:00
parent 27a91841e7
commit b78889a0ce
3 changed files with 37 additions and 5 deletions

View File

@ -83,4 +83,10 @@ public class Vocabulary implements Serializable {
.orElse(null); .orElse(null);
} }
/**
 * Resolves an identifier to a {@link Qualifier}, trying the synonym lookup first
 * and falling back to the term lookup only when no synonym qualifier is found.
 *
 * @param id the synonym or term identifier to resolve
 * @return the qualifier from the synonym lookup when it is non-null, otherwise
 *         whatever the term lookup returns for {@code id}
 */
public Qualifier lookup(String id) {
    return Optional
        .ofNullable(getSynonymAsQualifier(id))
        // orElseGet keeps the fallback lazy: the term lookup is skipped when a synonym matched
        .orElseGet(() -> getTermAsQualifier(id));
}
} }

View File

@ -81,6 +81,13 @@ public class VocabularyGroup implements Serializable {
vocs.put(id.toLowerCase(), new Vocabulary(id, name)); vocs.put(id.toLowerCase(), new Vocabulary(id, name));
} }
/**
 * Looks up a vocabulary by its identifier, ignoring case.
 *
 * @param vocId the vocabulary identifier; may be {@code null}
 * @return the matching vocabulary, or an empty Optional when {@code vocId} is
 *         {@code null} or no vocabulary is registered under its lower-cased form
 */
public Optional<Vocabulary> find(final String vocId) {
    if (vocId == null) {
        return Optional.empty();
    }
    // vocabularies are stored under lower-cased keys
    final String key = vocId.toLowerCase();
    return Optional.ofNullable(vocs.get(key));
}
public void addTerm(final String vocId, final String id, final String name) { public void addTerm(final String vocId, final String id, final String name) {
if (vocabularyExists(vocId)) { if (vocabularyExists(vocId)) {
vocs.get(vocId.toLowerCase()).addTerm(id, name); vocs.get(vocId.toLowerCase()).addTerm(id, name);

View File

@ -4,15 +4,15 @@ package eu.dnetlib.dhp.oa.graph.clean;
import java.io.Serializable; import java.io.Serializable;
import java.util.HashMap; import java.util.HashMap;
import eu.dnetlib.dhp.common.vocabulary.Vocabulary;
import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableConsumer; import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableConsumer;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.AccessRight;
import eu.dnetlib.dhp.schema.oaf.Country; import javax.jws.WebParam;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Relation;
public class CleaningRuleMap extends HashMap<Class<?>, SerializableConsumer<Object>> implements Serializable { public class CleaningRuleMap extends HashMap<Class<?>, SerializableConsumer<Object>> implements Serializable {
@ -27,10 +27,29 @@ public class CleaningRuleMap extends HashMap<Class<?>, SerializableConsumer<Obje
mapping.put(AccessRight.class, o -> cleanQualifier(vocabularies, (AccessRight) o)); mapping.put(AccessRight.class, o -> cleanQualifier(vocabularies, (AccessRight) o));
mapping.put(Country.class, o -> cleanCountry(vocabularies, (Country) o)); mapping.put(Country.class, o -> cleanCountry(vocabularies, (Country) o));
mapping.put(Relation.class, o -> cleanRelation(vocabularies, (Relation) o)); mapping.put(Relation.class, o -> cleanRelation(vocabularies, (Relation) o));
mapping.put(Subject.class, o -> cleanSubject(vocabularies, (Subject) o));
return mapping; return mapping;
} }
/**
 * Cleans a subject against the supported subject vocabularies; currently only
 * the vocabulary identified by {@code ModelConstants.DNET_SUBJECT_FOS_CLASSID}.
 *
 * @param vocabularies the vocabulary group used to resolve subject values
 * @param s the subject to clean in place
 */
private static void cleanSubject(VocabularyGroup vocabularies, Subject s) {
    final String subjectVocabularyId = ModelConstants.DNET_SUBJECT_FOS_CLASSID;
    cleanSubjectForVocabulary(subjectVocabularyId, vocabularies, s);
    // TODO cleaning based on different subject vocabs can be added here
}
/**
 * Re-classifies a keyword subject against the given vocabulary.
 * <p>
 * When the vocabulary exists and the subject's qualifier class id equals
 * {@code ModelConstants.DNET_SUBJECT_KEYWORD} (ignoring case), the subject value is
 * looked up in the vocabulary. If the lookup yields a class id different from the
 * current value, the value is replaced by that class id and the qualifier's class
 * id and class name are set to the vocabulary's id and name.
 * <p>
 * Subjects without a qualifier or value, and lookups that yield no qualifier or no
 * class id, are left untouched rather than failing with a NullPointerException.
 *
 * @param vocabularyId the id of the vocabulary to clean against
 * @param vocabularies the vocabulary group in which {@code vocabularyId} is searched
 * @param s the subject to clean in place; ignored when {@code null} or incomplete
 */
private static void cleanSubjectForVocabulary(String vocabularyId, VocabularyGroup vocabularies, Subject s) {
    // An incomplete subject can neither be matched nor rewritten
    if (s == null || s.getQualifier() == null || s.getValue() == null) {
        return;
    }
    vocabularies.find(vocabularyId).ifPresent(vocabulary -> {
        // Only free-text keywords are candidates for re-classification
        if (!ModelConstants.DNET_SUBJECT_KEYWORD.equalsIgnoreCase(s.getQualifier().getClassid())) {
            return;
        }
        final Qualifier newValue = vocabulary.lookup(s.getValue());
        // NOTE(review): lookup may return null depending on the term lookup - confirm;
        // either way, never overwrite the subject value with null
        if (newValue == null || newValue.getClassid() == null) {
            return;
        }
        if (!s.getValue().equals(newValue.getClassid())) {
            s.setValue(newValue.getClassid());
            s.getQualifier().setClassid(vocabularyId);
            s.getQualifier().setClassname(vocabulary.getName());
        }
    });
}
private static void cleanRelation(VocabularyGroup vocabularies, Relation r) { private static void cleanRelation(VocabularyGroup vocabularies, Relation r) {
if (vocabularies.vocabularyExists(ModelConstants.DNET_RELATION_SUBRELTYPE)) { if (vocabularies.vocabularyExists(ModelConstants.DNET_RELATION_SUBRELTYPE)) {
Qualifier newValue = vocabularies.lookup(ModelConstants.DNET_RELATION_SUBRELTYPE, r.getSubRelType()); Qualifier newValue = vocabularies.lookup(ModelConstants.DNET_RELATION_SUBRELTYPE, r.getSubRelType());