forked from antonis.lempesis/dnet-hadoop
WIP: cleaning of subjects
This commit is contained in:
parent
27a91841e7
commit
b78889a0ce
|
@ -83,4 +83,10 @@ public class Vocabulary implements Serializable {
|
||||||
.orElse(null);
|
.orElse(null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Qualifier lookup(String id) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(getSynonymAsQualifier(id))
|
||||||
|
.orElse(getTermAsQualifier(id));
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -81,6 +81,13 @@ public class VocabularyGroup implements Serializable {
|
||||||
vocs.put(id.toLowerCase(), new Vocabulary(id, name));
|
vocs.put(id.toLowerCase(), new Vocabulary(id, name));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Optional<Vocabulary> find(final String vocId) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(vocId)
|
||||||
|
.map(String::toLowerCase)
|
||||||
|
.map(vocs::get);
|
||||||
|
}
|
||||||
|
|
||||||
public void addTerm(final String vocId, final String id, final String name) {
|
public void addTerm(final String vocId, final String id, final String name) {
|
||||||
if (vocabularyExists(vocId)) {
|
if (vocabularyExists(vocId)) {
|
||||||
vocs.get(vocId.toLowerCase()).addTerm(id, name);
|
vocs.get(vocId.toLowerCase()).addTerm(id, name);
|
||||||
|
|
|
@ -4,15 +4,15 @@ package eu.dnetlib.dhp.oa.graph.clean;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.common.vocabulary.Vocabulary;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableConsumer;
|
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableConsumer;
|
||||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.oaf.AccessRight;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Country;
|
import javax.jws.WebParam;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
|
||||||
|
|
||||||
public class CleaningRuleMap extends HashMap<Class<?>, SerializableConsumer<Object>> implements Serializable {
|
public class CleaningRuleMap extends HashMap<Class<?>, SerializableConsumer<Object>> implements Serializable {
|
||||||
|
|
||||||
|
@ -27,10 +27,29 @@ public class CleaningRuleMap extends HashMap<Class<?>, SerializableConsumer<Obje
|
||||||
mapping.put(AccessRight.class, o -> cleanQualifier(vocabularies, (AccessRight) o));
|
mapping.put(AccessRight.class, o -> cleanQualifier(vocabularies, (AccessRight) o));
|
||||||
mapping.put(Country.class, o -> cleanCountry(vocabularies, (Country) o));
|
mapping.put(Country.class, o -> cleanCountry(vocabularies, (Country) o));
|
||||||
mapping.put(Relation.class, o -> cleanRelation(vocabularies, (Relation) o));
|
mapping.put(Relation.class, o -> cleanRelation(vocabularies, (Relation) o));
|
||||||
|
mapping.put(Subject.class, o -> cleanSubject(vocabularies, (Subject) o));
|
||||||
return mapping;
|
return mapping;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static void cleanSubject(VocabularyGroup vocabularies, Subject s) {
|
||||||
|
// TODO cleaning based on different subject vocabs can be added here
|
||||||
|
cleanSubjectForVocabulary(ModelConstants.DNET_SUBJECT_FOS_CLASSID, vocabularies, s);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void cleanSubjectForVocabulary(String vocabularyId, VocabularyGroup vocabularies, Subject s) {
|
||||||
|
vocabularies.find(vocabularyId).ifPresent(vocabulary -> {
|
||||||
|
if (!ModelConstants.DNET_SUBJECT_KEYWORD.equalsIgnoreCase(s.getQualifier().getClassid())) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
Qualifier newValue = vocabulary.lookup(s.getValue());
|
||||||
|
if (!s.getValue().equals(newValue.getClassid())) {
|
||||||
|
s.setValue(newValue.getClassid());
|
||||||
|
s.getQualifier().setClassid(vocabularyId);
|
||||||
|
s.getQualifier().setClassname(vocabulary.getName());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
private static void cleanRelation(VocabularyGroup vocabularies, Relation r) {
|
private static void cleanRelation(VocabularyGroup vocabularies, Relation r) {
|
||||||
if (vocabularies.vocabularyExists(ModelConstants.DNET_RELATION_SUBRELTYPE)) {
|
if (vocabularies.vocabularyExists(ModelConstants.DNET_RELATION_SUBRELTYPE)) {
|
||||||
Qualifier newValue = vocabularies.lookup(ModelConstants.DNET_RELATION_SUBRELTYPE, r.getSubRelType());
|
Qualifier newValue = vocabularies.lookup(ModelConstants.DNET_RELATION_SUBRELTYPE, r.getSubRelType());
|
||||||
|
|
Loading…
Reference in New Issue