forked from D-Net/dnet-hadoop
cleaning of subjects
This commit is contained in:
parent
32cee1f619
commit
4eaa063b1f
|
@ -3,13 +3,12 @@ package eu.dnetlib.dhp.oa.graph.clean;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.concurrent.atomic.AtomicReference;
|
||||||
|
|
||||||
import javax.jws.WebParam;
|
import org.apache.commons.lang3.SerializationUtils;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableConsumer;
|
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableConsumer;
|
||||||
import eu.dnetlib.dhp.common.vocabulary.Vocabulary;
|
|
||||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
@ -31,23 +30,30 @@ public class CleaningRuleMap extends HashMap<Class<?>, SerializableConsumer<Obje
|
||||||
return mapping;
|
return mapping;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void cleanSubject(VocabularyGroup vocabularies, Subject s) {
|
private static void cleanSubject(VocabularyGroup vocabularies, Subject subject) {
|
||||||
|
if (cleanSubjectForVocabulary(ModelConstants.DNET_SUBJECT_FOS_CLASSID, vocabularies, subject)) {
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
// TODO cleaning based on different subject vocabs can be added here
|
// TODO cleaning based on different subject vocabs can be added here
|
||||||
cleanSubjectForVocabulary(ModelConstants.DNET_SUBJECT_FOS_CLASSID, vocabularies, s);
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void cleanSubjectForVocabulary(String vocabularyId, VocabularyGroup vocabularies, Subject s) {
|
private static boolean cleanSubjectForVocabulary(String vocabularyId, VocabularyGroup vocabularies,
|
||||||
|
Subject subject) {
|
||||||
|
AtomicReference<Boolean> modified = new AtomicReference<>(false);
|
||||||
vocabularies.find(vocabularyId).ifPresent(vocabulary -> {
|
vocabularies.find(vocabularyId).ifPresent(vocabulary -> {
|
||||||
if (!ModelConstants.DNET_SUBJECT_KEYWORD.equalsIgnoreCase(s.getQualifier().getClassid())) {
|
if (!ModelConstants.DNET_SUBJECT_KEYWORD.equalsIgnoreCase(subject.getQualifier().getClassid())) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
Qualifier newValue = vocabulary.lookup(s.getValue());
|
Qualifier newValue = vocabulary.lookup(subject.getValue());
|
||||||
if (!s.getValue().equals(newValue.getClassid())) {
|
if (!subject.getValue().equals(newValue.getClassid())) {
|
||||||
s.setValue(newValue.getClassid());
|
subject.setValue(newValue.getClassid());
|
||||||
s.getQualifier().setClassid(vocabularyId);
|
subject.getQualifier().setClassid(vocabularyId);
|
||||||
s.getQualifier().setClassname(vocabulary.getName());
|
subject.getQualifier().setClassname(vocabulary.getName());
|
||||||
|
modified.set(true);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
return modified.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void cleanRelation(VocabularyGroup vocabularies, Relation r) {
|
private static void cleanRelation(VocabularyGroup vocabularies, Relation r) {
|
||||||
|
|
|
@ -262,6 +262,20 @@ public class GraphCleaningFunctionsTest {
|
||||||
assertNotNull(fos_subjects);
|
assertNotNull(fos_subjects);
|
||||||
assertEquals(2, fos_subjects.size());
|
assertEquals(2, fos_subjects.size());
|
||||||
|
|
||||||
|
assertTrue(
|
||||||
|
fos_subjects
|
||||||
|
.stream()
|
||||||
|
.anyMatch(
|
||||||
|
s -> "0101 mathematics".equals(s.getValue()) &
|
||||||
|
ModelConstants.DNET_SUBJECT_FOS_CLASSID.equals(s.getQualifier().getClassid())));
|
||||||
|
|
||||||
|
assertTrue(
|
||||||
|
fos_subjects
|
||||||
|
.stream()
|
||||||
|
.anyMatch(
|
||||||
|
s -> "0102 computer and information sciences".equals(s.getValue()) &
|
||||||
|
ModelConstants.DNET_SUBJECT_FOS_CLASSID.equals(s.getQualifier().getClassid())));
|
||||||
|
|
||||||
// TODO add more assertions to verify the cleaned values
|
// TODO add more assertions to verify the cleaned values
|
||||||
System.out.println(MAPPER.writeValueAsString(p_cleaned));
|
System.out.println(MAPPER.writeValueAsString(p_cleaned));
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue