2020-06-09 17:20:40 +02:00
|
|
|
|
|
|
|
package eu.dnetlib.dhp.oa.graph.clean;
|
|
|
|
|
2020-06-13 13:06:04 +02:00
|
|
|
import java.io.Serializable;
|
|
|
|
import java.util.HashMap;
|
2022-10-13 11:23:43 +02:00
|
|
|
import java.util.Objects;
|
2022-08-05 16:56:09 +02:00
|
|
|
import java.util.concurrent.atomic.AtomicReference;
|
2020-06-12 12:03:25 +02:00
|
|
|
|
2022-08-05 16:56:09 +02:00
|
|
|
import org.apache.commons.lang3.SerializationUtils;
|
2020-06-18 19:37:25 +02:00
|
|
|
import org.apache.commons.lang3.StringUtils;
|
|
|
|
|
2020-06-13 13:06:04 +02:00
|
|
|
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableConsumer;
|
2021-01-25 15:43:04 +01:00
|
|
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
2022-10-13 11:23:43 +02:00
|
|
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyTerm;
|
2020-06-18 19:37:25 +02:00
|
|
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
2022-08-05 12:32:08 +02:00
|
|
|
import eu.dnetlib.dhp.schema.oaf.*;
|
2020-06-12 10:45:18 +02:00
|
|
|
|
2021-08-11 12:13:22 +02:00
|
|
|
public class CleaningRuleMap extends HashMap<Class<?>, SerializableConsumer<Object>> implements Serializable {
|
2020-06-09 17:20:40 +02:00
|
|
|
|
2020-06-12 18:25:47 +02:00
|
|
|
/**
|
2020-06-13 13:06:04 +02:00
|
|
|
* Creates the mapping for the Oaf types subject to cleaning
|
|
|
|
*
|
2020-06-12 18:25:47 +02:00
|
|
|
* @param vocabularies
|
|
|
|
*/
|
2020-06-13 13:06:04 +02:00
|
|
|
public static CleaningRuleMap create(VocabularyGroup vocabularies) {
|
|
|
|
CleaningRuleMap mapping = new CleaningRuleMap();
|
2020-06-18 19:37:25 +02:00
|
|
|
mapping.put(Qualifier.class, o -> cleanQualifier(vocabularies, (Qualifier) o));
|
2021-01-12 15:36:38 +01:00
|
|
|
mapping.put(AccessRight.class, o -> cleanQualifier(vocabularies, (AccessRight) o));
|
2021-09-15 16:10:37 +02:00
|
|
|
mapping.put(Country.class, o -> cleanCountry(vocabularies, (Country) o));
|
|
|
|
mapping.put(Relation.class, o -> cleanRelation(vocabularies, (Relation) o));
|
2022-08-05 09:11:37 +02:00
|
|
|
mapping.put(Subject.class, o -> cleanSubject(vocabularies, (Subject) o));
|
2020-06-13 13:06:04 +02:00
|
|
|
return mapping;
|
2020-06-09 19:52:53 +02:00
|
|
|
}
|
2020-06-12 18:25:47 +02:00
|
|
|
|
2022-08-05 16:56:09 +02:00
|
|
|
private static void cleanSubject(VocabularyGroup vocabularies, Subject subject) {
|
2022-10-13 11:23:43 +02:00
|
|
|
cleanSubjectForVocabulary(ModelConstants.DNET_SUBJECT_FOS_CLASSID, vocabularies, subject);
|
|
|
|
// TODO cleaning based on different subject vocabs can be added here
|
2022-08-05 09:11:37 +02:00
|
|
|
}
|
|
|
|
|
2022-10-13 11:23:43 +02:00
|
|
|
private static void cleanSubjectForVocabulary(String vocabularyId, VocabularyGroup vocabularies,
|
2022-08-05 16:56:09 +02:00
|
|
|
Subject subject) {
|
2022-10-13 11:23:43 +02:00
|
|
|
|
2022-08-05 09:11:37 +02:00
|
|
|
vocabularies.find(vocabularyId).ifPresent(vocabulary -> {
|
2022-10-13 11:23:43 +02:00
|
|
|
if (ModelConstants.DNET_SUBJECT_KEYWORD.equalsIgnoreCase(subject.getQualifier().getClassid())) {
|
|
|
|
Qualifier newValue = vocabulary.lookup(subject.getValue());
|
|
|
|
if (!ModelConstants.UNKNOWN.equals(newValue.getClassid())) {
|
|
|
|
subject.setValue(newValue.getClassid());
|
|
|
|
subject.getQualifier().setClassid(vocabularyId);
|
|
|
|
subject.getQualifier().setClassname(vocabulary.getName());
|
|
|
|
}
|
|
|
|
} else if (vocabularyId.equals(subject.getQualifier().getClassid())) {
|
|
|
|
Qualifier syn = vocabulary.getSynonymAsQualifier(subject.getValue());
|
|
|
|
VocabularyTerm term = vocabulary.getTerm(subject.getValue());
|
|
|
|
if (Objects.isNull(syn) && Objects.isNull(term)) {
|
|
|
|
subject.getQualifier().setClassid(ModelConstants.DNET_SUBJECT_KEYWORD);
|
|
|
|
subject.getQualifier().setClassname(ModelConstants.DNET_SUBJECT_KEYWORD);
|
|
|
|
}
|
2022-08-05 09:11:37 +02:00
|
|
|
}
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
2021-09-15 16:10:37 +02:00
|
|
|
private static void cleanRelation(VocabularyGroup vocabularies, Relation r) {
|
|
|
|
if (vocabularies.vocabularyExists(ModelConstants.DNET_RELATION_SUBRELTYPE)) {
|
|
|
|
Qualifier newValue = vocabularies.lookup(ModelConstants.DNET_RELATION_SUBRELTYPE, r.getSubRelType());
|
|
|
|
r.setSubRelType(newValue.getClassid());
|
|
|
|
}
|
|
|
|
if (vocabularies.vocabularyExists(ModelConstants.DNET_RELATION_RELCLASS)) {
|
|
|
|
Qualifier newValue = vocabularies.lookup(ModelConstants.DNET_RELATION_RELCLASS, r.getRelClass());
|
|
|
|
r.setRelClass(newValue.getClassid());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private static void cleanCountry(VocabularyGroup vocabularies, Country o) {
|
|
|
|
final Country c = o;
|
|
|
|
if (StringUtils.isBlank(c.getSchemeid())) {
|
|
|
|
c.setSchemeid(ModelConstants.DNET_COUNTRY_TYPE);
|
|
|
|
c.setSchemename(ModelConstants.DNET_COUNTRY_TYPE);
|
|
|
|
}
|
|
|
|
cleanQualifier(vocabularies, c);
|
|
|
|
}
|
|
|
|
|
2020-06-18 19:37:25 +02:00
|
|
|
private static <Q extends Qualifier> void cleanQualifier(VocabularyGroup vocabularies, Q q) {
|
|
|
|
if (vocabularies.vocabularyExists(q.getSchemeid())) {
|
|
|
|
Qualifier newValue = vocabularies.lookup(q.getSchemeid(), q.getClassid());
|
|
|
|
q.setClassid(newValue.getClassid());
|
|
|
|
q.setClassname(newValue.getClassname());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-06-09 17:20:40 +02:00
|
|
|
}
|