forked from D-Net/dnet-hadoop
WIP: cleaning of subjects
This commit is contained in:
parent
6c0fd9284b
commit
32cee1f619
|
@ -85,8 +85,8 @@ public class Vocabulary implements Serializable {
|
||||||
|
|
||||||
public Qualifier lookup(String id) {
|
public Qualifier lookup(String id) {
|
||||||
return Optional
|
return Optional
|
||||||
.ofNullable(getSynonymAsQualifier(id))
|
.ofNullable(getSynonymAsQualifier(id))
|
||||||
.orElse(getTermAsQualifier(id));
|
.orElse(getTermAsQualifier(id));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -83,9 +83,9 @@ public class VocabularyGroup implements Serializable {
|
||||||
|
|
||||||
public Optional<Vocabulary> find(final String vocId) {
|
public Optional<Vocabulary> find(final String vocId) {
|
||||||
return Optional
|
return Optional
|
||||||
.ofNullable(vocId)
|
.ofNullable(vocId)
|
||||||
.map(String::toLowerCase)
|
.map(String::toLowerCase)
|
||||||
.map(vocs::get);
|
.map(vocs::get);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void addTerm(final String vocId, final String id, final String name) {
|
public void addTerm(final String vocId, final String id, final String name) {
|
||||||
|
|
|
@ -4,15 +4,15 @@ package eu.dnetlib.dhp.oa.graph.clean;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.vocabulary.Vocabulary;
|
import javax.jws.WebParam;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableConsumer;
|
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableConsumer;
|
||||||
|
import eu.dnetlib.dhp.common.vocabulary.Vocabulary;
|
||||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import javax.jws.WebParam;
|
|
||||||
|
|
||||||
public class CleaningRuleMap extends HashMap<Class<?>, SerializableConsumer<Object>> implements Serializable {
|
public class CleaningRuleMap extends HashMap<Class<?>, SerializableConsumer<Object>> implements Serializable {
|
||||||
|
|
||||||
|
|
|
@ -251,6 +251,17 @@ public class GraphCleaningFunctionsTest {
|
||||||
pid.getQualifier().getClassname()));
|
pid.getQualifier().getClassname()));
|
||||||
});
|
});
|
||||||
|
|
||||||
|
assertNotNull(p_cleaned.getSubject());
|
||||||
|
|
||||||
|
List<Subject> fos_subjects = p_cleaned
|
||||||
|
.getSubject()
|
||||||
|
.stream()
|
||||||
|
.filter(s -> ModelConstants.DNET_SUBJECT_FOS_CLASSID.equals(s.getQualifier().getClassid()))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
|
assertNotNull(fos_subjects);
|
||||||
|
assertEquals(2, fos_subjects.size());
|
||||||
|
|
||||||
// TODO add more assertions to verify the cleaned values
|
// TODO add more assertions to verify the cleaned values
|
||||||
System.out.println(MAPPER.writeValueAsString(p_cleaned));
|
System.out.println(MAPPER.writeValueAsString(p_cleaned));
|
||||||
}
|
}
|
||||||
|
|
|
@ -743,12 +743,12 @@
|
||||||
"trust": "0.9"
|
"trust": "0.9"
|
||||||
},
|
},
|
||||||
"qualifier": {
|
"qualifier": {
|
||||||
"classid": "",
|
"classid": "keyword",
|
||||||
"classname": "",
|
"classname": "keyword",
|
||||||
"schemeid": "",
|
"schemeid": "dnet:subject_classification_typologies",
|
||||||
"schemename": ""
|
"schemename": "dnet:subject_classification_typologies"
|
||||||
},
|
},
|
||||||
"value": "infrared detectors"
|
"value": "FOS: Mathematics"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"dataInfo": {
|
"dataInfo": {
|
||||||
|
@ -765,12 +765,12 @@
|
||||||
"trust": "0.9"
|
"trust": "0.9"
|
||||||
},
|
},
|
||||||
"qualifier": {
|
"qualifier": {
|
||||||
"classid": "",
|
"classid": "keyword",
|
||||||
"classname": "",
|
"classname": "keyword",
|
||||||
"schemeid": "",
|
"schemeid": "dnet:subject_classification_typologies",
|
||||||
"schemename": ""
|
"schemename": "dnet:subject_classification_typologies"
|
||||||
},
|
},
|
||||||
"value": "lens antennas"
|
"value": "FOS: Computer and information sciences"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"dataInfo": {
|
"dataInfo": {
|
||||||
|
|
|
@ -1243,4 +1243,6 @@ dnet:relation_relClass @=@ IsSupplementTo @=@ isSupplementTo
|
||||||
dnet:relation_relClass @=@ IsSupplementedBy @=@ isSupplementedBy
|
dnet:relation_relClass @=@ IsSupplementedBy @=@ isSupplementedBy
|
||||||
dnet:relation_relClass @=@ IsRelatedTo @=@ isRelatedTo
|
dnet:relation_relClass @=@ IsRelatedTo @=@ isRelatedTo
|
||||||
dnet:relation_subRelType @=@ relationship @=@ publicationDataset
|
dnet:relation_subRelType @=@ relationship @=@ publicationDataset
|
||||||
dnet:provenanceActions @=@ iis @=@ erroneous label to be cleaned
|
dnet:provenanceActions @=@ iis @=@ erroneous label to be cleaned
|
||||||
|
FOS @=@ 0101 mathematics @=@ FOS: Mathematics
|
||||||
|
FOS @=@ 0102 computer and information sciences @=@ FOS: Computer and information sciences
|
|
@ -1117,4 +1117,7 @@ dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ relationship @=@ relat
|
||||||
dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ review @=@ review
|
dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ review @=@ review
|
||||||
dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ similarity @=@ similarity
|
dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ similarity @=@ similarity
|
||||||
dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ supplement @=@ supplement
|
dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ supplement @=@ supplement
|
||||||
dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ version @=@ version
|
dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ version @=@ version
|
||||||
|
FOS @=@ Fields of Science and Technology classification @=@ 0101 mathematics @=@ 0101 mathematics
|
||||||
|
FOS @=@ Fields of Science and Technology classification @=@ 0102 computer and information sciences @=@ 0102 computer and information sciences
|
||||||
|
FOS @=@ Fields of Science and Technology classification @=@ 0103 physical sciences @=@ 0103 physical sciences
|
Loading…
Reference in New Issue