enrichment steps #38

Merged
claudio.atzori merged 334 commits from miriam.baglioni/dnet-hadoop:master into enrichment_wfs 2020-08-11 16:40:26 +02:00
6 changed files with 1400 additions and 1412 deletions
Showing only changes of commit 2a4f65795f - Show all commits

View File

@ -61,7 +61,7 @@ public class Vocabulary implements Serializable {
} }
public VocabularyTerm getTermBySynonym(final String syn) { public VocabularyTerm getTermBySynonym(final String syn) {
return getTerm(synonyms.get(syn)); return getTerm(synonyms.get(syn.toLowerCase()));
} }
public Qualifier getTermAsQualifier(final String termId) { public Qualifier getTermAsQualifier(final String termId) {

View File

@ -3,7 +3,6 @@ package eu.dnetlib.dhp.oa.graph.raw.common;
import java.io.Serializable; import java.io.Serializable;
import java.util.*; import java.util.*;
import java.util.function.Supplier;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
@ -46,7 +45,7 @@ public class VocabularyGroup implements Serializable {
} }
vocs.addTerm(vocId, termId, termName); vocs.addTerm(vocId, termId, termName);
vocs.addSynonyms(vocId, termId, termId); // vocs.addSynonyms(vocId, termId, termId);
} }
} }
@ -58,7 +57,7 @@ public class VocabularyGroup implements Serializable {
final String syn = arr[2].trim(); final String syn = arr[2].trim();
vocs.addSynonyms(vocId, termId, syn); vocs.addSynonyms(vocId, termId, syn);
vocs.addSynonyms(vocId, termId, termId); // vocs.addSynonyms(vocId, termId, termId);
} }
} }
@ -135,7 +134,7 @@ public class VocabularyGroup implements Serializable {
Optional Optional
.ofNullable(vocs.get(id)) .ofNullable(vocs.get(id))
.orElseThrow(() -> new IllegalArgumentException("missing vocabulary id: " + vocId)) .orElseThrow(() -> new IllegalArgumentException("missing vocabulary id: " + vocId))
.addSynonym(syn, termId); .addSynonym(syn.toLowerCase(), termId);
} }
} }

View File

@ -60,8 +60,8 @@ public class CleaningFunctionTest {
assertNotNull(p_out); assertNotNull(p_out);
assertEquals("eng", p_out.getLanguage().getClassid()); assertEquals("und", p_out.getLanguage().getClassid());
assertEquals("English", p_out.getLanguage().getClassname()); assertEquals("Undetermined", p_out.getLanguage().getClassname());
assertEquals("0018", p_out.getInstance().get(0).getInstancetype().getClassid()); assertEquals("0018", p_out.getInstance().get(0).getInstancetype().getClassid());
assertEquals("Annotation", p_out.getInstance().get(0).getInstancetype().getClassname()); assertEquals("Annotation", p_out.getInstance().get(0).getInstancetype().getClassname());

View File

@ -281,8 +281,8 @@
"value": "VIRTA" "value": "VIRTA"
}, },
"instancetype": { "instancetype": {
"classid": "Comentario", "classid": "Comment/debate",
"classname": "Comentario", "classname": "Comment/debate",
"schemeid": "dnet:publication_resource", "schemeid": "dnet:publication_resource",
"schemename": "dnet:publication_resource" "schemename": "dnet:publication_resource"
}, },
@ -317,8 +317,8 @@
"vol": "" "vol": ""
}, },
"language": { "language": {
"classid": "en", "classid": "UNKNOWN",
"classname": "en", "classname": "UNKNOWN",
"schemeid": "dnet:languages", "schemeid": "dnet:languages",
"schemename": "dnet:languages" "schemename": "dnet:languages"
}, },