1
0
Fork 0

vocabulary based cleaning considers also the term label when looking up for a synonym

This commit is contained in:
Claudio Atzori 2021-12-09 13:57:53 +01:00
parent 1de881b796
commit cd9c51fd7a
3 changed files with 99 additions and 2 deletions

View File

@ -57,9 +57,17 @@ public class VocabularyGroup implements Serializable {
final String syn = arr[2].trim();
vocs.addSynonyms(vocId, termId, syn);
}
}
// add the term names as synonyms
vocs.vocs.values().forEach(voc -> {
voc.getTerms().values().forEach(term -> {
voc.addSynonym(term.getName().toLowerCase(), term.getId());
});
});
return vocs;
}

View File

@ -101,6 +101,9 @@ public class GraphCleaningFunctionsTest {
assertEquals("0018", p_out.getInstance().get(0).getInstancetype().getClassid());
assertEquals("Annotation", p_out.getInstance().get(0).getInstancetype().getClassname());
assertEquals("0033", p_out.getInstance().get(1).getInstancetype().getClassid());
assertEquals("Audiovisual", p_out.getInstance().get(1).getInstancetype().getClassname());
assertEquals("CLOSED", p_out.getInstance().get(0).getAccessright().getClassid());
assertEquals("Closed Access", p_out.getInstance().get(0).getAccessright().getClassname());
@ -114,7 +117,7 @@ public class GraphCleaningFunctionsTest {
List<Instance> poi = p_out.getInstance();
assertNotNull(poi);
assertEquals(1, poi.size());
assertEquals(2, poi.size());
final Instance poii = poi.get(0);
assertNotNull(poii);
@ -163,7 +166,7 @@ public class GraphCleaningFunctionsTest {
final List<Instance> pci = p_cleaned.getInstance();
assertNotNull(pci);
assertEquals(1, pci.size());
assertEquals(2, pci.size());
final Instance pcii = pci.get(0);
assertNotNull(pcii);

View File

@ -403,6 +403,92 @@
"http://juuli.fi/Record/0275158616",
"http://dx.doi.org/10.1007/s109090161569x"
]
},
{
"pid": [
{
"dataInfo": null,
"qualifier": {
"classid": "doi",
"classname": "doi",
"schemeid": "dnet:pid_types",
"schemename": "dnet:pid_types"
},
"value": "10.1002/s21010127267xy"
},
{
"dataInfo": null,
"qualifier": {
"classid": "doi",
"classname": "doi",
"schemeid": "dnet:pid_types",
"schemename": "dnet:pid_types"
},
"value": "10.1008/abcd"
}
],
"alternateIdentifier": [
{
"dataInfo": null,
"qualifier": {
"classid": "doi",
"classname": "doi",
"schemeid": "dnet:pid_types",
"schemename": "dnet:pid_types"
},
"value": "10.1007/s109090161569x"
},
{
"dataInfo": null,
"qualifier": {
"classid": "doi",
"classname": "doi",
"schemeid": "dnet:pid_types",
"schemename": "dnet:pid_types"
},
"value": "10.1009/qwerty"
}
],
"accessright": {
"classid": "CLOSED",
"classname": "CLOSED",
"schemeid": "dnet:access_modes",
"schemename": "dnet:access_modes"
},
"collectedfrom": {
"key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747",
"value": "VIRTA"
},
"dateofacceptance": {
"dataInfo": {
"deletedbyinference": false,
"inferenceprovenance": "",
"inferred": false,
"invisible": false,
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "sysimport:crosswalk:datasetarchive",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"trust": "0.9"
},
"value": "2016-01-01"
},
"distributionlocation": "",
"hostedby": {
"key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747",
"value": "VIRTA"
},
"instancetype": {
"classid": "Audiovisual",
"classname": "Audiovisual",
"schemeid": "dnet:publication_resource",
"schemename": "dnet:publication_resource"
},
"url": [
"http://dx.doi.org/10.1002/s21010127267xy"
]
}
],
"journal": {