forked from D-Net/dnet-hadoop
vocabulary based cleaning considers also the term label when looking up for a synonym
This commit is contained in:
parent
5d51b3dd4a
commit
e6e177dda0
|
@ -57,9 +57,17 @@ public class VocabularyGroup implements Serializable {
|
||||||
final String syn = arr[2].trim();
|
final String syn = arr[2].trim();
|
||||||
|
|
||||||
vocs.addSynonyms(vocId, termId, syn);
|
vocs.addSynonyms(vocId, termId, syn);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// add the term names as synonyms
|
||||||
|
vocs.vocs.values().forEach(voc -> {
|
||||||
|
voc.getTerms().values().forEach(term -> {
|
||||||
|
voc.addSynonym(term.getName().toLowerCase(), term.getId());
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
return vocs;
|
return vocs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -151,6 +151,9 @@ public class GraphCleaningFunctionsTest {
|
||||||
assertEquals("0018", p_out.getInstance().get(0).getInstancetype().getClassid());
|
assertEquals("0018", p_out.getInstance().get(0).getInstancetype().getClassid());
|
||||||
assertEquals("Annotation", p_out.getInstance().get(0).getInstancetype().getClassname());
|
assertEquals("Annotation", p_out.getInstance().get(0).getInstancetype().getClassname());
|
||||||
|
|
||||||
|
assertEquals("0033", p_out.getInstance().get(1).getInstancetype().getClassid());
|
||||||
|
assertEquals("Audiovisual", p_out.getInstance().get(1).getInstancetype().getClassname());
|
||||||
|
|
||||||
assertEquals("CLOSED", p_out.getInstance().get(0).getAccessright().getClassid());
|
assertEquals("CLOSED", p_out.getInstance().get(0).getAccessright().getClassid());
|
||||||
assertEquals("Closed Access", p_out.getInstance().get(0).getAccessright().getClassname());
|
assertEquals("Closed Access", p_out.getInstance().get(0).getAccessright().getClassname());
|
||||||
|
|
||||||
|
@ -164,7 +167,7 @@ public class GraphCleaningFunctionsTest {
|
||||||
|
|
||||||
List<Instance> poi = p_out.getInstance();
|
List<Instance> poi = p_out.getInstance();
|
||||||
assertNotNull(poi);
|
assertNotNull(poi);
|
||||||
assertEquals(1, poi.size());
|
assertEquals(2, poi.size());
|
||||||
|
|
||||||
final Instance poii = poi.get(0);
|
final Instance poii = poi.get(0);
|
||||||
assertNotNull(poii);
|
assertNotNull(poii);
|
||||||
|
@ -213,7 +216,7 @@ public class GraphCleaningFunctionsTest {
|
||||||
|
|
||||||
final List<Instance> pci = p_cleaned.getInstance();
|
final List<Instance> pci = p_cleaned.getInstance();
|
||||||
assertNotNull(pci);
|
assertNotNull(pci);
|
||||||
assertEquals(1, pci.size());
|
assertEquals(2, pci.size());
|
||||||
|
|
||||||
final Instance pcii = pci.get(0);
|
final Instance pcii = pci.get(0);
|
||||||
assertNotNull(pcii);
|
assertNotNull(pcii);
|
||||||
|
|
|
@ -403,6 +403,92 @@
|
||||||
"http://juuli.fi/Record/0275158616",
|
"http://juuli.fi/Record/0275158616",
|
||||||
"http://dx.doi.org/10.1007/s109090161569x"
|
"http://dx.doi.org/10.1007/s109090161569x"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"dataInfo": null,
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "doi",
|
||||||
|
"classname": "doi",
|
||||||
|
"schemeid": "dnet:pid_types",
|
||||||
|
"schemename": "dnet:pid_types"
|
||||||
|
},
|
||||||
|
"value": "10.1002/s21010127267xy"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": null,
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "doi",
|
||||||
|
"classname": "doi",
|
||||||
|
"schemeid": "dnet:pid_types",
|
||||||
|
"schemename": "dnet:pid_types"
|
||||||
|
},
|
||||||
|
"value": "10.1008/abcd"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"alternateIdentifier": [
|
||||||
|
{
|
||||||
|
"dataInfo": null,
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "doi",
|
||||||
|
"classname": "doi",
|
||||||
|
"schemeid": "dnet:pid_types",
|
||||||
|
"schemename": "dnet:pid_types"
|
||||||
|
},
|
||||||
|
"value": "10.1007/s109090161569x"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": null,
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "doi",
|
||||||
|
"classname": "doi",
|
||||||
|
"schemeid": "dnet:pid_types",
|
||||||
|
"schemename": "dnet:pid_types"
|
||||||
|
},
|
||||||
|
"value": "10.1009/qwerty"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"accessright": {
|
||||||
|
"classid": "CLOSED",
|
||||||
|
"classname": "CLOSED",
|
||||||
|
"schemeid": "dnet:access_modes",
|
||||||
|
"schemename": "dnet:access_modes"
|
||||||
|
},
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747",
|
||||||
|
"value": "VIRTA"
|
||||||
|
},
|
||||||
|
"dateofacceptance": {
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"value": "2016-01-01"
|
||||||
|
},
|
||||||
|
"distributionlocation": "",
|
||||||
|
"hostedby": {
|
||||||
|
"key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747",
|
||||||
|
"value": "VIRTA"
|
||||||
|
},
|
||||||
|
"instancetype": {
|
||||||
|
"classid": "Audiovisual",
|
||||||
|
"classname": "Audiovisual",
|
||||||
|
"schemeid": "dnet:publication_resource",
|
||||||
|
"schemename": "dnet:publication_resource"
|
||||||
|
},
|
||||||
|
"url": [
|
||||||
|
"http://dx.doi.org/10.1002/s21010127267xy"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"journal": {
|
"journal": {
|
||||||
|
|
Loading…
Reference in New Issue