Restyled the JaroWinklerNormalizedName comparator, which is now optimized. Added some translations to the translation maps, and added a clustering function based on keywords in organization legalnames.

This commit is contained in:
miconis 2019-07-19 17:10:29 +02:00
parent d5d228aef3
commit 84974dcdfa
10 changed files with 117 additions and 153 deletions

View File

@ -46,6 +46,8 @@ public class SparkLocalTest {
return new Tuple2<>(mapDocument.getIdentifier(), mapDocument);
});
// mapDocs.foreach(doc -> System.out.println("doc = " + doc._2().getFieldMap().get("legalname")));
// mapDocs.filter(d -> d._2().getFieldMap().get("doi").stringValue().length() > 0).foreach(d -> System.out.println(d));
// mapDocs.filter(d -> d._2().getFieldMap().get("documentationUrl").stringValue().length() > 0).foreach(d -> System.out.println(d));

View File

@ -31,6 +31,8 @@
{ "name" : "websiteurl", "algo" : "Null", "type" : "URL", "weight" : "0", "ignoreMissing" : "true", "path" : "organization/metadata/websiteurl/value", "params" : { "host" : 0.5, "path" : 0.5 } },
{ "name" : "gridid", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "pid[qualifier#classid = {grid}]/value" }
],
"blacklists" : { }
"blacklists" : {
"legalname" : []
}
}
}

View File

@ -1,40 +1,25 @@
{"dateoftransformation":"2018-11-12","originalId":["opendoar____::Humboldt_State_University"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.humboldt.edu/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt State University"},"country":{"classid":"US","classname":"United States","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-11-12","type":20,"id":"20|opendoar____::9c3522c59aef0edab19b8a3f0aeb39ed"}
{"dateoftransformation":"2019-05-19","originalId":["rcuk________::9758583A-FF1E-41C4-9176-B875E8FAC110"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt State University"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-04","type":20,"id":"20|rcuk________::7715018b4838eaf1d57242c788e222d4"}
{"dateoftransformation":"2018-09-13","originalId":["opendoar____::Humboldt-Universität_zu_Berlin"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"HU"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"https://www.hu-berlin.de/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt-Universität zu Berlin"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2015-08-24","type":20,"id":"20|opendoar____::13ab9ef517038d3751f4b0e31aea9ac7"}
{"dateoftransformation":"2018-09-27","originalId":["re3data_____::678d9d5a712331f6e2fce7b7b764090f"],"collectedfrom":[{"value":"Registry of Research Data Repository","key":"10|openaire____::21f8a223b9925c2f87c404096080b046"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"https://www.hu-berlin.de/de/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt-Universität Berlin"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-09-27","type":20,"id":"20|re3data_____::aeb488fd15eb1be77b998b5602450910"}
{"dateoftransformation":"2019-05-19","originalId":["rcuk________::CFF4C944-5CF1-4AE3-8C03-BE361D6DEDC3"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt University Berlin"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-03","type":20,"id":"20|rcuk________::ff1bccdf9520b3fadd2fc26103231de0"}
{"dateoftransformation":"2018-09-27","originalId":["re3data_____::4bda5f07be19914ce8e2e4652a72151c"],"collectedfrom":[{"value":"Registry of Research Data Repository","key":"10|openaire____::21f8a223b9925c2f87c404096080b046"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"https://www.hu-berlin.de/de"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt-Universität zu Berlin"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-09-27","type":20,"id":"20|re3data_____::d72a4f4665f7df9b48a22d4cfde0dd3c"}
{"dateoftransformation":"2018-09-13","originalId":["nih_________::HUMBOLDT_STATE_UNIVERSITY"],"collectedfrom":[{"value":"NIH - National Institutes of Health","key":"10|openaire____::9e9e8c76d739212c63eff362e321ba33"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"HUMBOLDT STATE UNIVERSITY"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2016-07-14","type":20,"id":"20|nih_________::8aec7ec3198fc69ce74e24b8f6aa9a59"}
{"dateoftransformation":"2018-09-19","originalId":["doajarticles::Humboldt-Universität_zu_Berlin"],"collectedfrom":[{"value":"DOAJ-Articles","key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"Humboldt-Universität zu Berlin"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt-Universität zu Berlin"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-09-19","type":20,"id":"20|doajarticles::13ab9ef517038d3751f4b0e31aea9ac7"}
{"dateoftransformation":"2018-11-20","originalId":["corda_______::999850781"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse","key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UBER"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.hu-berlin.de"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"HUMBOLDT-UNIVERSITAT ZU BERLIN"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda_______::d999b4c2dc81ccd40100056ab0543088"}
{"dateoftransformation":"2018-12-15","originalId":["corda__h2020::999850781"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020","key":"10|openaire____::a55eb91348674d853191f4f4fd73d078"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UBER"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.hu-berlin.de"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"HUMBOLDT-UNIVERSITAET ZU BERLIN"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda__h2020::d999b4c2dc81ccd40100056ab0543088"}
{"dateoftransformation": "2019-05-19", "originalId": ["rcuk________::9169966C-E38A-41D7-AF04-F7470963CBED"], "collectedfrom": [{"key": "10|openaire____::ab2d3310741ea80d3b8726f651502858", "value": "Research Councils UK"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Institute of Physics"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2017-11-04", "type": 20, "id": "20|rcuk________::3eb464c9a21582d7dbb3f115710d863c"}
{"dateoftransformation": "2019-05-19", "originalId": ["rcuk________::CEEF86B3-BB98-4CAE-848D-00837C745DEC"], "collectedfrom": [{"key": "10|openaire____::ab2d3310741ea80d3b8726f651502858", "value": "Research Councils UK"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Yerevan Physics Institute"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2017-11-03", "type": 20, "id": "20|rcuk________::f8790ac2aa4eb6fb7cc2980eb7971ee6"}
{"dateoftransformation": "2018-09-13", "originalId": ["opendoar____::Aalto_University"], "collectedfrom": [{"key": "10|openaire____::47ce9e9f4fad46e732cff06419ecaabb", "value": "OpenDOAR"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.aalto.fi/en/"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Aalto University"}, "country": {"classid": "FI", "classname": "Finland", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2015-08-24", "type": 20, "id": "20|opendoar____::98845925f422ef4987294d6bfac525dd"}
{"dateoftransformation": "2018-09-13", "originalId": ["nsf_________::Institute_of_Physics"], "collectedfrom": [{"key": "10|openaire____::dd69b4a1513c9de9f46faf24048da1e8", "value": "NSF - National Science Foundation"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Institute of Physics"}, "country": {"classid": "YU", "classname": "Yugoslavia", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2016-03-10", "type": 20, "id": "20|nsf_________::d540e5e89b1ace31d5dd0b8f658056ec"}
{"dateoftransformation": "2019-05-19", "originalId": ["rcuk________::EE4840D3-84C0-47A3-9109-30F67D0D550F"], "collectedfrom": [{"key": "10|openaire____::ab2d3310741ea80d3b8726f651502858", "value": "Research Councils UK"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Aalto University"}, "country": {"classid": "FI", "classname": "Finland", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2017-11-03", "type": 20, "id": "20|rcuk________::7559c8bbff5125d74919775a1f290496"}
{"dateoftransformation": "2019-05-29", "originalId": ["irb_hr______::Institute of Physics, Zagreb"], "collectedfrom": [{"key": "10|openaire____::db600878200645bd752cf7fd96a37df5", "value": "Rudjer Boskovic Institute Library - Croatian Projects"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Institute of Physics, Zagreb"}, "country": {"classid": "HR", "classname": "Croatia", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2018-06-15", "type": 20, "id": "20|irb_hr______::d0147c5dfa57d00b5bbd8405366d5ed9"}
{"dateoftransformation": "2018-11-20", "originalId": ["corda_______::918297740"], "collectedfrom": [{"key": "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f", "value": "CORDA - COmmon Research DAta Warehouse"}], "organization": {"metadata": {"eclegalbody": {"value": "true"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "INSTITUUT FYSIEKE VEILIGHEID"}, "ecnonprofit": {"value": "true"}, "ecresearchorganization": {"value": "false"}, "websiteurl": {"value": "http://www.ifv.nl"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "INSTITUUT FYSIEKE VEILIGHEID"}, "country": {"classid": "NL", "classname": "Netherlands", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}}}, "dateofcollection": "2018-03-12", "type": 20, "id": "20|corda_______::490e6333fc4b5b2f0bfbb94875b57911"}
{"dateoftransformation": "2018-11-20", "originalId": ["corda_______::987994083"], "collectedfrom": [{"key": "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f", "value": "CORDA - COmmon Research DAta Warehouse"}], "organization": {"metadata": {"eclegalbody": {"value": "true"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "INSTITUUT FYSIEKE VEILIGHEID"}, "ecnonprofit": {"value": "true"}, "ecresearchorganization": {"value": "false"}, "websiteurl": {"value": "http://www.ifv.nl"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "INSTITUUT FYSIEKE VEILIGHEID"}, "country": {"classid": "NL", "classname": "Netherlands", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}}}, "dateofcollection": "2018-03-12", "type": 20, "id": "20|corda_______::3ace7e70172b7ddce2ffc8db335e7cd3"}
{"dateoftransformation": "2018-11-20", "originalId": ["corda_______::999637672"], "collectedfrom": [{"key": "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f", "value": "CORDA - COmmon Research DAta Warehouse"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "ANL"}, "ecnonprofit": {"value": "true"}, "ecresearchorganization": {"value": "true"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "A I ALIKHANYAN NATIONAL SCIENCE LABORATORY"}, "country": {"classid": "AM", "classname": "Armenia", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}}}, "dateofcollection": "2018-03-12", "type": 20, "id": "20|corda_______::f76c86a31f38609cd3b7930279d9c7c6"}
{"dateoftransformation": "2019-04-16", "originalId": ["aka_________::3117bf00abc3330b48bb270494d46ce4"], "collectedfrom": [{"key": "10|openaire____::6ac933301a3933c8a22ceebea7000326", "value": "Academy of Finland"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Aalto University"}, "country": {"classid": "FI", "classname": "Finland", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2019-01-25", "type": 20, "id": "20|aka_________::c32beace3046af7a121b15237b1e4747"}
{"dateoftransformation":"2019-05-04","originalId":["opendoar____::Free_University_of_Bozen_-_Bolzano"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.unibz.it"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Free University of Bozen - Bolzano"},"country":{"classid":"IT","classname":"Italy","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2019-05-04","type":20,"id":"20|opendoar____::c230b60ca1a8a95150c3163e40899e5d"}
{"dateoftransformation":"2019-05-19","originalId":["rcuk________::E7C60D41-51F7-4C46-89DC-4E8F6D7DC64B"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Free University of Bozen-Bolzano"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-07","type":20,"id":"20|rcuk________::47a7d6a960f874fdd7c2678f16276cbf"}
{"dateoftransformation":"2019-05-19","originalId":["rcuk________::69417031-F8F1-4557-BF08-49096CDBF321"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"University of the Free State"},"country":{"classid":"ZA","classname":"South Africa","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-03","type":20,"id":"20|rcuk________::91d972791a1a3945078724a4ede959d4"}
{"dateoftransformation":"2019-05-19","originalId":["rcuk________::9218106A-E8CE-46A5-AABC-B4C8ED148690"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"University of Amsterdam"},"country":{"classid":"NL","classname":"Netherlands","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-03","type":20,"id":"20|rcuk________::77c8206d9739a62c542db14a00d51fc9"}
{"dateoftransformation":"2018-09-13","originalId":["opendoar____::Université_Libre_de_Bruxelles"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.ulb.ac.be/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Université Libre de Bruxelles"},"country":{"classid":"BE","classname":"Belgium","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2015-08-24","type":20,"id":"20|opendoar____::4348f2277945e85ff4fa371c89d5209e"}
{"dateoftransformation":"2018-09-13","originalId":["nsf_________::Liberty_University__Inc_"],"collectedfrom":[{"value":"NSF - National Science Foundation","key":"10|openaire____::dd69b4a1513c9de9f46faf24048da1e8"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Liberty University, Inc."},"country":{"classid":"US","classname":"United States","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2016-03-10","type":20,"id":"20|nsf_________::ef77f1e8314313a4d53ec4f19054b733"}
{"dateoftransformation":"2018-09-13","originalId":["opendoar____::Vrije_Universiteit_Amsterdam"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"VU"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.vu.nl/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Vrije Universiteit Amsterdam"},"country":{"classid":"NL","classname":"Netherlands","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2015-08-24","type":20,"id":"20|opendoar____::40e0928728ca1ea6ebb147ad307fc7db"}
{"dateoftransformation":"2018-11-12","originalId":["opendoar____::Burgas_Free_University"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"Бургаски свободен университет"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.bfu.bg/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Burgas Free University"},"country":{"classid":"BG","classname":"Bulgaria","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-11-12","type":20,"id":"20|opendoar____::28a99bd2330504b0dfb6c44192757bde"}
{"dateoftransformation":"2018-09-13","originalId":["opendoar____::Université_libre_de_Bruxelles"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.ulb.ac.be/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Université libre de Bruxelles"},"country":{"classid":"BE","classname":"Belgium","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2016-07-21","type":20,"id":"20|opendoar____::0e3d292f95a8f13fed04d7b3ac872b9f"}
{"dateoftransformation":"2018-11-12","originalId":["opendoar____::Freie_Universitat_Berlin"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Freie Universitat Berlin"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-11-12","type":20,"id":"20|opendoar____::5054b113a655361d929493a95d29e6f1"}
{"dateoftransformation":"2018-11-20","originalId":["corda_______::999879881"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse","key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"POLITECNICO DI MILANO"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.polimi.it"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"POLITECNICO DI MILANO"},"country":{"classid":"IT","classname":"Italy","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda_______::671b76de97f62c7ecf4a18cd5c5a24ce"}
{"dateoftransformation":"2018-12-15","originalId":["corda__h2020::999879881"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020","key":"10|openaire____::a55eb91348674d853191f4f4fd73d078"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"PoliMi"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.polimi.it"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"POLITECNICO DI MILANO"},"country":{"classid":"IT","classname":"Italy","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda__h2020::671b76de97f62c7ecf4a18cd5c5a24ce"}
{"dateoftransformation":"2018-11-20","originalId":["corda_______::999866689"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse","key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UNIVPM"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.univpm.it"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"UNIVERSITA POLITECNICA DELLE MARCHE"},"country":{"classid":"IT","classname":"Italy","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda_______::f050abb6c5aadea5488de784874ce4e6"}
{"dateoftransformation":"2018-12-15","originalId":["corda__h2020::999977754"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020","key":"10|openaire____::a55eb91348674d853191f4f4fd73d078"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"POLITO"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.polito.it"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"POLITECNICO DI TORINO"},"country":{"classid":"IT","classname":"Italy","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda__h2020::c565531bf1c09673c8d4ca4228b9d307"}
{"dateoftransformation":"2018-12-15","originalId":["corda__h2020::999866689"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020","key":"10|openaire____::a55eb91348674d853191f4f4fd73d078"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UNIVPM"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.univpm.it"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"UNIVERSITA POLITECNICA DELLE MARCHE"},"country":{"classid":"IT","classname":"Italy","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda__h2020::f050abb6c5aadea5488de784874ce4e6"}
{"dateoftransformation":"2018-12-15","originalId":["corda__h2020::999431159"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020","key":"10|openaire____::a55eb91348674d853191f4f4fd73d078"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"POLIBA"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.poliba.it"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"POLITECNICO DI BARI"},"country":{"classid":"IT","classname":"Italy","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda__h2020::395a43af55ac010c4781a6c8645db8a6"}
{"dateoftransformation":"2018-11-20","originalId":["corda_______::999431159"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse","key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"POLIBA"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"POLITECNICO DI BARI"},"country":{"classid":"IT","classname":"Italy","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda_______::395a43af55ac010c4781a6c8645db8a6"}
{"dateoftransformation":"2018-11-20","originalId":["corda_______::999977754"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse","key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"POLITO"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.polito.it"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"POLITECNICO DI TORINO"},"country":{"classid":"IT","classname":"Italy","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda_______::c565531bf1c09673c8d4ca4228b9d307"}
{"dateoftransformation":"2019-05-19","originalId":["rcuk________::84803376-D657-41A3-A5DB-E1341282462E"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Polytechnic University of Milan"},"country":{"classid":"IT","classname":"Italy","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-04","type":20,"id":"20|rcuk________::a1aacefbbd7d0b6ebd2085941388b46d"}
{"dateoftransformation":"2019-05-19","originalId":["rcuk________::16A2AC44-FDDA-4753-A927-26136DDCDA6D"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Polytechnic University of Turin"},"country":{"classid":"IT","classname":"Italy","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-03","type":20,"id":"20|rcuk________::146b02f079957d07f10099a0b53e02c3"}
{"dateoftransformation":"2019-06-26","originalId":["corda__h2020::999864846"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020","key":"10|openaire____::a55eb91348674d853191f4f4fd73d078"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UPV"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.upv.es"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"UNIVERSITAT POLITECNICA DE VALENCIA"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda__h2020::542f36b24ca4203a5b4dfc8396ef7475"}
{"dateoftransformation":"2019-06-26","originalId":["corda__h2020::999828859"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020","key":"10|openaire____::a55eb91348674d853191f4f4fd73d078"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UPCT"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.upct.es"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"UNIVERSIDAD POLITECNICA DE CARTAGENA"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda__h2020::9df7abeaef39ed5bac9c3e3a10a2be91"}
{"dateoftransformation":"2019-06-26","originalId":["corda__h2020::999974844"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020","key":"10|openaire____::a55eb91348674d853191f4f4fd73d078"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UPM"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.upm.es"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"UNIVERSIDAD POLITECNICA DE MADRID"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda__h2020::7a44a773d0dc629e9af5cef563c2478a"}
{"dateoftransformation":"2018-11-20","originalId":["corda_______::999976202"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse","key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UPC"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"websiteurl":{"value":"http://www.upc.edu"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"UNIVERSITAT POLITECNICA DE CATALUNYA"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda_______::0a1f410e6b3374e015b9aead9d97731a"}
{"dateoftransformation":"2018-11-20","originalId":["corda_______::999864846"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse","key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UPV"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.upv.es"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"UNIVERSITAT POLITECNICA DE VALENCIA"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda_______::542f36b24ca4203a5b4dfc8396ef7475"}
{"dateoftransformation":"2018-11-20","originalId":["corda_______::999974844"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse","key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UPM"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.upm.es"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"UNIVERSIDAD POLITECNICA DE MADRID"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda_______::7a44a773d0dc629e9af5cef563c2478a"}
{"dateoftransformation":"2019-06-26","originalId":["corda__h2020::999976202"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020","key":"10|openaire____::a55eb91348674d853191f4f4fd73d078"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UPC"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.upc.edu"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"UNIVERSITAT POLITECNICA DE CATALUNYA"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda__h2020::0a1f410e6b3374e015b9aead9d97731a"}
{"dateoftransformation":"2018-11-20","originalId":["corda_______::999828859"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse","key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"legalshortname":{"value":"UPCT"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"websiteurl":{"value":"http://www.upct.es"},"ecnutscode":{"value":"false"},"legalname":{"value":"UNIVERSIDAD POLITECNICA DE CARTAGENA"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda_______::9df7abeaef39ed5bac9c3e3a10a2be91"}
{"dateoftransformation":"2018-09-19","originalId":["doajarticles::Universidad_Politécnica_de_Madrid"],"collectedfrom":[{"value":"DOAJ-Articles","key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"Universidad Politécnica de Madrid"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Universidad Politécnica de Madrid"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-09-19","type":20,"id":"20|doajarticles::3cdfe6eec5fafec302e02cb7be692318"}
{"dateoftransformation":"2018-09-19","originalId":["doajarticles::Universitat_Politècnica_de_València"],"collectedfrom":[{"value":"DOAJ-Articles","key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"Universitat Politècnica de València"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Universitat Politècnica de València"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-09-19","type":20,"id":"20|doajarticles::b9fc5f6fb04d0f8d7d0b163718f9785c"}
{"dateoftransformation":"2018-09-19","originalId":["doajarticles::Universidad_Politécnica_de_Cartagena"],"collectedfrom":[{"value":"DOAJ-Articles","key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"Universidad Politécnica de Cartagena"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Universidad Politécnica de Cartagena"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-09-19","type":20,"id":"20|doajarticles::cf3bddf37ca3440dbd8e5ba6e81f6df3"}
{"dateoftransformation":"2018-09-19","originalId":["doajarticles::Universitat_Politècnica_de_Catalunya"],"collectedfrom":[{"value":"DOAJ-Articles","key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"Universitat Politècnica de Catalunya"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Universitat Politècnica de Catalunya"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-09-19","type":20,"id":"20|doajarticles::7e232375e9902c823cf3cd4e1a16de5e"}
{"dateoftransformation":"2018-09-19","originalId":["doajarticles::Universidad_Politécnica_de_Cataluña"],"collectedfrom":[{"value":"DOAJ-Articles","key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"Universidad Politécnica de Cataluña"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Universidad Politécnica de Cataluña"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-09-19","type":20,"id":"20|doajarticles::98e85e22b787db82b4c806e4f452f456"}
{"dateoftransformation":"2018-09-19","originalId":["doajarticles::Universitat_Politècnica_deValència"],"collectedfrom":[{"value":"DOAJ-Articles","key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"Universitat Politècnica deValència"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Universitat Politècnica deValència"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-09-19","type":20,"id":"20|doajarticles::637209da5f9f175227735f91e63d4999"}
{"dateoftransformation":"2018-09-13","originalId":["re3data_____::c9820581fcddf3a7a50652e4ac5e5aab"],"collectedfrom":[{"value":"Registry of Research Data Repository","key":"10|openaire____::21f8a223b9925c2f87c404096080b046"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"https://www.upc.edu"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Universitat Politècnica de Catalunya"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|re3data_____::044d264e43ee7cd7bfbc3deb1532037a"}
{"dateoftransformation":"2019-04-11","originalId":["re3data_____::211b4cb099c317e9cd9073476566ca47"],"collectedfrom":[{"value":"Registry of Research Data Repository","key":"10|openaire____::21f8a223b9925c2f87c404096080b046"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.upv.es/index-en.html"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Universitat Politècnica de València"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2019-04-11","type":20,"id":"20|re3data_____::465c66fba7c037fde2dcc962f94e44a9"}
{"dateoftransformation":"2018-09-13","originalId":["re3data_____::fbdc48c28389453ae6008622c1e5b0e1"],"collectedfrom":[{"value":"Registry of Research Data Repository","key":"10|openaire____::21f8a223b9925c2f87c404096080b046"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UPM"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.upm.es/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Universidad Politécnica de Madrid"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-02-13","type":20,"id":"20|re3data_____::0cbf4c44e9d045ffb0aa303354fff947"}
{"dateoftransformation":"2018-09-13","originalId":["snsf________::Universitat_Politècnica_de_Catalunya"],"collectedfrom":[{"value":"SNSF - Swiss National Science Foundation","key":"10|openaire____::d8f3c25e18304608ce8e816e99603d7a"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Universitat Politècnica de Catalunya"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-09-23","type":20,"id":"20|snsf________::7e232375e9902c823cf3cd4e1a16de5e"}
{"dateoftransformation":"2018-09-13","originalId":["opendoar____::Universidad_Politécnica_de_Cartagena"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UPCT"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.upct.es/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Universidad Politécnica de Cartagena"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2015-08-24","type":20,"id":"20|opendoar____::cf3bddf37ca3440dbd8e5ba6e81f6df3"}
{"dateoftransformation":"2018-09-13","originalId":["opendoar____::Universitat_Politècnica_de_València"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.upv.es/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Universitat Politècnica de València"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2015-08-24","type":20,"id":"20|opendoar____::b9fc5f6fb04d0f8d7d0b163718f9785c"}
{"dateoftransformation":"2018-09-13","originalId":["opendoar____::Universidad_Politecnica_de_Madrid"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.upm.es/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Universidad Politecnica de Madrid"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2015-08-24","type":20,"id":"20|opendoar____::c4b8d1e68de570b064c666c370bc2168"}
{"dateoftransformation":"2018-11-12","originalId":["opendoar____::Universitat_Politènica_de_Catalunya"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UPC"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.upc.edu/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Universitat Politènica de Catalunya"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-11-12","type":20,"id":"20|opendoar____::54e1f400c1b5dba4fa5a18571d50c3f3"}
{"dateoftransformation":"2019-05-19","originalId":["rcuk________::B0DA50CA-D11E-4251-9678-4AA2F93DB545"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Polytechnic University of Catalonia"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-04","type":20,"id":"20|rcuk________::5cd82a9ed265a9cb392d185a688cad40"}
{"dateoftransformation":"2018-09-13","originalId":["openaire____::UE9MSS1SRUQgKFJldmlzdGFzIERpZ2l0YWxlcyBQb2xpdMOpY25pY2FzKQ==::UNIVERSIDAD POLITECNICA DE MADRID"],"collectedfrom":[{"value":"","key":""}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"UNIVERSIDAD POLITECNICA DE MADRID"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2016-11-03","type":20,"id":"20|openaire____::9f185b9f2f1c932b492f6fb53c8c5caf"}
{"dateoftransformation":"2018-09-13","originalId":["openaire____::issn24441309::UNIVERSIDAD POLITECNICA DE MADRID"],"collectedfrom":[{"value":"","key":""}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"UNIVERSIDAD POLITECNICA DE MADRID"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2016-11-03","type":20,"id":"20|openaire____::935af0561af9c6e33326818b32805241"}

View File

@ -1,6 +1,5 @@
package eu.dnetlib.pace.clustering;
import com.google.common.base.Joiner;
import eu.dnetlib.pace.common.AbstractPaceFunctions;
import java.util.*;
@ -20,14 +19,14 @@ public class KeywordsClustering extends AbstractClusteringFunction {
protected Collection<String> doApply(String s) {
//takes city codes and keywords codes without duplicates
Set<String> keywords = getCodes(s, translationMap, params.getOrDefault("windowSize", 4));
Set<String> cities = getCodes(s, cityMap, params.getOrDefault("windowSize", 4));
Set<String> keywords = getKeywords(s, params.getOrDefault("windowSize", 4));
Set<String> cities = getCities(s, params.getOrDefault("windowSize", 4));
//list of combination to return as result
final Collection<String> combinations = new LinkedHashSet<String>();
for (String keyword: keywords){
for (String city: cities) {
for (String keyword: keywordsToCodes(keywords)){
for (String city: citiesToCodes(cities)) {
combinations.add(keyword+"-"+city);
if (combinations.size()>=params.getOrDefault("max", 2)) {
return combinations;

View File

@ -18,6 +18,8 @@ import java.text.Normalizer;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
* Set of common functions
@ -27,6 +29,10 @@ import java.util.regex.Pattern;
*/
public abstract class AbstractPaceFunctions {
private static Map<String,String> translationMap = AbstractPaceFunctions.loadMapFromClasspath("/eu/dnetlib/pace/config/translation_map.csv");
private static Map<String,String> cityMap = AbstractPaceFunctions.loadMapFromClasspath("/eu/dnetlib/pace/config/city_map.csv");
protected static Set<String> stopwords_en = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_en.txt");
protected static Set<String> stopwords_de = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_de.txt");
protected static Set<String> stopwords_es = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_es.txt");
@ -212,99 +218,58 @@ public abstract class AbstractPaceFunctions {
return sb.toString().trim();
}
public String keywordsToCode(String s1, Map<String, String> translationMap, int windowSize){
public String removeKeywords(String s, Set<String> keywords) {
List<String> tokens = Arrays.asList(s1.split(" "));
if (tokens.size()<windowSize)
windowSize = tokens.size();
int length = windowSize;
while (length != 0) {
for (int i = 0; i<=tokens.size()-length; i++){
String candidate = Joiner.on(" ").join(tokens.subList(i, i + length));
if (translationMap.containsKey(candidate)) {
s1 = (" " + s1 + " ").replaceAll(" " + candidate + " ", " " + translationMap.get(candidate) + " ");
}
}
length-=1;
s = " " + s + " ";
for (String k: keywords ) {
s = s.replaceAll(k.toLowerCase(), "");
}
return s1;
return s.trim();
}
/**
 * Strips keyword codes and city codes from a string.
 *
 * <p>A keyword code is the literal {@code key::} followed by digits and a city code is the
 * literal {@code city::} followed by digits, each delimited by word boundaries. Keyword codes
 * are removed first, then city codes, and the result is trimmed. Whitespace that surrounded a
 * removed code inside the string is left as it is.
 *
 * @param s the string to clean
 * @return the string without keyword and city codes, with leading and trailing whitespace removed
 */
public String removeCodes(String s) {
    // first pass: drop the keyword codes
    String withoutKeywordCodes = s.replaceAll("\\bkey::[0-9]*\\b", "");
    // second pass: drop the city codes from what is left
    String withoutCityCodes = withoutKeywordCodes.replaceAll("\\bcity::[0-9]*\\b", "");
    return withoutCityCodes.trim();
}
public double keywordsCompare(Set<String> s1, Set<String> s2){
public double keywordsCompare(String s1, String s2){
Set<String> k1 = keywordsToCodes(s1);
Set<String> k2 = keywordsToCodes(s2);
List<String> keywords1 = getKeywords(s1);
List<String> keywords2 = getKeywords(s2);
int longer = (keywords1.size()>keywords2.size())?keywords1.size():keywords2.size();
int longer = (k1.size()>k2.size())?k1.size():k2.size();
if (getKeywords(s1).isEmpty() || getKeywords(s2).isEmpty())
if (k1.isEmpty() || k2.isEmpty())
return 1.0;
else
return (double)CollectionUtils.intersection(getKeywords(s1),getKeywords(s2)).size()/(double)longer;
return (double)CollectionUtils.intersection(k1,k2).size()/(double)longer;
}
/**
 * Checks whether two strings share at least one keyword code.
 *
 * @param s1 the first string
 * @param s2 the second string
 * @return {@code true} when either string has no keywords, or when the two
 *         keyword lists have at least one element in common
 */
public boolean sameKeywords(String s1, String s2){
    // A string without keywords cannot contradict the other one.
    final List<String> first = getKeywords(s1);
    if (first.isEmpty()) {
        return true;
    }
    final List<String> second = getKeywords(s2);
    if (second.isEmpty()) {
        return true;
    }
    // at least 1 keyword in common
    return CollectionUtils.intersection(first, second).size() > 0;
}
//returns true if at least 1 city is in common
//returns true if a name has no cities
public boolean sameCity(String s1, String s2){
//returns true if no cities are contained in names
//returns false if one of the two names have no city
public boolean sameCity(Set<String> s1, Set<String> s2){
if (getCities(s1).isEmpty() || getCities(s2).isEmpty())
Set<String> c1 = citiesToCodes(s1);
Set<String> c2 = citiesToCodes(s2);
if (c1.isEmpty() && c2.isEmpty())
return true;
else
return CollectionUtils.intersection(getCities(s1), getCities(s2)).size()>0;
else {
if (c1.isEmpty() ^ c2.isEmpty())
return false;
return CollectionUtils.intersection(c1, c2).size() > 0;
}
}
//get the list of city codes (city::<digits>) in a string
public List<String> getCities(String s) {
final String regex = "\\bcity::[0-9]*\\b";
Pattern p = Pattern.compile(regex, Pattern.MULTILINE);
Matcher m = p.matcher(s);
List<String> codes = new ArrayList<>();
while (m.find()) {
codes.add(m.group(0));
for (int i = 1; i <= m.groupCount(); i++) {
codes.add(m.group(0));
}
}
return codes;
/**
 * Converts a set of keywords into the set of codes they are mapped to.
 *
 * @param keywords       the keywords to translate
 * @param translationMap maps a keyword to its code
 * @return the distinct codes of the keywords that have an entry in
 *         {@code translationMap}; keywords without an entry are ignored
 */
public Set<String> toCodes(Set<String> keywords, Map<String, String> translationMap) {
    return keywords.stream()
            .map(keyword -> translationMap.get(keyword))
            // Map.get yields null for an unmapped keyword; keeping that null would
            // inflate the set size and let two unrelated names "share" a code.
            .filter(Objects::nonNull)
            .collect(Collectors.toSet());
}
//get the list of keywords in a string
public List<String> getKeywords(String s) {
/**
 * Translates keywords into codes using the class-level translation map
 * (loaded from {@code /eu/dnetlib/pace/config/translation_map.csv}).
 *
 * @param keywords the keywords to translate
 * @return the codes produced by {@link #toCodes(Set, Map)} for those keywords
 */
public Set<String> keywordsToCodes(Set<String> keywords) {
    final Set<String> keywordCodes = toCodes(keywords, translationMap);
    return keywordCodes;
}
final String regex = "\\bkey::[0-9]*\\b";
Pattern p = Pattern.compile(regex, Pattern.MULTILINE);
Matcher m = p.matcher(s);
List<String> codes = new ArrayList<>();
while (m.find()) {
codes.add(m.group(0));
for (int i = 1; i <= m.groupCount(); i++) {
codes.add(m.group(0));
}
}
return codes;
/**
 * Translates city names into codes using the class-level city map
 * (loaded from {@code /eu/dnetlib/pace/config/city_map.csv}).
 *
 * @param keywords the city names to translate
 * @return the codes produced by {@link #toCodes(Set, Map)} for those names
 */
public Set<String> citiesToCodes(Set<String> keywords) {
    final Set<String> cityCodes = toCodes(keywords, cityMap);
    return cityCodes;
}
protected String firstLC(final String s) {
@ -320,7 +285,7 @@ public abstract class AbstractPaceFunctions {
}
//get the list of codes into the input string
public Set<String> getCodes(String s1, Map<String, String> translationMap, int windowSize){
public Set<String> getKeywords(String s1, Map<String, String> translationMap, int windowSize){
String s = cleanup(s1);
@ -340,7 +305,7 @@ public abstract class AbstractPaceFunctions {
for (int i = 0; i<=tokens.size()-length; i++){
String candidate = Joiner.on(" ").join(tokens.subList(i, i + length));
if (translationMap.containsKey(candidate)) {
codes.add(translationMap.get(candidate));
codes.add(candidate);
s = s.replace(candidate, "");
}
}
@ -352,4 +317,12 @@ public abstract class AbstractPaceFunctions {
return codes;
}
/**
 * Finds the keywords of the class-level translation map that occur in a string.
 *
 * @param s1         the string to scan
 * @param windowSize forwarded unchanged to the three-argument overload
 *                   (presumably the longest token run tried as one keyword; confirm there)
 * @return whatever the three-argument {@code getKeywords} returns for the translation map
 */
public Set<String> getKeywords(String s1, int windowSize) {
    final Set<String> foundKeywords = getKeywords(s1, translationMap, windowSize);
    return foundKeywords;
}
/**
 * Finds the city names of the class-level city map that occur in a string.
 *
 * @param s1         the string to scan
 * @param windowSize forwarded unchanged to the three-argument {@code getKeywords}
 *                   (presumably the longest token run tried as one city name; confirm there)
 * @return whatever the three-argument {@code getKeywords} returns for the city map
 */
public Set<String> getCities(String s1, int windowSize) {
    final Set<String> foundCities = getKeywords(s1, cityMap, windowSize);
    return foundCities;
}
}

View File

@ -11,18 +11,6 @@ import java.util.Set;
@DistanceClass("JaroWinklerNormalizedName")
public class JaroWinklerNormalizedName extends SecondStringDistanceAlgo {
private static Set<String> stopwordsEn = AbstractPaceFunctions.loadFromClasspath("/eu/dnetlib/pace/config/stopwords_en.txt");
private static Set<String> stopwordsIt = AbstractPaceFunctions.loadFromClasspath("/eu/dnetlib/pace/config/stopwords_it.txt");
private static Set<String> stopwordsDe = AbstractPaceFunctions.loadFromClasspath("/eu/dnetlib/pace/config/stopwords_de.txt");
private static Set<String> stopwordsFr = AbstractPaceFunctions.loadFromClasspath("/eu/dnetlib/pace/config/stopwords_fr.txt");
private static Set<String> stopwordsPt = AbstractPaceFunctions.loadFromClasspath("/eu/dnetlib/pace/config/stopwords_pt.txt");
private static Set<String> stopwordsEs = AbstractPaceFunctions.loadFromClasspath("/eu/dnetlib/pace/config/stopwords_es.txt");
//key=word, value=global identifier => example: "università"->"university", used to substitute the word with the global identifier
private static Map<String,String> translationMap = AbstractPaceFunctions.loadMapFromClasspath("/eu/dnetlib/pace/config/translation_map.csv");
private static Map<String,String> cityMap = AbstractPaceFunctions.loadMapFromClasspath("/eu/dnetlib/pace/config/city_map.csv");
private Map<String, Number> params;
public JaroWinklerNormalizedName(Map<String, Number> params){
@ -49,28 +37,30 @@ public class JaroWinklerNormalizedName extends SecondStringDistanceAlgo {
ca = filterAllStopWords(ca);
cb = filterAllStopWords(cb);
//replace keywords with codes
String codesA = keywordsToCode(ca, translationMap, params.getOrDefault("windowSize", 4).intValue());
String codesB = keywordsToCode(cb, translationMap, params.getOrDefault("windowSize",4).intValue());
Set<String> keywords1 = getKeywords(ca, params.getOrDefault("windowSize", 4).intValue());
Set<String> keywords2 = getKeywords(cb, params.getOrDefault("windowSize", 4).intValue());
//replace cities with codes
codesA = keywordsToCode(codesA, cityMap, params.getOrDefault("windowSize", 4).intValue());
codesB = keywordsToCode(codesB, cityMap, params.getOrDefault("windowSize", 4).intValue());
Set<String> cities1 = getCities(ca, params.getOrDefault("windowSize", 4).intValue());
Set<String> cities2 = getCities(cb, params.getOrDefault("windowSize", 4).intValue());
if (sameCity(cities1,cities2)) {
if (keywordsCompare(keywords1, keywords2)>params.getOrDefault("threshold", 0.5).doubleValue()) {
ca = removeKeywords(ca, keywords1);
ca = removeKeywords(ca, cities1);
cb = removeKeywords(cb, keywords2);
cb = removeKeywords(cb, cities2);
//if two names have same city
if (sameCity(codesA,codesB)){
if (keywordsCompare(codesA, codesB)>params.getOrDefault("threshold", 0.5).doubleValue()) {
ca = removeCodes(codesA);
cb = removeCodes(codesB);
if (ca.isEmpty() && cb.isEmpty())
return 1.0;
else
return normalize(ssalgo.score(ca,cb));
}
}
return 0.0;
}
@Override

View File

@ -6192,7 +6192,7 @@ city::753142;Zoliborz;Zalborz;Zalbórz;
city::753866;Zamosc;Gorad Zamasc';Zamosc;Zamosc';Zamosca;Zamoscia;Zamose;Zamoshc;Zamoshch;Zamost'ye;Zamoste;Zamostja;Zamosts;Zamostė;Zamostye;Zamość;jamosichi;sa mxchch;zamoshichi;zamostsi;zha mo xi qi;zmwsz;Ζάμοστς;Горад Замасць;Замостя;Замосць;Замошч;Замошћ;Զամոշչ;זמושץ;ซามอชช์;ზამოსცი;ザモシチ;扎莫希奇;자모시치;
city::755330;Wola;Volja;Воля;
city::756092;Wawer;;
city::756135;Warsaw;Barsobia;Varsa;Varsava;Varsavia;Varsavja;Varshava;Varshavae;Varsja;Varsjá;Varso;Varsova;Varsovia;Varsovia - Warszawa;Varsovie;Varsovio;Varssavi;Varsuva;Varsòvia;Varsó;Varsóvia;Varşova;Varšava;Varšuva;Varșovia;Vársá;WAW;Warsaw;Warsawa;Warschau;Warskou;Warszaw;Warszawa;Waršawa;baleusyaba;hua sha;varshava;vorso;warsw;warushawa;wrsh;wrshw;wrsw;wxrsx;Βαρσοβία;Варшавæ;Варшава;Վարշավա;ווארשע;ורשה;װאַרשע;وارسو;ورشو;ۋارشاۋا;ܘܪܣܘ;वॉर्सो;วอร์ซอ;ვარშავა;ዋርሶው;ワルシャワ;华沙;華沙;바르샤바;
city::756135;Warsaw;warszawie;Barsobia;Varsa;Varsava;Varsavia;Varsavja;Varshava;Varshavae;Varsja;Varsjá;Varso;Varsova;Varsovia;Varsovia - Warszawa;Varsovie;Varsovio;Varssavi;Varsuva;Varsòvia;Varsó;Varsóvia;Varşova;Varšava;Varšuva;Varșovia;Vársá;WAW;Warsaw;Warsawa;Warschau;Warskou;Warszaw;Warszawa;Waršawa;baleusyaba;hua sha;varshava;vorso;warsw;warushawa;wrsh;wrshw;wrsw;wxrsx;Βαρσοβία;Варшавæ;Варшава;Վարշավա;ווארשע;ורשה;װאַרשע;وارسو;ورشو;ۋارشاۋا;ܘܪܣܘ;वॉर्सो;วอร์ซอ;ვარშავა;ዋርሶው;ワルシャワ;华沙;華沙;바르샤바;warszawskiej;warszawska;
city::756867;Tomaszow Mazowiecki;Mazovijos Tomasuvas;Mazovijos Tomašuvas;Thomasovia;Tomashov Mazovecki;Tomashov Mazovjecki;Tomashuv-Mazovec'kij;Tomashuv-Mazovecki;Tomashuv-Mazoveckij;Tomasova Mazovecka;Tomaszow;Tomaszow Mazowiecki;Tomaszów;Tomaszów Mazowiecki;Tomašova Mazovecka;ma zuo fu she de qu tuo ma shu fu;twmswb mzwbyyzqy;tx ma chuf ma sx weiyt ski;Томашов Мазовецки;Томашов Мазовјецки;Томашув-Мазовецки;Томашув-Мазовецкий;Томашув-Мазовецький;טומשוב מזובייצקי;ตอมาชูฟมาซอเวียตสกี;馬佐夫舍地區托馬舒夫;
city::757026;Tarnow;Gorad Tarnuu;Tarnov;Tarnova;Tarnow;Tarnuv;Tarnuvas;Tarnuw;Tarnów;Tarnůw;ta er nu fu;taleunupeu;tarnwf;tarunufu;trnwb;Горад Тарнуў;Тарнов;Тарнув;טארנע;טרנוב;تارنوف;タルヌフ;塔爾努夫;타르누프;
city::757033;Tarnobrzeg;Gorad Tarnobzhag;Nova Tarnovia;QEP;Tarnobjeg;Tarnobrzeg;Tarnobrzyg;Tarnobzega;Tarnobzegas;Tarnobzheg;Tarnobzhege;Tarnobžega;Tarnobžegas;ta er nuo bu re ge;taleunobeujekeu;tarnwbzk;tarunobujeku;trnwbzg;Горад Тарнобжаг;Тарнобжег;Тарнобжеге;דזשיקאוו;טרנובזג;تارنوبزک;タルノブジェク;塔尔诺布热格;타르노브제크;
@ -6229,7 +6229,7 @@ city::3080165;Zielona Gora;Gorad Zjaljona-Gura;Gruentberg;Grunberg;Grünberg;IEG
city::3080251;Zgierz;Gorad Zgezh;Zgeza;Zgezas;Zgezh;Zgeža;Zgežas;Zgierz;Zgjezh;Znkies;ci gai ri;jeugiesi;sex keiyr ch;zgyyz;zugyeshi;Ζγκιες;Горад Згеж;Згеж;Згјеж;זגייז;เซอเกียร์ช;ズギェシ;兹盖日;즈기에시;
city::3080526;Zawiercie;Zaverce;Zaverche;Zavercis;Zavertse;Zaviercis;Zavjerce;Zawiercie;zha wei er qie;zwwyyrzh;Заверце;Заверче;Завјерће;זוויירצה;扎维尔切;
city::3080985;Zabrze;Gorad Zabzheh;Hindenburg;Zabje;Zaborze;Zabrze;Zabzas;Zabze;Zabzhe;Zabžas;Zabže;Zobrze;jabeuje;sab che;zabjh;zabuje;zabzhh;zha bu re;zʼbzh;Горад Забжэ;Забже;Զաբժե;זאבזה;زابجه;زابژه;زبرزے;ซาบเช;ザブジェ;扎布热;자브제;
city::3081368;Wroclaw;Brassel;Breslau;Breslavia;Breslavl';Breslavl;Gorad Vroclau;Vratislav;Vratislavia;Vroclav;Vroclava;Vroclavas;Vroclavo;Vroklave;Vroslav;Vrotslav;WRO;Wroclaw;Wroclow;Wrocław;Wrocłow;Wroklaw;Wroslaw;Wrosław;Wrócław;beulocheuwapeu;frwtswaf;fu luo ci wa fu;viratscahp;vrotsavapha;vrotslavi;vurotsuwafu;w rxtswaf;wrwslaw;wrwtswaf;wrwzlb;Βρότσλαβ;Вроцлав;Горад Вроцлаў;ברעסלוי;ורוצלב;فروتسواف;وروتسواف;وروسلاو;ڤرۆتسواف;व्रोत्सवाफ;விராத்ஸ்சாஃப்;วรอตสวัฟ;ვროცლავი;ヴロツワフ;弗罗茨瓦夫;브로츠와프;
city::3081368;Wroclaw;Brassel;Breslau;Breslavia;Breslavl';Breslavl;Gorad Vroclau;Vratislav;Vratislavia;Vroclav;Vroclava;Vroclavas;Vroclavo;Vroklave;Vroslav;Vrotslav;WRO;Wroclaw;Wroclow;Wrocław;Wrocłow;Wroklaw;Wroslaw;Wrosław;Wrócław;beulocheuwapeu;frwtswaf;fu luo ci wa fu;viratscahp;vrotsavapha;vrotslavi;vurotsuwafu;w rxtswaf;wrwslaw;wrwtswaf;wrwzlb;Βρότσλαβ;Вроцлав;Горад Вроцлаў;ברעסלוי;ורוצלב;فروتسواف;وروتسواف;وروسلاو;ڤرۆتسواف;व्रोत्सवाफ;விராத்ஸ்சாஃப்;วรอตสวัฟ;ვროცლავი;ヴロツワフ;弗罗茨瓦夫;브로츠와프;Wrocławska;
city::3081741;Wloclawek;Gorad Ulaclavak;Leslau;Vloclavek;Vloclaveka;Vloclavekas;Wloclawek;Włocławek;beuwocheuwabekeu;fu wo ci wa wei ke;fwtswafk;vuu~otsuwavu~eku;w wxts wa wek;Влоцлавек;Горад Улацлавак;ולוצלאווק;فوتسوافك;ووتس‌واوک;ววอตซวาเวก;ヴウォツワヴェク;弗沃茨瓦韦克;브워츠와베크;
city::3082707;Walbrzych;Gorad Valbzhykh;Valbzhikh;Valbziha;Valbzychas;Valbžiha;Valbžychas;Walbrzych;Waldenburg;Waldenburg in Schlesien;Wałbrzych;baubeujiheu;vu~aubujifu;wa lb cik;wa wu bu ri he;wawb jykh;wawb zhykh;wlbzyk;Валбжих;Горад Валбжых;ולבזיך;واوب جيخ;واوب ژیخ;วาลบ์จิก;ヴァウブジフ;瓦烏布日赫;바우브지흐;
city::3082914;Tychy;Tichau;Tihi;Tikhi;Tychos;Tychy;Tykhy;Tıhı;di hei;thi khi;tihi;tixa;tyshy;Тихи;Тыхы;تیشی;ทิคี;ティヒ;蒂黑;티히;

Can't render this file because it is too large.

View File

@ -1,6 +1,6 @@
key::1;university;università;università studi;universitario;universitaria;université;universitaire;universitaires;universidad;universitade;Universität;universitaet;Uniwersytet;университет;universiteit;πανεπιστήμιο
key::2;studies;studi;études;estudios;estudos;Studien;studia;исследования;studies;σπουδές
key::3;advanced;superiore;supérieur;supérieure;supérieurs;supérieures;avancado;avancados;fortgeschrittene;fortgeschritten;zaawansowany;передовой;gevorderd;gevorderde;προχωρημένος;προχωρημένη;προχωρημένο;προχωρημένες;προχωρημένα
key::3;advanced;superiore;supérieur;supérieure;supérieurs;supérieures;avancado;avancados;fortgeschrittene;fortgeschritten;zaawansowany;передовой;gevorderd;gevorderde;προχωρημένος;προχωρημένη;προχωρημένο;προχωρημένες;προχωρημένα;wyzsza;
key::4;institute;istituto;institut;instituto;instituto;Institut;instytut;институт;instituut;ινστιτούτο
key::5;hospital;ospedale;hôpital;hospital;hospital;Krankenhaus;szpital;больница;ziekenhuis;νοσοκομείο
key::6;research;ricerca;recherche;investigacion;pesquisa;Forschung;badania;исследования;onderzoek;έρευνα;erevna;erevnas
@ -38,7 +38,7 @@ key::37;federation;federazione;fédération;федерация;federatie;ομο
key::38;observatory;osservatorio;observatoire;обсерватория;observatorium;αστεροσκοπείο
key::39;bureau;ufficio;bureau;офис;bureau;γραφείο
key::40;company;impresa;compagnie;société;компания;bedrijf;εταιρία
key::41;polytechnic;politecnico;polytechnique;политехника;polytechnisch;πολυτεχνείο;universita politecnica;polytechnic university
key::41;polytechnic;politecnico;polytechnique;политехника;polytechnisch;πολυτεχνείο;universita politecnica;polytechnic university;politechnika;politechniki;university technology;university science technology;
key::42;coalition;coalizione;coalition;коалиция;coalitie;συνασπισμός
key::43;initiative;iniziativa;initiative;инициатива;initiatief;πρωτοβουλία
key::44;academic;accademico;académique;universitaire;академический;academisch;ακαδημαϊκός;ακαδημαϊκή;ακαδημαϊκό;ακαδημαϊκές;ακαδημαϊκοί
@ -47,13 +47,13 @@ key::46;division;divisione;division;отделение;divisie;τμήμα
key::47;committee;comitato;comité;комитет;commissie;επιτροπή
key::48;promotion;promozione;продвижение;proothisis;forderung
key::49;medical;medicine;clinical;medicina;clinici;médico;medicina;clínica;médico;medicina;clínica;medizinisch;Medizin;klinisch;medisch;geneeskunde;klinisch;ιατρικός;ιατρική;ιατρικό;ιατρικά;κλινικός;κλινική;κλινικό;κλινικά;tıbbi;tıp;klinik;orvosi;orvostudomány;klinikai;zdravniški;medicinski;klinični;meditsiini;kliinik;kliiniline;
key::50;technology;technological;tecnologia;tecnologie;tecnología;tecnológico;tecnologia;tecnológico;Technologie;technologisch;technologie;technologisch;τεχνολογία;τεχνολογικός;τεχνολογική;τεχνολογικό;teknoloji;teknolojik;technológia;technológiai;tehnologija;tehnološki;tehnoloogia;tehnoloogiline;
key::50;technology;technological;tecnologia;tecnologie;tecnología;tecnológico;tecnologia;tecnológico;Technologie;technologisch;technologie;technologisch;τεχνολογία;τεχνολογικός;τεχνολογική;τεχνολογικό;teknoloji;teknolojik;technológia;technológiai;tehnologija;tehnološki;tehnoloogia;tehnoloogiline;technologii;
key::51;science;scientific;scienza;scientifiche;scienze;ciencia;científico;ciência;científico;Wissenschaft;wissenschaftlich;wetenschap;wetenschappelijk;επιστήμη;επιστημονικός;επιστημονική;επιστημονικό;επιστημονικά;bilim;bilimsel;tudomány;tudományos;znanost;znanstveni;teadus;teaduslik;
key::52;engineering;ingegneria;ingeniería;engenharia;Ingenieurwissenschaft;ingenieurswetenschappen;bouwkunde;μηχανικός;μηχανική;μηχανικό;mühendislik;mérnöki;Inženirstvo;inseneeria;inseneri;
key::53;management;gestione;gestionale;gestionali;gestión;administración;gestão;administração;Verwaltung;management;διαχείριση;yönetim;menedzsment;vodstvo;upravljanje;management;juhtkond;juhtimine;haldus;
key::54;energy;energia;energía;energia;Energie;energie;ενέργεια;enerji;energia;energija;energia;
key::55;agricultural;agriculture;agricoltura;agricole;agrícola;agricultura;agrícola;agricultura;landwirtschaftlich;Landwirtschaft;landbouwkundig;landbouw;αγροτικός;αγροτική;αγροτικό;γεωργικός;γεωργική;γεωργικό;γεωργία;tarımsal;tarım;mezőgazdasági;mezőgazdaság;poljedelski;poljedelstvo;põllumajandus;põllumajanduslik;
key::56;information;informazione;información;informação;Information;informatie;πληροφορία;bilgi;információ;informacija;informatsioon;
key::56;information;informazione;información;informação;Information;informatie;πληροφορία;bilgi;információ;informacija;informatsioon;informatycznych;
key::57;social;sociali;social;social;Sozial;sociaal;maatschappelijk;κοινωνικός;κοινωνική;κοινωνικό;κοινωνικά;sosyal;szociális;družbeni;sotsiaal;sotsiaalne;
key::58;environmental;ambiente;medioambiental;ambiente;medioambiente;meioambiente;Umwelt;milieu;milieuwetenschap;milieukunde;περιβαλλοντικός;περιβαλλοντική;περιβαλλοντικό;περιβαλλοντικά;çevre;környezeti;okoliški;keskonna;;
key::59;business;economia;economiche;economica;negocio;empresa;negócio;Unternehmen;bedrijf;bedrijfskunde;επιχείρηση;iş;üzleti;posel;ettevõte/äri;
@ -71,7 +71,7 @@ key::70;veterinary;veterinaria;veterinarie;veterinaria;veterinária;tierärtzlic
key::71;chemistry;chimica;química;química;Chemie;chemie;scheikunde;χημεία;kimya;kémia;kemija;keemia;
key::72;security;sicurezza;seguridad;segurança;Sicherheit;veiligheid;ασφάλεια;güvenlik;biztonsági;varnost;turvalisus;julgeolek;
key::73;biotechnology;biotecnologia;biotecnologie;biotecnología;biotecnologia;Biotechnologie;biotechnologie;βιοτεχνολογία;biyoteknoloji;biotechnológia;biotehnologija;biotehnoloogia;
key::74;military;militare;militari;militar;militar;Militär;militair;leger;στρατιωτικός;στρατιωτική;στρατιωτικό;στρατιωτικά;askeri;katonai;vojaški;vojni;militaar;
key::74;military;militare;militari;militar;militar;Militär;militair;leger;στρατιωτικός;στρατιωτική;στρατιωτικό;στρατιωτικά;askeri;katonai;vojaški;vojni;militaar;wojskowa;
key::75;theological;teologia;teologico;teológico;tecnológica;theologisch;theologisch;θεολογικός;θεολογική;θεολογικό;θεολογικά;teolojik;technológiai;teološki;teoloogia;usuteadus;teoloogiline;
key::76;electronics;elettronica;electrónica;eletrônicos;Elektronik;elektronica;ηλεκτρονική;elektronik;elektronika;elektronika;elektroonika;
key::77;forestry;forestale;forestali;silvicultura;forestal;floresta;Forstwirtschaft;bosbouw;δασοκομία;δασολογία;ormancılık;erdészet;gozdarstvo;metsandus;
@ -100,4 +100,4 @@ key::99;neurology;neurologia;neurologiche;neurología;neurologia;Neurologie;neur
key::100;geology;geologia;geologiche;geología;geologia;Geologie;geologie;aardkunde;γεωλογία;jeoloji;geológia;földtudomány;geologija;geoloogia;
key::101;microbiology;microbiologia;micro-biologia;microbiologiche;microbiología;microbiologia;Mikrobiologie;microbiologie;μικροβιολογία;mikrobiyoloji;mikrobiológia;mikrobiologija;mikrobioloogia;
key::102;informatics;informatica;informática;informática;informatica;
key::103;forschungsgemeinschaft;comunita ricerca;research community;research foundation;research association
key::103;forschungsgemeinschaft;comunita ricerca;research community;research foundation;research association
1 key::1;university;università;università studi;universitario;universitaria;université;universitaire;universitaires;universidad;universitade;Universität;universitaet;Uniwersytet;университет;universiteit;πανεπιστήμιο
2 key::2;studies;studi;études;estudios;estudos;Studien;studia;исследования;studies;σπουδές
3 key::3;advanced;superiore;supérieur;supérieure;supérieurs;supérieures;avancado;avancados;fortgeschrittene;fortgeschritten;zaawansowany;передовой;gevorderd;gevorderde;προχωρημένος;προχωρημένη;προχωρημένο;προχωρημένες;προχωρημένα key::3;advanced;superiore;supérieur;supérieure;supérieurs;supérieures;avancado;avancados;fortgeschrittene;fortgeschritten;zaawansowany;передовой;gevorderd;gevorderde;προχωρημένος;προχωρημένη;προχωρημένο;προχωρημένες;προχωρημένα;wyzsza;
4 key::4;institute;istituto;institut;instituto;instituto;Institut;instytut;институт;instituut;ινστιτούτο
5 key::5;hospital;ospedale;hôpital;hospital;hospital;Krankenhaus;szpital;больница;ziekenhuis;νοσοκομείο
6 key::6;research;ricerca;recherche;investigacion;pesquisa;Forschung;badania;исследования;onderzoek;έρευνα;erevna;erevnas
38 key::38;observatory;osservatorio;observatoire;обсерватория;observatorium;αστεροσκοπείο
39 key::39;bureau;ufficio;bureau;офис;bureau;γραφείο
40 key::40;company;impresa;compagnie;société;компания;bedrijf;εταιρία
41 key::41;polytechnic;politecnico;polytechnique;политехника;polytechnisch;πολυτεχνείο;universita politecnica;polytechnic university key::41;polytechnic;politecnico;polytechnique;политехника;polytechnisch;πολυτεχνείο;universita politecnica;polytechnic university;politechnika;politechniki;university technology;university science technology;
42 key::42;coalition;coalizione;coalition;коалиция;coalitie;συνασπισμός
43 key::43;initiative;iniziativa;initiative;инициатива;initiatief;πρωτοβουλία
44 key::44;academic;accademico;académique;universitaire;академический;academisch;ακαδημαϊκός;ακαδημαϊκή;ακαδημαϊκό;ακαδημαϊκές;ακαδημαϊκοί
47 key::47;committee;comitato;comité;комитет;commissie;επιτροπή
48 key::48;promotion;promozione;продвижение;proothisis;forderung
49 key::49;medical;medicine;clinical;medicina;clinici;médico;medicina;clínica;médico;medicina;clínica;medizinisch;Medizin;klinisch;medisch;geneeskunde;klinisch;ιατρικός;ιατρική;ιατρικό;ιατρικά;κλινικός;κλινική;κλινικό;κλινικά;tıbbi;tıp;klinik;orvosi;orvostudomány;klinikai;zdravniški;medicinski;klinični;meditsiini;kliinik;kliiniline;
50 key::50;technology;technological;tecnologia;tecnologie;tecnología;tecnológico;tecnologia;tecnológico;Technologie;technologisch;technologie;technologisch;τεχνολογία;τεχνολογικός;τεχνολογική;τεχνολογικό;teknoloji;teknolojik;technológia;technológiai;tehnologija;tehnološki;tehnoloogia;tehnoloogiline; key::50;technology;technological;tecnologia;tecnologie;tecnología;tecnológico;tecnologia;tecnológico;Technologie;technologisch;technologie;technologisch;τεχνολογία;τεχνολογικός;τεχνολογική;τεχνολογικό;teknoloji;teknolojik;technológia;technológiai;tehnologija;tehnološki;tehnoloogia;tehnoloogiline;technologii;
51 key::51;science;scientific;scienza;scientifiche;scienze;ciencia;científico;ciência;científico;Wissenschaft;wissenschaftlich;wetenschap;wetenschappelijk;επιστήμη;επιστημονικός;επιστημονική;επιστημονικό;επιστημονικά;bilim;bilimsel;tudomány;tudományos;znanost;znanstveni;teadus;teaduslik;
52 key::52;engineering;ingegneria;ingeniería;engenharia;Ingenieurwissenschaft;ingenieurswetenschappen;bouwkunde;μηχανικός;μηχανική;μηχανικό;mühendislik;mérnöki;Inženirstvo;inseneeria;inseneri;
53 key::53;management;gestione;gestionale;gestionali;gestión;administración;gestão;administração;Verwaltung;management;διαχείριση;yönetim;menedzsment;vodstvo;upravljanje;management;juhtkond;juhtimine;haldus;
54 key::54;energy;energia;energía;energia;Energie;energie;ενέργεια;enerji;energia;energija;energia;
55 key::55;agricultural;agriculture;agricoltura;agricole;agrícola;agricultura;agrícola;agricultura;landwirtschaftlich;Landwirtschaft;landbouwkundig;landbouw;αγροτικός;αγροτική;αγροτικό;γεωργικός;γεωργική;γεωργικό;γεωργία;tarımsal;tarım;mezőgazdasági;mezőgazdaság;poljedelski;poljedelstvo;põllumajandus;põllumajanduslik;
56 key::56;information;informazione;información;informação;Information;informatie;πληροφορία;bilgi;információ;informacija;informatsioon; key::56;information;informazione;información;informação;Information;informatie;πληροφορία;bilgi;információ;informacija;informatsioon;informatycznych;
57 key::57;social;sociali;social;social;Sozial;sociaal;maatschappelijk;κοινωνικός;κοινωνική;κοινωνικό;κοινωνικά;sosyal;szociális;družbeni;sotsiaal;sotsiaalne;
58 key::58;environmental;ambiente;medioambiental;ambiente;medioambiente;meioambiente;Umwelt;milieu;milieuwetenschap;milieukunde;περιβαλλοντικός;περιβαλλοντική;περιβαλλοντικό;περιβαλλοντικά;çevre;környezeti;okoliški;keskonna;;
59 key::59;business;economia;economiche;economica;negocio;empresa;negócio;Unternehmen;bedrijf;bedrijfskunde;επιχείρηση;iş;üzleti;posel;ettevõte/äri;
71 key::71;chemistry;chimica;química;química;Chemie;chemie;scheikunde;χημεία;kimya;kémia;kemija;keemia;
72 key::72;security;sicurezza;seguridad;segurança;Sicherheit;veiligheid;ασφάλεια;güvenlik;biztonsági;varnost;turvalisus;julgeolek;
73 key::73;biotechnology;biotecnologia;biotecnologie;biotecnología;biotecnologia;Biotechnologie;biotechnologie;βιοτεχνολογία;biyoteknoloji;biotechnológia;biotehnologija;biotehnoloogia;
74 key::74;military;militare;militari;militar;militar;Militär;militair;leger;στρατιωτικός;στρατιωτική;στρατιωτικό;στρατιωτικά;askeri;katonai;vojaški;vojni;militaar; key::74;military;militare;militari;militar;militar;Militär;militair;leger;στρατιωτικός;στρατιωτική;στρατιωτικό;στρατιωτικά;askeri;katonai;vojaški;vojni;militaar;wojskowa;
75 key::75;theological;teologia;teologico;teológico;tecnológica;theologisch;theologisch;θεολογικός;θεολογική;θεολογικό;θεολογικά;teolojik;technológiai;teološki;teoloogia;usuteadus;teoloogiline;
76 key::76;electronics;elettronica;electrónica;eletrônicos;Elektronik;elektronica;ηλεκτρονική;elektronik;elektronika;elektronika;elektroonika;
77 key::77;forestry;forestale;forestali;silvicultura;forestal;floresta;Forstwirtschaft;bosbouw;δασοκομία;δασολογία;ormancılık;erdészet;gozdarstvo;metsandus;
100 key::100;geology;geologia;geologiche;geología;geologia;Geologie;geologie;aardkunde;γεωλογία;jeoloji;geológia;földtudomány;geologija;geoloogia;
101 key::101;microbiology;microbiologia;micro-biologia;microbiologiche;microbiología;microbiologia;Mikrobiologie;microbiologie;μικροβιολογία;mikrobiyoloji;mikrobiológia;mikrobiologija;mikrobioloogia;
102 key::102;informatics;informatica;informática;informática;informatica;
103 key::103;forschungsgemeinschaft;comunita ricerca;research community;research foundation;research association

View File

@ -137,6 +137,10 @@ public class ClusteringFunctionTest extends AbstractPaceTest {
System.out.println("s3 = " + s3);
System.out.println(cf.apply(Lists.newArrayList(title(s3))));
final String s4 = "Politechniki Warszawskiej (Warsaw University of Technology)";
System.out.println("s4 = " + s4);
System.out.println(cf.apply(Lists.newArrayList(title(s4))));
}
}

View File

@ -51,7 +51,7 @@ public class DistanceAlgoTest extends AbstractPaceFunctions {
double result = jaroWinklerNormalizedName.distance("Free University of Bozen-Bolzano", "University of the Free State");
System.out.println("result = " + result);
assertEquals(1.0, result);
assertEquals(0.0, result);
}
@Test
@ -114,4 +114,13 @@ public class DistanceAlgoTest extends AbstractPaceFunctions {
assertTrue(result> 0.9);
}
// Scores a legal name that carries both the Polish and the English form against
// the English form alone.
// NOTE(review): the score is only printed; nothing is asserted, so this test
// cannot fail on a wrong value. Add an expectation once the intended score is settled.
@Test
public void testJaroWinklerNormalizedName8() {
    final String polishWithEnglish = "Politechniki Warszawskiej (Warsaw University of Technology)";
    final String englishOnly = "Warsaw University of Technology";

    final JaroWinklerNormalizedName comparator = new JaroWinklerNormalizedName(params);
    final double score = comparator.distance(polishWithEnglish, englishOnly);

    System.out.println("result = " + score);
}
}