From 84974dcdfa4f5bacd9f55dd6c71ce3a023f2ce26 Mon Sep 17 00:00:00 2001 From: miconis Date: Fri, 19 Jul 2019 17:10:29 +0200 Subject: [PATCH] restyling of the JaroWinklerNormalizedName comparator, now it is optimized. Addition of some translations in the translation maps, addition of a clustering based on keywords in organizations legalnames --- .../main/java/eu/dnetlib/SparkLocalTest.java | 2 + .../resources/eu/dnetlib/pace/org.curr.conf | 4 +- .../eu/dnetlib/pace/organization.to.fix.json | 65 ++++------ .../pace/clustering/KeywordsClustering.java | 9 +- .../pace/common/AbstractPaceFunctions.java | 121 +++++++----------- .../algo/JaroWinklerNormalizedName.java | 38 ++---- .../eu/dnetlib/pace/config/city_map.csv | 4 +- .../dnetlib/pace/config/translation_map.csv | 12 +- .../clustering/ClusteringFunctionTest.java | 4 + .../pace/distance/DistanceAlgoTest.java | 11 +- 10 files changed, 117 insertions(+), 153 deletions(-) diff --git a/dnet-dedup-test/src/main/java/eu/dnetlib/SparkLocalTest.java b/dnet-dedup-test/src/main/java/eu/dnetlib/SparkLocalTest.java index 4abbdd5..8a1783a 100644 --- a/dnet-dedup-test/src/main/java/eu/dnetlib/SparkLocalTest.java +++ b/dnet-dedup-test/src/main/java/eu/dnetlib/SparkLocalTest.java @@ -46,6 +46,8 @@ public class SparkLocalTest { return new Tuple2<>(mapDocument.getIdentifier(), mapDocument); }); +// mapDocs.foreach(doc -> System.out.println("doc = " + doc._2().getFieldMap().get("legalname"))); + // mapDocs.filter(d -> d._2().getFieldMap().get("doi").stringValue().length() > 0).foreach(d -> System.out.println(d)); // mapDocs.filter(d -> d._2().getFieldMap().get("documentationUrl").stringValue().length() > 0).foreach(d -> System.out.println(d)); diff --git a/dnet-dedup-test/src/main/resources/eu/dnetlib/pace/org.curr.conf b/dnet-dedup-test/src/main/resources/eu/dnetlib/pace/org.curr.conf index f8a76ac..4384df8 100644 --- a/dnet-dedup-test/src/main/resources/eu/dnetlib/pace/org.curr.conf +++ b/dnet-dedup-test/src/main/resources/eu/dnetlib/pace/org.curr.conf @@ -31,6 +31,8 @@ { "name" : "websiteurl", "algo" : "Null", "type" : "URL", "weight" : "0", "ignoreMissing" : "true", "path" : "organization/metadata/websiteurl/value", "params" : { "host" : 0.5, "path" : 0.5 } }, { "name" : "gridid", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "pid[qualifier#classid = {grid}]/value" } ], - "blacklists" : { } + "blacklists" : { + "legalname" : [] + } } } \ No newline at end of file diff --git a/dnet-dedup-test/src/main/resources/eu/dnetlib/pace/organization.to.fix.json b/dnet-dedup-test/src/main/resources/eu/dnetlib/pace/organization.to.fix.json index 305b68e..ab544bf 100644 --- a/dnet-dedup-test/src/main/resources/eu/dnetlib/pace/organization.to.fix.json +++ b/dnet-dedup-test/src/main/resources/eu/dnetlib/pace/organization.to.fix.json @@ -1,40 +1,25 @@ -{"dateoftransformation":"2018-11-12","originalId":["opendoar____::Humboldt_State_University"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.humboldt.edu/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt State University"},"country":{"classid":"US","classname":"United States","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-11-12","type":20,"id":"20|opendoar____::9c3522c59aef0edab19b8a3f0aeb39ed"} -{"dateoftransformation":"2019-05-19","originalId":["rcuk________::9758583A-FF1E-41C4-9176-B875E8FAC110"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt State University"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-04","type":20,"id":"20|rcuk________::7715018b4838eaf1d57242c788e222d4"} -{"dateoftransformation":"2018-09-13","originalId":["opendoar____::Humboldt-Universität_zu_Berlin"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"HU"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"https://www.hu-berlin.de/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt-Universität zu Berlin"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2015-08-24","type":20,"id":"20|opendoar____::13ab9ef517038d3751f4b0e31aea9ac7"} -{"dateoftransformation":"2018-09-27","originalId":["re3data_____::678d9d5a712331f6e2fce7b7b764090f"],"collectedfrom":[{"value":"Registry of Research Data Repository","key":"10|openaire____::21f8a223b9925c2f87c404096080b046"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"https://www.hu-berlin.de/de/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt-Universität Berlin"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-09-27","type":20,"id":"20|re3data_____::aeb488fd15eb1be77b998b5602450910"} -{"dateoftransformation":"2019-05-19","originalId":["rcuk________::CFF4C944-5CF1-4AE3-8C03-BE361D6DEDC3"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt University Berlin"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-03","type":20,"id":"20|rcuk________::ff1bccdf9520b3fadd2fc26103231de0"} -{"dateoftransformation":"2018-09-27","originalId":["re3data_____::4bda5f07be19914ce8e2e4652a72151c"],"collectedfrom":[{"value":"Registry of Research Data Repository","key":"10|openaire____::21f8a223b9925c2f87c404096080b046"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"https://www.hu-berlin.de/de"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt-Universität zu Berlin"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-09-27","type":20,"id":"20|re3data_____::d72a4f4665f7df9b48a22d4cfde0dd3c"} -{"dateoftransformation":"2018-09-13","originalId":["nih_________::HUMBOLDT_STATE_UNIVERSITY"],"collectedfrom":[{"value":"NIH - National Institutes of Health","key":"10|openaire____::9e9e8c76d739212c63eff362e321ba33"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"HUMBOLDT STATE UNIVERSITY"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2016-07-14","type":20,"id":"20|nih_________::8aec7ec3198fc69ce74e24b8f6aa9a59"} -{"dateoftransformation":"2018-09-19","originalId":["doajarticles::Humboldt-Universität_zu_Berlin"],"collectedfrom":[{"value":"DOAJ-Articles","key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"Humboldt-Universität zu Berlin"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt-Universität zu Berlin"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-09-19","type":20,"id":"20|doajarticles::13ab9ef517038d3751f4b0e31aea9ac7"} -{"dateoftransformation":"2018-11-20","originalId":["corda_______::999850781"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse","key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UBER"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.hu-berlin.de"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"HUMBOLDT-UNIVERSITAT ZU BERLIN"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda_______::d999b4c2dc81ccd40100056ab0543088"} -{"dateoftransformation":"2018-12-15","originalId":["corda__h2020::999850781"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020","key":"10|openaire____::a55eb91348674d853191f4f4fd73d078"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UBER"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.hu-berlin.de"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"HUMBOLDT-UNIVERSITAET ZU BERLIN"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda__h2020::d999b4c2dc81ccd40100056ab0543088"} -{"dateoftransformation": "2019-05-19", "originalId": ["rcuk________::9169966C-E38A-41D7-AF04-F7470963CBED"], "collectedfrom": [{"key": "10|openaire____::ab2d3310741ea80d3b8726f651502858", "value": "Research Councils UK"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Institute of Physics"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2017-11-04", "type": 20, "id": "20|rcuk________::3eb464c9a21582d7dbb3f115710d863c"} -{"dateoftransformation": "2019-05-19", "originalId": ["rcuk________::CEEF86B3-BB98-4CAE-848D-00837C745DEC"], "collectedfrom": [{"key": "10|openaire____::ab2d3310741ea80d3b8726f651502858", "value": "Research Councils UK"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Yerevan Physics Institute"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2017-11-03", "type": 20, "id": "20|rcuk________::f8790ac2aa4eb6fb7cc2980eb7971ee6"} -{"dateoftransformation": "2018-09-13", "originalId": ["opendoar____::Aalto_University"], "collectedfrom": [{"key": "10|openaire____::47ce9e9f4fad46e732cff06419ecaabb", "value": "OpenDOAR"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.aalto.fi/en/"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Aalto University"}, "country": {"classid": "FI", "classname": "Finland", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2015-08-24", "type": 20, "id": "20|opendoar____::98845925f422ef4987294d6bfac525dd"} -{"dateoftransformation": "2018-09-13", "originalId": ["nsf_________::Institute_of_Physics"], "collectedfrom": [{"key": "10|openaire____::dd69b4a1513c9de9f46faf24048da1e8", "value": "NSF - National Science Foundation"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Institute of Physics"}, "country": {"classid": "YU", "classname": "Yugoslavia", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2016-03-10", "type": 20, "id": "20|nsf_________::d540e5e89b1ace31d5dd0b8f658056ec"} -{"dateoftransformation": "2019-05-19", "originalId": ["rcuk________::EE4840D3-84C0-47A3-9109-30F67D0D550F"], "collectedfrom": [{"key": "10|openaire____::ab2d3310741ea80d3b8726f651502858", "value": "Research Councils UK"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Aalto University"}, "country": {"classid": "FI", "classname": "Finland", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2017-11-03", "type": 20, "id": "20|rcuk________::7559c8bbff5125d74919775a1f290496"} -{"dateoftransformation": "2019-05-29", "originalId": ["irb_hr______::Institute of Physics, Zagreb"], "collectedfrom": [{"key": "10|openaire____::db600878200645bd752cf7fd96a37df5", "value": "Rudjer Boskovic Institute Library - Croatian Projects"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Institute of Physics, Zagreb"}, "country": {"classid": "HR", "classname": "Croatia", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2018-06-15", "type": 20, "id": "20|irb_hr______::d0147c5dfa57d00b5bbd8405366d5ed9"} -{"dateoftransformation": "2018-11-20", "originalId": ["corda_______::918297740"], "collectedfrom": [{"key": "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f", "value": "CORDA - COmmon Research DAta Warehouse"}], "organization": {"metadata": {"eclegalbody": {"value": "true"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "INSTITUUT FYSIEKE VEILIGHEID"}, "ecnonprofit": {"value": "true"}, "ecresearchorganization": {"value": "false"}, "websiteurl": {"value": "http://www.ifv.nl"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "INSTITUUT FYSIEKE VEILIGHEID"}, "country": {"classid": "NL", "classname": "Netherlands", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}}}, "dateofcollection": "2018-03-12", "type": 20, "id": "20|corda_______::490e6333fc4b5b2f0bfbb94875b57911"} -{"dateoftransformation": "2018-11-20", "originalId": ["corda_______::987994083"], "collectedfrom": [{"key": "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f", "value": "CORDA - COmmon Research DAta Warehouse"}], "organization": {"metadata": {"eclegalbody": {"value": "true"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "INSTITUUT FYSIEKE VEILIGHEID"}, "ecnonprofit": {"value": "true"}, "ecresearchorganization": {"value": "false"}, "websiteurl": {"value": "http://www.ifv.nl"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "INSTITUUT FYSIEKE VEILIGHEID"}, "country": {"classid": "NL", "classname": "Netherlands", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}}}, "dateofcollection": "2018-03-12", "type": 20, "id": "20|corda_______::3ace7e70172b7ddce2ffc8db335e7cd3"} -{"dateoftransformation": "2018-11-20", "originalId": ["corda_______::999637672"], "collectedfrom": [{"key": "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f", "value": "CORDA - COmmon Research DAta Warehouse"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "ANL"}, "ecnonprofit": {"value": "true"}, "ecresearchorganization": {"value": "true"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "A I ALIKHANYAN NATIONAL SCIENCE LABORATORY"}, "country": {"classid": "AM", "classname": "Armenia", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}}}, "dateofcollection": "2018-03-12", "type": 20, "id": "20|corda_______::f76c86a31f38609cd3b7930279d9c7c6"} -{"dateoftransformation": "2019-04-16", "originalId": ["aka_________::3117bf00abc3330b48bb270494d46ce4"], "collectedfrom": [{"key": "10|openaire____::6ac933301a3933c8a22ceebea7000326", "value": "Academy of Finland"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Aalto University"}, "country": {"classid": "FI", "classname": "Finland", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2019-01-25", "type": 20, "id": "20|aka_________::c32beace3046af7a121b15237b1e4747"} -{"dateoftransformation":"2019-05-04","originalId":["opendoar____::Free_University_of_Bozen_-_Bolzano"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.unibz.it"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Free University of Bozen - Bolzano"},"country":{"classid":"IT","classname":"Italy","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2019-05-04","type":20,"id":"20|opendoar____::c230b60ca1a8a95150c3163e40899e5d"} -{"dateoftransformation":"2019-05-19","originalId":["rcuk________::E7C60D41-51F7-4C46-89DC-4E8F6D7DC64B"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Free University of Bozen-Bolzano"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-07","type":20,"id":"20|rcuk________::47a7d6a960f874fdd7c2678f16276cbf"} -{"dateoftransformation":"2019-05-19","originalId":["rcuk________::69417031-F8F1-4557-BF08-49096CDBF321"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"University of the Free State"},"country":{"classid":"ZA","classname":"South Africa","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-03","type":20,"id":"20|rcuk________::91d972791a1a3945078724a4ede959d4"} -{"dateoftransformation":"2019-05-19","originalId":["rcuk________::9218106A-E8CE-46A5-AABC-B4C8ED148690"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"University of Amsterdam"},"country":{"classid":"NL","classname":"Netherlands","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-03","type":20,"id":"20|rcuk________::77c8206d9739a62c542db14a00d51fc9"} -{"dateoftransformation":"2018-09-13","originalId":["opendoar____::Université_Libre_de_Bruxelles"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.ulb.ac.be/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Université Libre de Bruxelles"},"country":{"classid":"BE","classname":"Belgium","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2015-08-24","type":20,"id":"20|opendoar____::4348f2277945e85ff4fa371c89d5209e"} -{"dateoftransformation":"2018-09-13","originalId":["nsf_________::Liberty_University__Inc_"],"collectedfrom":[{"value":"NSF - National Science Foundation","key":"10|openaire____::dd69b4a1513c9de9f46faf24048da1e8"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Liberty University, Inc."},"country":{"classid":"US","classname":"United States","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2016-03-10","type":20,"id":"20|nsf_________::ef77f1e8314313a4d53ec4f19054b733"} -{"dateoftransformation":"2018-09-13","originalId":["opendoar____::Vrije_Universiteit_Amsterdam"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"VU"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.vu.nl/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Vrije Universiteit Amsterdam"},"country":{"classid":"NL","classname":"Netherlands","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2015-08-24","type":20,"id":"20|opendoar____::40e0928728ca1ea6ebb147ad307fc7db"} -{"dateoftransformation":"2018-11-12","originalId":["opendoar____::Burgas_Free_University"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"Бургаски свободен университет"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.bfu.bg/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Burgas Free University"},"country":{"classid":"BG","classname":"Bulgaria","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-11-12","type":20,"id":"20|opendoar____::28a99bd2330504b0dfb6c44192757bde"} -{"dateoftransformation":"2018-09-13","originalId":["opendoar____::Université_libre_de_Bruxelles"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.ulb.ac.be/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Université libre de Bruxelles"},"country":{"classid":"BE","classname":"Belgium","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2016-07-21","type":20,"id":"20|opendoar____::0e3d292f95a8f13fed04d7b3ac872b9f"} -{"dateoftransformation":"2018-11-12","originalId":["opendoar____::Freie_Universitat_Berlin"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Freie Universitat Berlin"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-11-12","type":20,"id":"20|opendoar____::5054b113a655361d929493a95d29e6f1"} -{"dateoftransformation":"2018-11-20","originalId":["corda_______::999879881"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse","key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"POLITECNICO DI MILANO"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.polimi.it"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"POLITECNICO DI MILANO"},"country":{"classid":"IT","classname":"Italy","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda_______::671b76de97f62c7ecf4a18cd5c5a24ce"} -{"dateoftransformation":"2018-12-15","originalId":["corda__h2020::999879881"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020","key":"10|openaire____::a55eb91348674d853191f4f4fd73d078"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"PoliMi"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.polimi.it"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"POLITECNICO DI MILANO"},"country":{"classid":"IT","classname":"Italy","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda__h2020::671b76de97f62c7ecf4a18cd5c5a24ce"} -{"dateoftransformation":"2018-11-20","originalId":["corda_______::999866689"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse","key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UNIVPM"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.univpm.it"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"UNIVERSITA POLITECNICA DELLE MARCHE"},"country":{"classid":"IT","classname":"Italy","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda_______::f050abb6c5aadea5488de784874ce4e6"} -{"dateoftransformation":"2018-12-15","originalId":["corda__h2020::999977754"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020","key":"10|openaire____::a55eb91348674d853191f4f4fd73d078"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"POLITO"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.polito.it"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"POLITECNICO DI TORINO"},"country":{"classid":"IT","classname":"Italy","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda__h2020::c565531bf1c09673c8d4ca4228b9d307"} -{"dateoftransformation":"2018-12-15","originalId":["corda__h2020::999866689"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020","key":"10|openaire____::a55eb91348674d853191f4f4fd73d078"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UNIVPM"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.univpm.it"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"UNIVERSITA POLITECNICA DELLE MARCHE"},"country":{"classid":"IT","classname":"Italy","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda__h2020::f050abb6c5aadea5488de784874ce4e6"} -{"dateoftransformation":"2018-12-15","originalId":["corda__h2020::999431159"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020","key":"10|openaire____::a55eb91348674d853191f4f4fd73d078"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"POLIBA"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.poliba.it"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"POLITECNICO DI BARI"},"country":{"classid":"IT","classname":"Italy","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda__h2020::395a43af55ac010c4781a6c8645db8a6"} -{"dateoftransformation":"2018-11-20","originalId":["corda_______::999431159"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse","key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"POLIBA"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"POLITECNICO DI BARI"},"country":{"classid":"IT","classname":"Italy","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda_______::395a43af55ac010c4781a6c8645db8a6"} -{"dateoftransformation":"2018-11-20","originalId":["corda_______::999977754"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse","key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"POLITO"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.polito.it"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"POLITECNICO DI TORINO"},"country":{"classid":"IT","classname":"Italy","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda_______::c565531bf1c09673c8d4ca4228b9d307"} -{"dateoftransformation":"2019-05-19","originalId":["rcuk________::84803376-D657-41A3-A5DB-E1341282462E"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Polytechnic University of Milan"},"country":{"classid":"IT","classname":"Italy","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-04","type":20,"id":"20|rcuk________::a1aacefbbd7d0b6ebd2085941388b46d"} -{"dateoftransformation":"2019-05-19","originalId":["rcuk________::16A2AC44-FDDA-4753-A927-26136DDCDA6D"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Polytechnic University of Turin"},"country":{"classid":"IT","classname":"Italy","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-03","type":20,"id":"20|rcuk________::146b02f079957d07f10099a0b53e02c3"} \ No newline at end of file +{"dateoftransformation":"2019-06-26","originalId":["corda__h2020::999864846"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020","key":"10|openaire____::a55eb91348674d853191f4f4fd73d078"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UPV"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.upv.es"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"UNIVERSITAT POLITECNICA DE VALENCIA"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda__h2020::542f36b24ca4203a5b4dfc8396ef7475"} +{"dateoftransformation":"2019-06-26","originalId":["corda__h2020::999828859"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020","key":"10|openaire____::a55eb91348674d853191f4f4fd73d078"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UPCT"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.upct.es"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"UNIVERSIDAD POLITECNICA DE CARTAGENA"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda__h2020::9df7abeaef39ed5bac9c3e3a10a2be91"} +{"dateoftransformation":"2019-06-26","originalId":["corda__h2020::999974844"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020","key":"10|openaire____::a55eb91348674d853191f4f4fd73d078"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UPM"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.upm.es"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"UNIVERSIDAD POLITECNICA DE MADRID"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda__h2020::7a44a773d0dc629e9af5cef563c2478a"} +{"dateoftransformation":"2018-11-20","originalId":["corda_______::999976202"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse","key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UPC"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"websiteurl":{"value":"http://www.upc.edu"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"UNIVERSITAT POLITECNICA DE CATALUNYA"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda_______::0a1f410e6b3374e015b9aead9d97731a"} +{"dateoftransformation":"2018-11-20","originalId":["corda_______::999864846"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse","key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UPV"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.upv.es"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"UNIVERSITAT POLITECNICA DE VALENCIA"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda_______::542f36b24ca4203a5b4dfc8396ef7475"} +{"dateoftransformation":"2018-11-20","originalId":["corda_______::999974844"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse","key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UPM"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.upm.es"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"UNIVERSIDAD POLITECNICA DE MADRID"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda_______::7a44a773d0dc629e9af5cef563c2478a"} +{"dateoftransformation":"2019-06-26","originalId":["corda__h2020::999976202"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020","key":"10|openaire____::a55eb91348674d853191f4f4fd73d078"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UPC"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.upc.edu"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"UNIVERSITAT POLITECNICA DE CATALUNYA"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda__h2020::0a1f410e6b3374e015b9aead9d97731a"} +{"dateoftransformation":"2018-11-20","originalId":["corda_______::999828859"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse","key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"legalshortname":{"value":"UPCT"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"websiteurl":{"value":"http://www.upct.es"},"ecnutscode":{"value":"false"},"legalname":{"value":"UNIVERSIDAD POLITECNICA DE CARTAGENA"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda_______::9df7abeaef39ed5bac9c3e3a10a2be91"} +{"dateoftransformation":"2018-09-19","originalId":["doajarticles::Universidad_Politécnica_de_Madrid"],"collectedfrom":[{"value":"DOAJ-Articles","key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"Universidad Politécnica de Madrid"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Universidad Politécnica de Madrid"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-09-19","type":20,"id":"20|doajarticles::3cdfe6eec5fafec302e02cb7be692318"} +{"dateoftransformation":"2018-09-19","originalId":["doajarticles::Universitat_Politècnica_de_València"],"collectedfrom":[{"value":"DOAJ-Articles","key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"Universitat Politècnica de València"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Universitat Politècnica de València"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-09-19","type":20,"id":"20|doajarticles::b9fc5f6fb04d0f8d7d0b163718f9785c"} +{"dateoftransformation":"2018-09-19","originalId":["doajarticles::Universidad_Politécnica_de_Cartagena"],"collectedfrom":[{"value":"DOAJ-Articles","key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"Universidad Politécnica de Cartagena"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Universidad Politécnica de Cartagena"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-09-19","type":20,"id":"20|doajarticles::cf3bddf37ca3440dbd8e5ba6e81f6df3"} +{"dateoftransformation":"2018-09-19","originalId":["doajarticles::Universitat_Politècnica_de_Catalunya"],"collectedfrom":[{"value":"DOAJ-Articles","key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"Universitat Politècnica de Catalunya"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Universitat Politècnica de Catalunya"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-09-19","type":20,"id":"20|doajarticles::7e232375e9902c823cf3cd4e1a16de5e"} +{"dateoftransformation":"2018-09-19","originalId":["doajarticles::Universidad_Politécnica_de_Cataluña"],"collectedfrom":[{"value":"DOAJ-Articles","key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"Universidad Politécnica de Cataluña"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Universidad Politécnica de Cataluña"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-09-19","type":20,"id":"20|doajarticles::98e85e22b787db82b4c806e4f452f456"} +{"dateoftransformation":"2018-09-19","originalId":["doajarticles::Universitat_Politècnica_deValència"],"collectedfrom":[{"value":"DOAJ-Articles","key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"Universitat Politècnica deValència"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Universitat Politècnica deValència"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-09-19","type":20,"id":"20|doajarticles::637209da5f9f175227735f91e63d4999"} +{"dateoftransformation":"2018-09-13","originalId":["re3data_____::c9820581fcddf3a7a50652e4ac5e5aab"],"collectedfrom":[{"value":"Registry of Research Data Repository","key":"10|openaire____::21f8a223b9925c2f87c404096080b046"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"https://www.upc.edu"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Universitat Politècnica de Catalunya"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|re3data_____::044d264e43ee7cd7bfbc3deb1532037a"} +{"dateoftransformation":"2019-04-11","originalId":["re3data_____::211b4cb099c317e9cd9073476566ca47"],"collectedfrom":[{"value":"Registry of Research Data Repository","key":"10|openaire____::21f8a223b9925c2f87c404096080b046"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.upv.es/index-en.html"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Universitat Politècnica de València"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2019-04-11","type":20,"id":"20|re3data_____::465c66fba7c037fde2dcc962f94e44a9"} +{"dateoftransformation":"2018-09-13","originalId":["re3data_____::fbdc48c28389453ae6008622c1e5b0e1"],"collectedfrom":[{"value":"Registry of Research Data Repository","key":"10|openaire____::21f8a223b9925c2f87c404096080b046"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UPM"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.upm.es/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Universidad Politécnica de Madrid"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-02-13","type":20,"id":"20|re3data_____::0cbf4c44e9d045ffb0aa303354fff947"} +{"dateoftransformation":"2018-09-13","originalId":["snsf________::Universitat_Politècnica_de_Catalunya"],"collectedfrom":[{"value":"SNSF - Swiss National Science Foundation","key":"10|openaire____::d8f3c25e18304608ce8e816e99603d7a"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Universitat Politècnica de Catalunya"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-09-23","type":20,"id":"20|snsf________::7e232375e9902c823cf3cd4e1a16de5e"} +{"dateoftransformation":"2018-09-13","originalId":["opendoar____::Universidad_Politécnica_de_Cartagena"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UPCT"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.upct.es/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Universidad Politécnica de Cartagena"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2015-08-24","type":20,"id":"20|opendoar____::cf3bddf37ca3440dbd8e5ba6e81f6df3"} +{"dateoftransformation":"2018-09-13","originalId":["opendoar____::Universitat_Politècnica_de_València"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.upv.es/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Universitat Politècnica de València"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2015-08-24","type":20,"id":"20|opendoar____::b9fc5f6fb04d0f8d7d0b163718f9785c"} +{"dateoftransformation":"2018-09-13","originalId":["opendoar____::Universidad_Politecnica_de_Madrid"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.upm.es/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Universidad Politecnica de Madrid"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2015-08-24","type":20,"id":"20|opendoar____::c4b8d1e68de570b064c666c370bc2168"} +{"dateoftransformation":"2018-11-12","originalId":["opendoar____::Universitat_Politènica_de_Catalunya"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UPC"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.upc.edu/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Universitat Politènica de Catalunya"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-11-12","type":20,"id":"20|opendoar____::54e1f400c1b5dba4fa5a18571d50c3f3"} +{"dateoftransformation":"2019-05-19","originalId":["rcuk________::B0DA50CA-D11E-4251-9678-4AA2F93DB545"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Polytechnic University of Catalonia"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-04","type":20,"id":"20|rcuk________::5cd82a9ed265a9cb392d185a688cad40"} +{"dateoftransformation":"2018-09-13","originalId":["openaire____::UE9MSS1SRUQgKFJldmlzdGFzIERpZ2l0YWxlcyBQb2xpdMOpY25pY2FzKQ==::UNIVERSIDAD POLITECNICA DE MADRID"],"collectedfrom":[{"value":"","key":""}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"UNIVERSIDAD POLITECNICA DE MADRID"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2016-11-03","type":20,"id":"20|openaire____::9f185b9f2f1c932b492f6fb53c8c5caf"} +{"dateoftransformation":"2018-09-13","originalId":["openaire____::issn24441309::UNIVERSIDAD POLITECNICA DE MADRID"],"collectedfrom":[{"value":"","key":""}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"UNIVERSIDAD POLITECNICA DE MADRID"},"country":{"classid":"ES","classname":"Spain","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2016-11-03","type":20,"id":"20|openaire____::935af0561af9c6e33326818b32805241"} \ No newline at end of file diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java index dfd5938..29d0952 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java @@ -1,6 +1,5 @@ package eu.dnetlib.pace.clustering; -import com.google.common.base.Joiner; import eu.dnetlib.pace.common.AbstractPaceFunctions; import java.util.*; @@ -20,14 +19,14 @@ public class KeywordsClustering extends AbstractClusteringFunction { protected Collection doApply(String s) { //takes city codes and keywords codes without duplicates - Set keywords = getCodes(s, translationMap, params.getOrDefault("windowSize", 4)); - Set cities = getCodes(s, cityMap, params.getOrDefault("windowSize", 4)); + Set keywords = getKeywords(s, params.getOrDefault("windowSize", 4)); + Set cities = getCities(s, params.getOrDefault("windowSize", 4)); //list of combination to return as result final Collection combinations = new LinkedHashSet(); - for (String keyword: keywords){ - for (String city: cities) { + for (String keyword: keywordsToCodes(keywords)){ + for (String city: citiesToCodes(cities)) { combinations.add(keyword+"-"+city); if (combinations.size()>=params.getOrDefault("max", 2)) { return combinations; diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java index 748eec9..a5f7bf6 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java @@ -18,6 +18,8 @@ import java.text.Normalizer; import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.Stream; /** * Set of common functions @@ -27,6 +29,10 @@ import java.util.regex.Pattern; */ public abstract class AbstractPaceFunctions { + + private static Map translationMap = AbstractPaceFunctions.loadMapFromClasspath("/eu/dnetlib/pace/config/translation_map.csv"); + private static Map cityMap = AbstractPaceFunctions.loadMapFromClasspath("/eu/dnetlib/pace/config/city_map.csv"); + protected static Set stopwords_en = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_en.txt"); protected static Set stopwords_de = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_de.txt"); protected static Set stopwords_es = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_es.txt"); @@ -212,99 +218,58 @@ public abstract class AbstractPaceFunctions { return sb.toString().trim(); } - public String keywordsToCode(String s1, Map translationMap, int windowSize){ + public String removeKeywords(String s, Set keywords) { - List tokens = Arrays.asList(s1.split(" ")); - - if (tokens.size() s1, Set s2){ - public double keywordsCompare(String s1, String s2){ + Set k1 = keywordsToCodes(s1); + Set k2 = keywordsToCodes(s2); - List keywords1 = getKeywords(s1); - List keywords2 = getKeywords(s2); - int longer = (keywords1.size()>keywords2.size())?keywords1.size():keywords2.size(); + int longer = (k1.size()>k2.size())?k1.size():k2.size(); - if (getKeywords(s1).isEmpty() || getKeywords(s2).isEmpty()) + if (k1.isEmpty() || k2.isEmpty()) return 1.0; else - return (double)CollectionUtils.intersection(getKeywords(s1),getKeywords(s2)).size()/(double)longer; + return (double)CollectionUtils.intersection(k1,k2).size()/(double)longer; } - //check if 2 strings have same keywords - public boolean sameKeywords(String s1, String s2){ - //at least 1 keyword in common - if (getKeywords(s1).isEmpty() || getKeywords(s2).isEmpty()) - return true; - else - return CollectionUtils.intersection(getKeywords(s1),getKeywords(s2)).size()>0; - } - //returns true if at least 1 city is in common - //returns true if a name has no cities - public boolean sameCity(String s1, String s2){ + //returns true if no cities are contained in names + //returns false if one of the two names have no city + public boolean sameCity(Set s1, Set s2){ - if (getCities(s1).isEmpty() || getCities(s2).isEmpty()) + Set c1 = citiesToCodes(s1); + Set c2 = citiesToCodes(s2); + + if (c1.isEmpty() && c2.isEmpty()) return true; - else - return CollectionUtils.intersection(getCities(s1), getCities(s2)).size()>0; + else { + if (c1.isEmpty() ^ c2.isEmpty()) + return false; + return CollectionUtils.intersection(c1, c2).size() > 0; + } } - //get the list of keywords in a string - public List getCities(String s) { - - final String regex = "\\bcity::[0-9]*\\b"; - - Pattern p = Pattern.compile(regex, Pattern.MULTILINE); - Matcher m = p.matcher(s); - List codes = new ArrayList<>(); - while (m.find()) { - codes.add(m.group(0)); - for (int i = 1; i <= m.groupCount(); i++) { - codes.add(m.group(0)); - } - } - return codes; + //convert the set of keywords to codes + public Set toCodes(Set keywords, Map translationMap) { + return keywords.stream().map(s -> translationMap.get(s)).collect(Collectors.toSet()); } - //get the list of keywords in a string - public List getKeywords(String s) { + public Set keywordsToCodes(Set keywords) { + return toCodes(keywords, translationMap); + } - final String regex = "\\bkey::[0-9]*\\b"; - - Pattern p = Pattern.compile(regex, Pattern.MULTILINE); - Matcher m = p.matcher(s); - List codes = new ArrayList<>(); - while (m.find()) { - codes.add(m.group(0)); - for (int i = 1; i <= m.groupCount(); i++) { - codes.add(m.group(0)); - } - } - return codes; + public Set citiesToCodes(Set keywords) { + return toCodes(keywords, cityMap); } protected String firstLC(final String s) { @@ -320,7 +285,7 @@ public abstract class AbstractPaceFunctions { } //get the list of codes into the input string - public Set getCodes(String s1, Map translationMap, int windowSize){ + public Set getKeywords(String s1, Map translationMap, int windowSize){ String s = cleanup(s1); @@ -340,7 +305,7 @@ public abstract class AbstractPaceFunctions { for (int i = 0; i<=tokens.size()-length; i++){ String candidate = Joiner.on(" ").join(tokens.subList(i, i + length)); if (translationMap.containsKey(candidate)) { - codes.add(translationMap.get(candidate)); + codes.add(candidate); s = s.replace(candidate, ""); } } @@ -352,4 +317,12 @@ public abstract class AbstractPaceFunctions { return codes; } + public Set getKeywords(String s1, int windowSize) { + return getKeywords(s1, translationMap, windowSize); + } + + public Set getCities(String s1, int windowSize) { + return getKeywords(s1, cityMap, windowSize); + } + } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/JaroWinklerNormalizedName.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/JaroWinklerNormalizedName.java index 285575a..546629b 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/JaroWinklerNormalizedName.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/JaroWinklerNormalizedName.java @@ -11,18 +11,6 @@ import java.util.Set; @DistanceClass("JaroWinklerNormalizedName") public class JaroWinklerNormalizedName extends SecondStringDistanceAlgo { - private static Set stopwordsEn = AbstractPaceFunctions.loadFromClasspath("/eu/dnetlib/pace/config/stopwords_en.txt"); - private static Set stopwordsIt = AbstractPaceFunctions.loadFromClasspath("/eu/dnetlib/pace/config/stopwords_it.txt"); - private static Set stopwordsDe = AbstractPaceFunctions.loadFromClasspath("/eu/dnetlib/pace/config/stopwords_de.txt"); - private static Set stopwordsFr = AbstractPaceFunctions.loadFromClasspath("/eu/dnetlib/pace/config/stopwords_fr.txt"); - private static Set stopwordsPt = AbstractPaceFunctions.loadFromClasspath("/eu/dnetlib/pace/config/stopwords_pt.txt"); - private static Set stopwordsEs = AbstractPaceFunctions.loadFromClasspath("/eu/dnetlib/pace/config/stopwords_es.txt"); - - //key=word, value=global identifier => example: "università"->"university", used to substitute the word with the global identifier - private static Map translationMap = AbstractPaceFunctions.loadMapFromClasspath("/eu/dnetlib/pace/config/translation_map.csv"); - - private static Map cityMap = AbstractPaceFunctions.loadMapFromClasspath("/eu/dnetlib/pace/config/city_map.csv"); - private Map params; public JaroWinklerNormalizedName(Map params){ @@ -49,28 +37,30 @@ public class JaroWinklerNormalizedName extends SecondStringDistanceAlgo { ca = filterAllStopWords(ca); cb = filterAllStopWords(cb); - //replace keywords with codes - String codesA = keywordsToCode(ca, translationMap, params.getOrDefault("windowSize", 4).intValue()); - String codesB = keywordsToCode(cb, translationMap, params.getOrDefault("windowSize",4).intValue()); + Set keywords1 = getKeywords(ca, params.getOrDefault("windowSize", 4).intValue()); + Set keywords2 = getKeywords(cb, params.getOrDefault("windowSize", 4).intValue()); - //replace cities with codes - codesA = keywordsToCode(codesA, cityMap, params.getOrDefault("windowSize", 4).intValue()); - codesB = keywordsToCode(codesB, cityMap, params.getOrDefault("windowSize", 4).intValue()); + Set cities1 = getCities(ca, params.getOrDefault("windowSize", 4).intValue()); + Set cities2 = getCities(cb, params.getOrDefault("windowSize", 4).intValue()); + + if (sameCity(cities1,cities2)) { + + if (keywordsCompare(keywords1, keywords2)>params.getOrDefault("threshold", 0.5).doubleValue()) { + + ca = removeKeywords(ca, keywords1); + ca = removeKeywords(ca, cities1); + cb = removeKeywords(cb, keywords2); + cb = removeKeywords(cb, cities2); - //if two names have same city - if (sameCity(codesA,codesB)){ - if (keywordsCompare(codesA, codesB)>params.getOrDefault("threshold", 0.5).doubleValue()) { - ca = removeCodes(codesA); - cb = removeCodes(codesB); if (ca.isEmpty() && cb.isEmpty()) return 1.0; else return normalize(ssalgo.score(ca,cb)); + } } return 0.0; - } @Override diff --git a/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/city_map.csv b/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/city_map.csv index e2d4855..6666157 100644 --- a/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/city_map.csv +++ b/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/city_map.csv @@ -6192,7 +6192,7 @@ city::753142;Zoliborz;Zalborz;Zalbórz; city::753866;Zamosc;Gorad Zamasc';Zamosc;Zamosc';Zamosca;Zamoscia;Zamose;Zamoshc;Zamoshch;Zamost'ye;Zamoste;Zamostja;Zamosts;Zamostė;Zamost’ye;Zamość;jamosichi;sa mxchch;zamoshichi;zamostsi;zha mo xi qi;zmwsz;Ζάμοστς;Горад Замасць;Замостя;Замосць;Замошч;Замошћ;Զամոշչ;זמושץ;ซามอชช์;ზამოსცი;ザモシチ;扎莫希奇;자모시치; city::755330;Wola;Volja;Воля; city::756092;Wawer;; -city::756135;Warsaw;Barsobia;Varsa;Varsava;Varsavia;Varsavja;Varshava;Varshavae;Varsja;Varsjá;Varso;Varsova;Varsovia;Varsovia - Warszawa;Varsovie;Varsovio;Varssavi;Varsuva;Varsòvia;Varsó;Varsóvia;Varşova;Varšava;Varšuva;Varșovia;Vársá;WAW;Warsaw;Warsawa;Warschau;Warskou;Warszaw;Warszawa;Waršawa;baleusyaba;hua sha;varshava;vorso;warsw;warushawa;wrsh;wrshw;wrsw;wxrsx;Βαρσοβία;Варшавæ;Варшава;Վարշավա;ווארשע;ורשה;װאַרשע;وارسو;ورشو;ۋارشاۋا;ܘܪܣܘ;वॉर्सो;วอร์ซอ;ვარშავა;ዋርሶው;ワルシャワ;华沙;華沙;바르샤바; +city::756135;Warsaw;warszawie;Barsobia;Varsa;Varsava;Varsavia;Varsavja;Varshava;Varshavae;Varsja;Varsjá;Varso;Varsova;Varsovia;Varsovia - Warszawa;Varsovie;Varsovio;Varssavi;Varsuva;Varsòvia;Varsó;Varsóvia;Varşova;Varšava;Varšuva;Varșovia;Vársá;WAW;Warsaw;Warsawa;Warschau;Warskou;Warszaw;Warszawa;Waršawa;baleusyaba;hua sha;varshava;vorso;warsw;warushawa;wrsh;wrshw;wrsw;wxrsx;Βαρσοβία;Варшавæ;Варшава;Վարշավա;ווארשע;ורשה;װאַרשע;وارسو;ورشو;ۋارشاۋا;ܘܪܣܘ;वॉर्सो;วอร์ซอ;ვარშავა;ዋርሶው;ワルシャワ;华沙;華沙;바르샤바;warszawskiej;warszawska; city::756867;Tomaszow Mazowiecki;Mazovijos Tomasuvas;Mazovijos Tomašuvas;Thomasovia;Tomashov Mazovecki;Tomashov Mazovjecki;Tomashuv-Mazovec'kij;Tomashuv-Mazovecki;Tomashuv-Mazoveckij;Tomasova Mazovecka;Tomaszow;Tomaszow Mazowiecki;Tomaszów;Tomaszów Mazowiecki;Tomašova Mazovecka;ma zuo fu she de qu tuo ma shu fu;twmswb mzwbyyzqy;tx ma chuf ma sx weiyt ski;Томашов Мазовецки;Томашов Мазовјецки;Томашув-Мазовецки;Томашув-Мазовецкий;Томашув-Мазовецький;טומשוב מזובייצקי;ตอมาชูฟมาซอเวียตสกี;馬佐夫舍地區托馬舒夫; city::757026;Tarnow;Gorad Tarnuu;Tarnov;Tarnova;Tarnow;Tarnuv;Tarnuvas;Tarnuw;Tarnów;Tarnůw;ta er nu fu;taleunupeu;tarnwf;tarunufu;trnwb;Горад Тарнуў;Тарнов;Тарнув;טארנע;טרנוב;تارنوف;タルヌフ;塔爾努夫;타르누프; city::757033;Tarnobrzeg;Gorad Tarnobzhag;Nova Tarnovia;QEP;Tarnobjeg;Tarnobrzeg;Tarnobrzyg;Tarnobzega;Tarnobzegas;Tarnobzheg;Tarnobzhege;Tarnobžega;Tarnobžegas;ta er nuo bu re ge;taleunobeujekeu;tarnwbzk;tarunobujeku;trnwbzg;Горад Тарнобжаг;Тарнобжег;Тарнобжеге;דזשיקאוו;טרנובזג;تارنوبزک;タルノブジェク;塔尔诺布热格;타르노브제크; @@ -6229,7 +6229,7 @@ city::3080165;Zielona Gora;Gorad Zjaljona-Gura;Gruentberg;Grunberg;Grünberg;IEG city::3080251;Zgierz;Gorad Zgezh;Zgeza;Zgezas;Zgezh;Zgeža;Zgežas;Zgierz;Zgjezh;Znkies;ci gai ri;jeugiesi;sex keiyr ch;zgyyz;zugyeshi;Ζγκιες;Горад Згеж;Згеж;Згјеж;זגייז;เซอเกียร์ช;ズギェシ;兹盖日;즈기에시; city::3080526;Zawiercie;Zaverce;Zaverche;Zavercis;Zavertse;Zaviercis;Zavjerce;Zawiercie;zha wei er qie;zwwyyrzh;Заверце;Заверче;Завјерће;זוויירצה;扎维尔切; city::3080985;Zabrze;Gorad Zabzheh;Hindenburg;Zabje;Zaborze;Zabrze;Zabzas;Zabze;Zabzhe;Zabžas;Zabže;Zobrze;jabeuje;sab che;zabjh;zabuje;zabzhh;zha bu re;zʼbzh;Горад Забжэ;Забже;Զաբժե;זאבזה;زابجه;زابژه;زبرزے;ซาบเช;ザブジェ;扎布热;자브제; -city::3081368;Wroclaw;Brassel;Breslau;Breslavia;Breslavl';Breslavl’;Gorad Vroclau;Vratislav;Vratislavia;Vroclav;Vroclava;Vroclavas;Vroclavo;Vroklave;Vroslav;Vrotslav;WRO;Wroclaw;Wroclow;Wrocław;Wrocłow;Wroklaw;Wroslaw;Wrosław;Wrócław;beulocheuwapeu;frwtswaf;fu luo ci wa fu;viratscahp;vrotsavapha;vrotslavi;vurotsuwafu;w rxtswaf;wrwslaw;wrwtswaf;wrwzlb;Βρότσλαβ;Вроцлав;Горад Вроцлаў;ברעסלוי;ורוצלב;فروتسواف;وروتسواف;وروسلاو;ڤرۆتسواف;व्रोत्सवाफ;விராத்ஸ்சாஃப்;วรอตสวัฟ;ვროცლავი;ヴロツワフ;弗罗茨瓦夫;브로츠와프; +city::3081368;Wroclaw;Brassel;Breslau;Breslavia;Breslavl';Breslavl’;Gorad Vroclau;Vratislav;Vratislavia;Vroclav;Vroclava;Vroclavas;Vroclavo;Vroklave;Vroslav;Vrotslav;WRO;Wroclaw;Wroclow;Wrocław;Wrocłow;Wroklaw;Wroslaw;Wrosław;Wrócław;beulocheuwapeu;frwtswaf;fu luo ci wa fu;viratscahp;vrotsavapha;vrotslavi;vurotsuwafu;w rxtswaf;wrwslaw;wrwtswaf;wrwzlb;Βρότσλαβ;Вроцлав;Горад Вроцлаў;ברעסלוי;ורוצלב;فروتسواف;وروتسواف;وروسلاو;ڤرۆتسواف;व्रोत्सवाफ;விராத்ஸ்சாஃப்;วรอตสวัฟ;ვროცლავი;ヴロツワフ;弗罗茨瓦夫;브로츠와프;Wrocławska; city::3081741;Wloclawek;Gorad Ulaclavak;Leslau;Vloclavek;Vloclaveka;Vloclavekas;Wloclawek;Włocławek;beuwocheuwabekeu;fu wo ci wa wei ke;fwtswafk;vuu~otsuwavu~eku;w wxts wa wek;Влоцлавек;Горад Улацлавак;ולוצלאווק;فوتسوافك;ووتس‌واوک;ววอตซวาเวก;ヴウォツワヴェク;弗沃茨瓦韦克;브워츠와베크; city::3082707;Walbrzych;Gorad Valbzhykh;Valbzhikh;Valbziha;Valbzychas;Valbžiha;Valbžychas;Walbrzych;Waldenburg;Waldenburg in Schlesien;Wałbrzych;baubeujiheu;vu~aubujifu;wa lb cik;wa wu bu ri he;wawb jykh;wawb zhykh;wlbzyk;Валбжих;Горад Валбжых;ולבזיך;واوب جيخ;واوب ژیخ;วาลบ์จิก;ヴァウブジフ;瓦烏布日赫;바우브지흐; city::3082914;Tychy;Tichau;Tihi;Tikhi;Tychos;Tychy;Tykhy;Tıhı;di hei;thi khi;tihi;tixa;tyshy;Тихи;Тыхы;تیشی;ทิคี;ティヒ;蒂黑;티히; diff --git a/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/translation_map.csv b/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/translation_map.csv index dc77a60..bab6836 100644 --- a/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/translation_map.csv +++ b/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/translation_map.csv @@ -1,6 +1,6 @@ key::1;university;università;università studi;universitario;universitaria;université;universitaire;universitaires;universidad;universitade;Universität;universitaet;Uniwersytet;университет;universiteit;πανεπιστήμιο key::2;studies;studi;études;estudios;estudos;Studien;studia;исследования;studies;σπουδές -key::3;advanced;superiore;supérieur;supérieure;supérieurs;supérieures;avancado;avancados;fortgeschrittene;fortgeschritten;zaawansowany;передовой;gevorderd;gevorderde;προχωρημένος;προχωρημένη;προχωρημένο;προχωρημένες;προχωρημένα +key::3;advanced;superiore;supérieur;supérieure;supérieurs;supérieures;avancado;avancados;fortgeschrittene;fortgeschritten;zaawansowany;передовой;gevorderd;gevorderde;προχωρημένος;προχωρημένη;προχωρημένο;προχωρημένες;προχωρημένα;wyzsza; key::4;institute;istituto;institut;instituto;instituto;Institut;instytut;институт;instituut;ινστιτούτο key::5;hospital;ospedale;hôpital;hospital;hospital;Krankenhaus;szpital;больница;ziekenhuis;νοσοκομείο key::6;research;ricerca;recherche;investigacion;pesquisa;Forschung;badania;исследования;onderzoek;έρευνα;erevna;erevnas @@ -38,7 +38,7 @@ key::37;federation;federazione;fédération;федерация;federatie;ομο key::38;observatory;osservatorio;observatoire;обсерватория;observatorium;αστεροσκοπείο key::39;bureau;ufficio;bureau;офис;bureau;γραφείο key::40;company;impresa;compagnie;société;компания;bedrijf;εταιρία -key::41;polytechnic;politecnico;polytechnique;политехника;polytechnisch;πολυτεχνείο;universita politecnica;polytechnic university +key::41;polytechnic;politecnico;polytechnique;политехника;polytechnisch;πολυτεχνείο;universita politecnica;polytechnic university;politechnika;politechniki;university technology;university science technology; key::42;coalition;coalizione;coalition;коалиция;coalitie;συνασπισμός key::43;initiative;iniziativa;initiative;инициатива;initiatief;πρωτοβουλία key::44;academic;accademico;académique;universitaire;акадеческий academisch;ακαδημαϊκός;ακαδημαϊκή;ακαδημαϊκό;ακαδημαϊκές;ακαδημαϊκοί @@ -47,13 +47,13 @@ key::46;division;divisione;division;отделение;divisie;τμήμα key::47;committee;comitato;comité;комитет;commissie;επιτροπή key::48;promotion;promozione;продвижение;proothisis;forderung key::49;medical;medicine;clinical;medicina;clinici;médico;medicina;clínica;médico;medicina;clínica;medizinisch;Medizin;klinisch;medisch;geneeskunde;klinisch;ιατρικός;ιατρική;ιατρικό;ιατρικά;κλινικός;κλινική;κλινικό;κλινικά;tıbbi;tıp;klinik;orvosi;orvostudomány;klinikai;zdravniški;medicinski;klinični;meditsiini;kliinik;kliiniline; -key::50;technology;technological;tecnologia;tecnologie;tecnología;tecnológico;tecnologia;tecnológico;Technologie;technologisch;technologie;technologisch;τεχνολογία;τεχνολογικός;τεχνολογική;τεχνολογικό;teknoloji;teknolojik;technológia;technológiai;tehnologija;tehnološki;tehnoloogia;tehnoloogiline; +key::50;technology;technological;tecnologia;tecnologie;tecnología;tecnológico;tecnologia;tecnológico;Technologie;technologisch;technologie;technologisch;τεχνολογία;τεχνολογικός;τεχνολογική;τεχνολογικό;teknoloji;teknolojik;technológia;technológiai;tehnologija;tehnološki;tehnoloogia;tehnoloogiline;technologii; key::51;science;scientific;scienza;scientifiche;scienze;ciencia;científico;ciência;científico;Wissenschaft;wissenschaftlich;wetenschap;wetenschappelijk;επιστήμη;επιστημονικός;επιστημονική;επιστημονικό;επιστημονικά;bilim;bilimsel;tudomány;tudományos;znanost;znanstveni;teadus;teaduslik; key::52;engineering;ingegneria;ingeniería;engenharia;Ingenieurwissenschaft;ingenieurswetenschappen;bouwkunde;μηχανικός;μηχανική;μηχανικό;mühendislik;mérnöki;Inženirstvo;inseneeria;inseneri; key::53;management;gestione;gestionale;gestionali;gestión;administración;gestão;administração;Verwaltung;management;διαχείριση;yönetim;menedzsment;vodstvo;upravljanje;management;juhtkond;juhtimine;haldus; key::54;energy;energia;energía;energia;Energie;energie;ενέργεια;enerji;energia;energija;energia; key::55;agricultural;agriculture;agricoltura;agricole;agrícola;agricultura;agrícola;agricultura;landwirtschaftlich;Landwirtschaft;landbouwkundig;landbouw;αγροτικός;αγροτική;αγροτικό;γεωργικός;γεωργική;γεωργικό;γεωργία;tarımsal;tarım;mezőgazdasági;mezőgazdaság;poljedelski;poljedelstvo;põllumajandus;põllumajanduslik; -key::56;information;informazione;información;informação;Information;informatie;πληροφορία;bilgi;információ;informacija;informatsioon; +key::56;information;informazione;información;informação;Information;informatie;πληροφορία;bilgi;információ;informacija;informatsioon;informatycznych; key::57;social;sociali;social;social;Sozial;sociaal;maatschappelijk;κοινωνικός;κοινωνική;κοινωνικό;κοινωνικά;sosyal;szociális;družbeni;sotsiaal;sotsiaalne; key::58;environmental;ambiente;medioambiental;ambiente;medioambiente;meioambiente;Umwelt;milieu;milieuwetenschap;milieukunde;περιβαλλοντικός;περιβαλλοντική;περιβαλλοντικό;περιβαλλοντικά;çevre;környezeti;okoliški;keskonna;; key::59;business;economia;economiche;economica;negocio;empresa;negócio;Unternehmen;bedrijf;bedrijfskunde;επιχείρηση;iş;üzleti;posel;ettevõte/äri; @@ -71,7 +71,7 @@ key::70;veterinary;veterinaria;veterinarie;veterinaria;veterinária;tierärtzlic key::71;chemistry;chimica;química;química;Chemie;chemie;scheikunde;χημεία;kimya;kémia;kemija;keemia; key::72;security;sicurezza;seguridad;segurança;Sicherheit;veiligheid;ασφάλεια;güvenlik;biztonsági;varnost;turvalisus;julgeolek; key::73;biotechnology;biotecnologia;biotecnologie;biotecnología;biotecnologia;Biotechnologie;biotechnologie;βιοτεχνολογία;biyoteknoloji;biotechnológia;biotehnologija;biotehnoloogia; -key::74;military;militare;militari;militar;militar;Militär;militair;leger;στρατιωτικός;στρατιωτική;στρατιωτικό;στρατιωτικά;askeri;katonai;vojaški;vojni;militaar; +key::74;military;militare;militari;militar;militar;Militär;militair;leger;στρατιωτικός;στρατιωτική;στρατιωτικό;στρατιωτικά;askeri;katonai;vojaški;vojni;militaar;wojskowa; key::75;theological;teologia;teologico;teológico;tecnológica;theologisch;theologisch;θεολογικός;θεολογική;θεολογικό;θεολογικά;teolojik;technológiai;teološki;teoloogia;usuteadus;teoloogiline; key::76;electronics;elettronica;electrónica;eletrônicos;Elektronik;elektronica;ηλεκτρονική;elektronik;elektronika;elektronika;elektroonika; key::77;forestry;forestale;forestali;silvicultura;forestal;floresta;Forstwirtschaft;bosbouw;δασοκομία;δασολογία;ormancılık;erdészet;gozdarstvo;metsandus; @@ -100,4 +100,4 @@ key::99;neurology;neurologia;neurologiche;neurología;neurologia;Neurologie;neur key::100;geology;geologia;geologiche;geología;geologia;Geologie;geologie;aardkunde;γεωλογία;jeoloji;geológia;földtudomány;geologija;geoloogia; key::101;microbiology;microbiologia;micro-biologia;microbiologiche;microbiología;microbiologia;Mikrobiologie;microbiologie;μικροβιολογία;mikrobiyoloji;mikrobiológia;mikrobiologija;mikrobioloogia; key::102;informatics;informatica;informática;informática;informatica; -key:103;forschungsgemeinschaft;comunita ricerca;research community;research foundation;research association +key:103;forschungsgemeinschaft;comunita ricerca;research community;research foundation;research association \ No newline at end of file diff --git a/dnet-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java b/dnet-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java index b845c39..58f86d0 100644 --- a/dnet-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java +++ b/dnet-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java @@ -137,6 +137,10 @@ public class ClusteringFunctionTest extends AbstractPaceTest { System.out.println("s3 = " + s3); System.out.println(cf.apply(Lists.newArrayList(title(s3)))); + final String s4 = "Politechniki Warszawskiej (Warsaw University of Technology)"; + System.out.println("s4 = " + s4); + System.out.println(cf.apply(Lists.newArrayList(title(s4)))); + } } diff --git a/dnet-pace-core/src/test/java/eu/dnetlib/pace/distance/DistanceAlgoTest.java b/dnet-pace-core/src/test/java/eu/dnetlib/pace/distance/DistanceAlgoTest.java index 6f196ec..11ae418 100644 --- a/dnet-pace-core/src/test/java/eu/dnetlib/pace/distance/DistanceAlgoTest.java +++ b/dnet-pace-core/src/test/java/eu/dnetlib/pace/distance/DistanceAlgoTest.java @@ -51,7 +51,7 @@ public class DistanceAlgoTest extends AbstractPaceFunctions { double result = jaroWinklerNormalizedName.distance("Free University of Bozen-Bolzano", "University of the Free State"); System.out.println("result = " + result); - assertEquals(1.0, result); + assertEquals(0.0, result); } @Test @@ -114,4 +114,13 @@ public class DistanceAlgoTest extends AbstractPaceFunctions { assertTrue(result> 0.9); } + @Test + public void testJaroWinklerNormalizedName8() { + final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params); + + double result = jaroWinklerNormalizedName.distance("Politechniki Warszawskiej (Warsaw University of Technology)", "Warsaw University of Technology"); + + System.out.println("result = " + result); + } + }