diff --git a/dhp-build/dhp-build-properties-maven-plugin/test.properties b/dhp-build/dhp-build-properties-maven-plugin/test.properties index a0b77cc..94e315b 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/test.properties +++ b/dhp-build/dhp-build-properties-maven-plugin/test.properties @@ -1,2 +1,2 @@ -# Thu Jan 13 17:22:11 CET 2022 +# Tue Mar 15 14:58:05 CET 2022 projectPropertyKey=projectPropertyValue diff --git a/dnet-dedup-test/src/test/java/eu/dnetlib/pace/DedupLocalTest.java b/dnet-dedup-test/src/test/java/eu/dnetlib/pace/DedupLocalTest.java index 99822a1..9a2e617 100644 --- a/dnet-dedup-test/src/test/java/eu/dnetlib/pace/DedupLocalTest.java +++ b/dnet-dedup-test/src/test/java/eu/dnetlib/pace/DedupLocalTest.java @@ -261,12 +261,12 @@ public class DedupLocalTest extends DedupTestUtils { @Ignore public void dedupTest() throws Exception { final String entitiesPath = Paths - .get(DedupLocalTest.class.getResource("/eu/dnetlib/pace/examples/publications.to.fix.json").toURI()) + .get(DedupLocalTest.class.getResource("/eu/dnetlib/pace/examples/openorgs.to.fix.json").toURI()) .toFile() .getAbsolutePath(); DedupConfig dedupConf = DedupConfig.load(readFileFromHDFS(Paths - .get(DedupLocalTest.class.getResource("/eu/dnetlib/pace/config/pub.new.tree.conf.json").toURI()) + .get(DedupLocalTest.class.getResource("/eu/dnetlib/pace/config/organization.current.conf.json").toURI()) .toFile() .getAbsolutePath())); diff --git a/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/config/organization.current.conf.json b/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/config/organization.current.conf.json index 8e3b90e..edb8cba 100644 --- a/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/config/organization.current.conf.json +++ b/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/config/organization.current.conf.json @@ -3,8 +3,9 @@ "threshold" : "0.99", "dedupRun" : "001", "entityType" : "organization", + "subEntityValue": "organization", "orderField" : "legalname", - "queueMaxSize" : "2000", + "queueMaxSize" : "100000", "groupMaxSize" : "50", "slidingWindowSize" : "200", "idPath":"$.id", @@ -143,10 +144,10 @@ } }, "model" : [ - { "name" : "country", "type" : "String", "path" : "$.organization.metadata.country.classid"}, - { "name" : "legalshortname", "type" : "String", "path" : "$.organization.metadata.legalshortname.value"}, - { "name" : "legalname", "type" : "String", "path" : "$.organization.metadata.legalname.value" }, - { "name" : "websiteurl", "type" : "URL", "path" : "$.organization.metadata.websiteurl.value" }, + { "name" : "country", "type" : "String", "path" : "$.country.classid"}, + { "name" : "legalshortname", "type" : "String", "path" : "$.legalshortname.value"}, + { "name" : "legalname", "type" : "String", "path" : "$.legalname.value" }, + { "name" : "websiteurl", "type" : "URL", "path" : "$.websiteurl.value" }, { "name" : "gridid", "type" : "String", "path" : "$.pid[?(@.qualifier.classid =='grid')].value"}, { "name" : "originalId", "type" : "String", "path" : "$.id" } ], @@ -154,7 +155,7 @@ "legalname" : [] }, "synonyms": { - "key::1": ["university","università", "universitas", "università studi","universitario","universitaria","université", "universite", "universitaire","universitaires","universidad","universitade","Universität","universitaet","Uniwersytet","университет","universiteit","πανεπιστήμιο","universitesi","universiteti", "universiti"], + "key::1": ["university","università", "universitas", "università studi","universitario","universitaria","université", "universite", "universitaire","universitaires","universidad","universitade","Universität","universitaet","Uniwersytet","университет","universiteit","πανεπιστήμιο","universitesi","universiteti", "universiti", "Πανεπιστήμιο", "panepistemio"], "key::2": ["studies","studi","études","estudios","estudos","Studien","studia","исследования","studies","σπουδές"], "key::3": ["advanced","superiore","supérieur","supérieure","supérieurs","supérieures","avancado","avancados","fortgeschrittene","fortgeschritten","zaawansowany","передовой","gevorderd","gevorderde","προχωρημένος","προχωρημένη","προχωρημένο","προχωρημένες","προχωρημένα","wyzsza"], "key::4": ["institute","istituto","institut","instituto","instituto","Institut","instytut","институт","instituut","ινστιτούτο"], @@ -163,7 +164,7 @@ "key::7": ["college","collegio","colegio","faculdade","Hochschule","Szkoła Wyższa","Высшая школа","κολλέγιο"], "key::8": ["foundation","fondazione","fondation","fundación","fundação","Stiftung","Fundacja","фонд","stichting","ίδρυμα","idryma"], "key::9": ["center","centro","centre","centro","centro","zentrum","centrum","центр","centrum","κέντρο"], - "key::10": ["national","nazionale","national","nationale","nationaux","nationales","nacional","nacional","national","krajowy","национальный","nationaal","nationale","εθνικό"], + "key::10": ["national","nazionale","national","nationale","nationaux","nationales","nacional","nacional","national","krajowy","национальный","nationaal","nationale","εθνικό", "eθνικό"], "key::11": ["association","associazione","association","asociación","associação","Verein","verband","stowarzyszenie","ассоциация","associatie"], "key::12": ["society","societa","société","sociedad","sociedade","gesellschaft","społeczeństwo","общество","maatschappij","κοινωνία"], "key::13": ["international","internazionale","international","internacional","internacional","international","międzynarodowy","Международный","internationaal","internationale","διεθνής","διεθνή","διεθνές"], diff --git a/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/examples/openorgs.to.fix.json b/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/examples/openorgs.to.fix.json index 574335d..51299ba 100644 --- a/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/examples/openorgs.to.fix.json +++ b/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/examples/openorgs.to.fix.json @@ -1,6 +1,5 @@ -{"eclegalbody": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "ecresearchorganization": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "legalname": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "Hospital de Olhos Sadalla Amin Ghanem"}, "pid": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "qualifier": {"classid": "GRID", "classname": "GRID", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "grid.459901.0"}, {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "qualifier": {"classid": "ROR", "classname": "ROR", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "https://ror.org/015w8tk05"}], "websiteurl": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "http://www.sadalla.com.br/en/"}, "oaiprovenance": null, "logourl": null, "collectedfrom": [{"dataInfo": null, "value": "OpenOrgs Database", "key": "10|openaire____::0362fcdb3076765d9c0041ad331553e8"}], "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "alternativeNames": [], "echighereducation": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "id": "20|openorgsmesh::285d56eaf89ddacbd37b3d8b5ce73110", "eclegalperson": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "lastupdatetimestamp": 1595945048456, "ecinternationalorganizationeurinterests": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "dateofcollection": "2020-07-16", "dateoftransformation": "2020-07-16", "ecnonprofit": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "ecenterprise": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "ecinternationalorganization": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "ecnutscode": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "legalshortname": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "Hospital de Olhos Sadalla Amin Ghanem"}, "country": {"classid": "BR", "classname": "Brazil", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "extraInfo": [], "originalId": ["openorgsmesh::0000049855-972d021b9956334e3c8ec88ebe3731e8"], "ecsmevalidated": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}} -{"eclegalbody": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "ecresearchorganization": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "legalname": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "Ghaem Hospital"}, "pid": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "qualifier": {"classid": "GRID", "classname": "GRID", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "grid.415529.e"}, {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "qualifier": {"classid": "ROR", "classname": "ROR", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "https://ror.org/05n9fs062"}], "websiteurl": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "http://www.ghaem-hospital.ir/e-n/Home.aspx"}, "oaiprovenance": null, "logourl": null, "collectedfrom": [{"dataInfo": null, "value": "OpenOrgs Database", "key": "10|openaire____::0362fcdb3076765d9c0041ad331553e8"}], "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "alternativeNames": [], "echighereducation": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "id": "20|openorgsmesh::74130fd7241f2acf33de68e5c38320c0", "eclegalperson": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "lastupdatetimestamp": 1595945048456, "ecinternationalorganizationeurinterests": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "dateofcollection": "2020-07-16", "dateoftransformation": "2020-07-16", "ecnonprofit": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "ecenterprise": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "ecinternationalorganization": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "ecnutscode": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "legalshortname": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "Ghaem Hospital"}, "country": {"classid": "IR", "classname": "Iran (Islamic Republic of)", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "extraInfo": [], "originalId": ["openorgsmesh::0000007665-aa660bce41085d633acab2ac7564c9a3"], "ecsmevalidated": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}} -{"eclegalbody": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "value": "false"}, "ecresearchorganization": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "value": "false"}, "legalname": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "value": "Sadalla Amin Ghanem Eye Hospital"}, "pid": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "qualifier": {"classid": "GRID", "classname": "GRID", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "grid.459901.0"}, {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "qualifier": {"classid": "ROR", "classname": "ROR", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "https://ror.org/015w8tk05"}], "websiteurl": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "value": "http://www.sadalla.com.br/en/"}, "oaiprovenance": null, "logourl": null, "collectedfrom": [{"dataInfo": null, "value": "OpenOrgs Database", "key": "10|openaire____::0362fcdb3076765d9c0041ad331553e8"}], "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "alternativeNames": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "value": "Hospital de Olhos Sadalla Amin Ghanem"}, {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "value": "Sadalla Amin Ghanem Eye Hospital"}], "echighereducation": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "value": "false"}, "id": "20|openorgs____::53f1430ea6da6379fa65680f46e88578", "eclegalperson": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "value": "false"}, "lastupdatetimestamp": 1595945048456, "ecinternationalorganizationeurinterests": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "value": "false"}, "dateofcollection": "2020-07-16", "dateoftransformation": "2020-07-16", "ecnonprofit": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "value": "false"}, "ecenterprise": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "value": "false"}, "ecinternationalorganization": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "value": "false"}, "ecnutscode": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "value": "false"}, "legalshortname": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "value": "Sadalla Amin Ghanem Eye Hospital"}, "country": {"classid": "BR", "classname": "Brazil", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "extraInfo": [], "originalId": ["openorgs____::0000049855"], "ecsmevalidated": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "value": "false"}} -{"eclegalbody": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "ecresearchorganization": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "legalname": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "Sadalla Amin Ghanem Eye Hospital"}, "pid": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "qualifier": {"classid": "GRID", "classname": "GRID", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "grid.459901.0"}, {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "qualifier": {"classid": "ROR", "classname": "ROR", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "https://ror.org/015w8tk05"}], "websiteurl": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "http://www.sadalla.com.br/en/"}, "oaiprovenance": null, "logourl": null, "collectedfrom": [{"dataInfo": null, "value": "OpenOrgs Database", "key": "10|openaire____::0362fcdb3076765d9c0041ad331553e8"}], "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "alternativeNames": [], "echighereducation": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "id": "20|openorgsmesh::6c8abe289b5c269a554ac63360e7f1da", "eclegalperson": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "lastupdatetimestamp": 1595945048456, "ecinternationalorganizationeurinterests": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "dateofcollection": "2020-07-16", "dateoftransformation": "2020-07-16", "ecnonprofit": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "ecenterprise": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "ecinternationalorganization": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "ecnutscode": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "legalshortname": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "Sadalla Amin Ghanem Eye Hospital"}, "country": {"classid": "BR", "classname": "Brazil", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "extraInfo": [], "originalId": ["openorgsmesh::0000049855-9a405d988b6b2129afaa097be4e10c23"], "ecsmevalidated": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}} -{"eclegalbody": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "ecresearchorganization": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "legalname": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "\u0628\u06cc\u0645\u0627\u0631\u0633\u062a\u0627\u0646 \u0642\u0627\u0626\u0645"}, "pid": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "qualifier": {"classid": "GRID", "classname": "GRID", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "grid.415529.e"}, {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "qualifier": {"classid": "ROR", "classname": "ROR", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "https://ror.org/05n9fs062"}], "websiteurl": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "http://www.ghaem-hospital.ir/e-n/Home.aspx"}, "oaiprovenance": null, "logourl": null, "collectedfrom": [{"dataInfo": null, "value": "OpenOrgs Database", "key": "10|openaire____::0362fcdb3076765d9c0041ad331553e8"}], "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "alternativeNames": [], "echighereducation": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "id": "20|openorgsmesh::0d6694cedbcb9ef04f29777911e66527", "eclegalperson": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "lastupdatetimestamp": 1595945048456, "ecinternationalorganizationeurinterests": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "dateofcollection": "2020-07-16", "dateoftransformation": "2020-07-16", "ecnonprofit": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "ecenterprise": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "ecinternationalorganization": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "ecnutscode": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}, "legalshortname": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "\u0628\u06cc\u0645\u0627\u0631\u0633\u062a\u0627\u0646 \u0642\u0627\u0626\u0645"}, "country": {"classid": "IR", "classname": "Iran (Islamic Republic of)", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "extraInfo": [], "originalId": ["openorgsmesh::0000007665-132774ec4dd8b15a8ab11036918b1e21"], "ecsmevalidated": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.5"}, "value": "false"}} -{"eclegalbody": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "value": "false"}, "ecresearchorganization": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "value": "false"}, "legalname": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "value": "Ghaem Hospital"}, "pid": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "qualifier": {"classid": "GRID", "classname": "GRID", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "grid.415529.e"}, {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "qualifier": {"classid": "ROR", "classname": "ROR", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "https://ror.org/05n9fs062"}], "websiteurl": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "value": "http://www.ghaem-hospital.ir/e-n/Home.aspx"}, "oaiprovenance": null, "logourl": null, "collectedfrom": [{"dataInfo": null, "value": "OpenOrgs Database", "key": "10|openaire____::0362fcdb3076765d9c0041ad331553e8"}], "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "alternativeNames": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "value": "Ghaem Hospital"}, {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "value": "\u0628\u06cc\u0645\u0627\u0631\u0633\u062a\u0627\u0646 \u0642\u0627\u0626\u0645"}], "echighereducation": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "value": "false"}, "id": "20|openorgs____::a006d049e5f37a53cab32dbf89137290", "eclegalperson": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "value": "false"}, "lastupdatetimestamp": 1595945048456, "ecinternationalorganizationeurinterests": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "value": "false"}, "dateofcollection": "2020-07-16", "dateoftransformation": "2020-07-16", "ecnonprofit": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "value": "false"}, "ecenterprise": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "value": "false"}, "ecinternationalorganization": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "value": "false"}, "ecnutscode": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "value": "false"}, "legalshortname": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "value": "Ghaem Hospital"}, "country": {"classid": "IR", "classname": "Iran (Islamic Republic of)", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "extraInfo": [], "originalId": ["openorgs____::0000007665"], "ecsmevalidated": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.99"}, "value": "false"}} \ No newline at end of file +{"collectedfrom":[{"key":"10|openaire____::5ad21762dcdc66b809a46010390f380e","value":"General Secretariat for Research and Innovation","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1633675959563,"id":"20|gsri________::2421e624989857655691b0247e59e2fd","originalId":["gsri________::773229da9e915fea7ffcd5ccd4611c42"],"pid":[],"dateofcollection":"2021-02-05","dateoftransformation":"2021-02-05","extraInfo":[],"oaiprovenance":null,"legalshortname":null,"legalname":{"value":"Εθνικό & Καποδιστριακό Πανεπιστήµιο Αθηνών (ΕΚΠΑ)","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"alternativeNames":[],"websiteurl":null,"logourl":null,"eclegalbody":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"eclegalperson":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecnonprofit":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecresearchorganization":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"echighereducation":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecinternationalorganizationeurinterests":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecinternationalorganization":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecenterprise":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsmevalidated":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecnutscode":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"country":{"classid":"GR","classname":"Greece","schemeid":"dnet:countries","schemename":"dnet:countries"}} +{"collectedfrom":[{"key":"10|openaire____::5ad21762dcdc66b809a46010390f380e","value":"General Secretariat for Research and Innovation","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1633675959563,"id":"20|gsri________::b3ee764cefa106bcf833e997768a074a","originalId":["gsri________::abd317fef4a1cf3c64261bc17f577a41"],"pid":[],"dateofcollection":"2021-02-05","dateoftransformation":"2021-02-05","extraInfo":[],"oaiprovenance":null,"legalshortname":null,"legalname":{"value":"ΕΚΠΑ","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"alternativeNames":[],"websiteurl":null,"logourl":null,"eclegalbody":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"eclegalperson":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecnonprofit":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecresearchorganization":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"echighereducation":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecinternationalorganizationeurinterests":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecinternationalorganization":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecenterprise":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsmevalidated":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecnutscode":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"country":{"classid":"GR","classname":"Greece","schemeid":"dnet:countries","schemename":"dnet:countries"}} +{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1633675959563,"id":"20|opendoar____::a135ccb3169c14f3764f9f95a654d0cb","originalId":["opendoar____::National_and_Kapodistrian_University_of_Athens_GR"],"pid":[],"dateofcollection":"2020-10-23","dateoftransformation":"2020-10-23","extraInfo":[],"oaiprovenance":null,"legalshortname":null,"legalname":{"value":"National and Kapodistrian University of Athens","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"alternativeNames":[],"websiteurl":{"value":"https://www.uoa.gr","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"logourl":null,"eclegalbody":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"eclegalperson":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecnonprofit":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecresearchorganization":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"echighereducation":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecinternationalorganizationeurinterests":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecinternationalorganization":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecenterprise":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsmevalidated":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecnutscode":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"country":{"classid":"GR","classname":"Greece","schemeid":"dnet:countries","schemename":"dnet:countries"}} +{"collectedfrom":[{"key":"10|openaire____::13337b1bfeb104bacd1ee867916a488a","value":"OrgReg","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1633675959563,"id":"20|orgreg______::50c36c5e3a74ce2eefbb2bd0038a411f","originalId":["orgreg______::GR0020"],"pid":[],"dateofcollection":"2021-03-09","dateoftransformation":"2021-03-09","extraInfo":[],"oaiprovenance":null,"legalshortname":{"value":"UOA (ΕΚΠΑ)","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"legalname":{"value":"Εθνικό και Καποδιστριακό Πανεπιστήμιο Αθηνών","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"alternativeNames":[],"websiteurl":{"value":"http://www.uoa.gr/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"logourl":null,"eclegalbody":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"eclegalperson":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecnonprofit":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecresearchorganization":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"echighereducation":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecinternationalorganizationeurinterests":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecinternationalorganization":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecenterprise":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsmevalidated":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecnutscode":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"country":{"classid":"GR","classname":"Greece","schemeid":"dnet:countries","schemename":"dnet:countries"}} +{"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1633675959563,"id":"20|re3data_____::3ed64836d8ccc534c4940a3967c2368a","originalId":["re3data_____::37f1fba4edd453c9386be4eee4403841"],"pid":[{"value":"ROR:04gnjpq42","qualifier":{"classid":"ROR","classname":"ROR","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofcollection":"2020-10-23","dateoftransformation":"2020-10-23","extraInfo":[],"oaiprovenance":null,"legalshortname":null,"legalname":{"value":"National and Kapodistrian University of Athens","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"alternativeNames":[],"websiteurl":{"value":"http://en.uoa.gr/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"logourl":null,"eclegalbody":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"eclegalperson":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecnonprofit":{"value":"true","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecresearchorganization":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"echighereducation":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecinternationalorganizationeurinterests":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecinternationalorganization":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecenterprise":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsmevalidated":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecnutscode":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"country":{"classid":"GR","classname":"Greece","schemeid":"dnet:countries","schemename":"dnet:countries"}} \ No newline at end of file diff --git a/dnet-pace-core/pom.xml b/dnet-pace-core/pom.xml index 21116ce..e4711b7 100644 --- a/dnet-pace-core/pom.xml +++ b/dnet-pace-core/pom.xml @@ -67,6 +67,11 @@ json-path + + com.ibm.icu + icu4j + + diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java index b95d1c7..e67275c 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java @@ -41,7 +41,7 @@ public class KeywordsClustering extends AbstractClusteringFunction { public Collection apply(final Config conf, List fields) { return fields.stream().filter(f -> !f.isEmpty()) .map(Field::stringValue) - .map(this::cleanup) //TODO can I add this to the AbstractClusteringFunction without overriding the method here? + .map(this::cleanup) .map(this::normalize) .map(s -> filterAllStopWords(s)) .map(s -> doApply(conf, s)) diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java index 858fe98..0802dfb 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java @@ -24,6 +24,7 @@ import java.util.function.Function; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; +import com.ibm.icu.text.Transliterator; /** * Set of common functions for the framework @@ -36,6 +37,7 @@ public abstract class AbstractPaceFunctions { private static Map cityMap = AbstractPaceFunctions.loadMapFromClasspath("/eu/dnetlib/pace/config/city_map.csv"); //list of stopwords in different languages + protected static Set stopwords_gr = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_gr.txt"); protected static Set stopwords_en = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_en.txt"); protected static Set stopwords_de = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_de.txt"); protected static Set stopwords_es = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_es.txt"); @@ -43,6 +45,9 @@ public abstract class AbstractPaceFunctions { protected static Set stopwords_it = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_it.txt"); protected static Set stopwords_pt = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_pt.txt"); + //transliterator + protected static Transliterator transliterator = Transliterator.getInstance("Any-Eng"); + //blacklist of ngrams: to avoid generic keys protected static Set ngramBlacklist = loadFromClasspath("/eu/dnetlib/pace/config/ngram_blacklist.txt"); @@ -68,15 +73,13 @@ public abstract class AbstractPaceFunctions { protected String cleanup(final String s) { - final String s00 = s.replaceAll(HTML_REGEX, ""); - final String s0 = unicodeNormalization(s00.toLowerCase()); - final String s1 = fixAliases(s0); - final String s2 = nfd(s1); - final String s3 = s2.replaceAll("–", " "); - final String s4 = s3.replaceAll("&", " "); - final String s5 = s4.replaceAll(""", " "); - final String s6 = s5.replaceAll("−", " "); - final String s7 = s6.replaceAll("([0-9]+)", " $1 "); + final String s1 = s.replaceAll(HTML_REGEX, ""); + final String s2 = unicodeNormalization(s1.toLowerCase()); + final String s3 = nfd(s2); + final String s4 = fixXML(s3); + final String s5 = s4.replaceAll("([0-9]+)", " $1 "); + final String s6 = transliterate(s5); + final String s7 = fixAliases(s6); final String s8 = s7.replaceAll("[^\\p{ASCII}]", ""); final String s9 = s8.replaceAll("[\\p{Punct}]", " "); final String s10 = s9.replaceAll("\\n", " "); @@ -85,6 +88,14 @@ public abstract class AbstractPaceFunctions { return s12; } + protected String fixXML(final String a){ + + return a.replaceAll("–", " ") + .replaceAll("&", " ") + .replaceAll(""", " ") + .replaceAll("−", " "); + } + protected boolean checkNumbers(final String a, final String b) { final String numbersA = getNumbers(a); final String numbersB = getNumbers(b); @@ -129,6 +140,15 @@ public abstract class AbstractPaceFunctions { return sb.toString(); } + protected static String transliterate(final String s) { + try { + return transliterator.transliterate(s); + } + catch(Exception e) { + return s; + } + } + protected String removeSymbols(final String s) { final StringBuilder sb = new StringBuilder(); @@ -147,7 +167,7 @@ public abstract class AbstractPaceFunctions { } protected String normalize(final String s) { - return nfd(unicodeNormalization(s)) + return fixAliases(transliterate(nfd(unicodeNormalization(s)))) .toLowerCase() // do not compact the regexes in a single expression, would cause StackOverflowError in case of large input strings .replaceAll("[^ \\w]+", "") @@ -200,6 +220,7 @@ public abstract class AbstractPaceFunctions { s = filterStopWords(s, stopwords_fr); s = filterStopWords(s, stopwords_pt); s = filterStopWords(s, stopwords_es); + s = filterStopWords(s, stopwords_gr); return s; } @@ -215,10 +236,13 @@ public abstract class AbstractPaceFunctions { } public static Set loadFromClasspath(final String classpath) { + + Transliterator transliterator = Transliterator.getInstance("Any-Eng"); + final Set h = Sets.newHashSet(); try { for (final String s : IOUtils.readLines(NGramUtils.class.getResourceAsStream(classpath))) { - h.add(s); + h.add(fixAliases(transliterator.transliterate(s))); //transliteration of the stopwords } } catch (final Throwable e) { return Sets.newHashSet(); @@ -227,6 +251,9 @@ public abstract class AbstractPaceFunctions { } public static Map loadMapFromClasspath(final String classpath) { + + Transliterator transliterator = Transliterator.getInstance("Any-Eng"); + final Map m = new HashMap<>(); try { for (final String s : IOUtils.readLines(AbstractPaceFunctions.class.getResourceAsStream(classpath))) { @@ -234,7 +261,7 @@ public abstract class AbstractPaceFunctions { String[] line = s.split(";"); String value = line[0]; for (int i = 1; i < line.length; i++) { - m.put(line[i].toLowerCase(), value); + m.put(fixAliases(transliterator.transliterate(line[i].toLowerCase())), value); } } } catch (final Throwable e) { diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/PaceConfig.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/PaceConfig.java index 3397110..5ae2edf 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/PaceConfig.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/PaceConfig.java @@ -3,6 +3,7 @@ package eu.dnetlib.pace.config; import com.fasterxml.jackson.annotation.JsonIgnore; import com.google.common.collect.Maps; +import com.ibm.icu.text.Transliterator; import eu.dnetlib.pace.common.AbstractPaceFunctions; import eu.dnetlib.pace.model.ClusteringDef; import eu.dnetlib.pace.model.FieldDef; @@ -43,10 +44,12 @@ public class PaceConfig extends AbstractPaceFunctions implements Serializable { public void initTranslationMap(){ translationMap = Maps.newHashMap(); + + Transliterator transliterator = Transliterator.getInstance("Any-Eng"); for (String key : synonyms.keySet()) { for (String term : synonyms.get(key)){ translationMap.put( - normalize(term.toLowerCase()), + fixAliases(transliterator.transliterate(term.toLowerCase())), key); } } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/JaroWinklerNormalizedName.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/JaroWinklerNormalizedName.java index 5f46150..d8e416d 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/JaroWinklerNormalizedName.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/JaroWinklerNormalizedName.java @@ -50,6 +50,9 @@ public class JaroWinklerNormalizedName extends AbstractComparator { cb = removeKeywords(cb, keywords2); cb = removeKeywords(cb, cities2); + ca = ca.replaceAll("[ ]{2,}", " "); + cb = cb.replaceAll("[ ]{2,}", " "); + if (ca.isEmpty() && cb.isEmpty()) return 1.0; else diff --git a/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/city_map.csv b/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/city_map.csv index 066afa7..360221a 100644 --- a/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/city_map.csv +++ b/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/city_map.csv @@ -909,7 +909,6 @@ city::2389086;Berberati;BBT;Berberati;Berbérati;Берберати; city::2389853;Bangui;BGF;Bangi;Bangis;Bangui;Mpan'nkoui;ban ji;bang-gi;bangi;bangwyy;Μπανγκουί;Банги;Бангі;בנגואי;بانگوئی;ባንጊ;バンギ;班基;방기; city::2255414;Pointe-Noire;PNR;Pointe-Noire;Puehnt-Nuar;Puent Nuaras;puaengteunualeu;Пуэнт-Нуар;푸앵트누아르; city::2258261;Dolisie;DIS;Dolisi;Dolisie;Dolisje;Dolizi;Dolosie;Loubomo;Lubomo;dolliji;dorishi;duo li xi;dwlysy;Долиси;Лубомо;دولیسی;ドリシー;多利西;돌리지; -city::2259383;Kayes;Jacob;Kai;Kajes;Kaye;Kayes;Kaï;Кайес; city::2260535;Brazzaville;BZV;Braza;Brazavil;Brazavilis;Brazavilo;Brazzavil';Brazzaville;Maya-Maya;Mprazabil;N'Tamo;beulajabil;brazafyl;brazawyl;brzwwyl;bu la chai wei er;burazavu~iru;Μπραζαβίλ;Браззавиль;ברזוויל;برازافيل;برازاویل;ብራዛቪል;ブラザヴィル;布拉柴维尔;브라자빌; city::2657896;Zurich;Cirihe;Cirikh;Ciurichas;Cjurikh;Cjurikh khot;Cuerih;Curych;Cürih;Cīrihe;Gorad Cjurykh;Lungsod ng Zuerich;Lungsod ng Zürich;Su-la-sie;Suerix;Syurix;Sürix;Sŭ-là̤-sié;Tsuerix;Tsurique;Tsürix;Turicum;Turitg;ZRH;Zeurich;Zirich;Zirik;Zuerich;Zuerigh;Zuerih;Zuric;Zurich;Zuricu;Zurigh;Zurigo;Zuriko;Zurique;Zurych;Zurìcu;Zyriche;Zyrihu;Zúric;Zúrich;Zürich;Zürigh;Zürih;churihhi;chwilihi;curikku;jhyurika;jurikha;su li shi;su rik;suricc;tsiurikhi;tsyryk;zi'urikha;zwrykh;zyryk;zyurikha;zywrch;zywrh;zywrkh;Ζυρίχη;Горад Цюрых;Цирих;Цюрих;Цюрих хот;Ցյուրիխ;ציריך;زوريخ;زوریخ;زيورخ;زیورخ;زیورچ;سيۇرىخ;څوریخ;ܙܝܘܪܚ;ܬܣܝܪܝܟ;ज़्यूरिख़;झ्युरिक;জুরিখ;ਜ਼ਿਊਰਿਖ;சூரிக்கு;സൂറിച്ച്;ซูริก;ဇူးရစ်ချ်မြို့;ციურიხი;ዙሪክ;チューリッヒ;苏黎世;蘇黎世;취리히; city::2657970;Winterthur;Eulachstadt;Gorad Vintehrtur;Vintertour;Vintertur;Vintertura;Vinterturas;Vinterturi;Vinterturo;Vintertūra;Vintertūras;Vitudurum;Winterthour;Winterthur;ZLI;binteotueo;fyntrtwr;vu~intato~uru;wen te tu er;wntrtwr;Βίντερτουρ;Винтертур;Вінтертур;Горад Вінтэртур;فينترتور;ونترتور;ونٹرتھر;ვინტერთური;ヴィンタートゥール;温特图尔;빈터투어; @@ -2994,7 +2993,7 @@ city::262036;Glyfada;Aixone;Glifadha;Glifádha;Glyfada;Glyfáda;Γλυφάδα; city::262135;Galatsi;Galatsi;Galatsion;Galátsi;Galátsion;Γαλάτσι;Γαλάτσιον; city::263986;Agios Dimitrios;Agios Dimitrios;Ayios Dhimitrios;Brakhami;Brakhámi;Áyios Dhimítrios;Άγιος Δημήτριος; city::264194;Agia Paraskevi;Agia Paraskeue;Agia Paraskevi;Agía Paraskeví;Ayia Paraskevi;Ayía Paraskeví;Αγία Παρασκευή; -city::264371;Athens;ATH;Afina;Afini;Afiny;An Aithin;Ateena;Atehny;Aten;Atena;Atenai;Atenas;Atenas - Athena;Atenas - Αθήνα;Atene;Atenes;Ateni;Ateno;Atenoj;Ateny;Athen;Athena;Athenae;Athenai;Athene;Athenes;Athens;Atheny;Athina;Athinai;Athinia;Athènes;Athén;Athénes;Athény;Athína;Athínai;Atina;Atény;Atēnas;Atėnai;Aþena;Kota Athena;Lungsod ng Athina;Lungsod ng Athína;atene;atene si;ateni;athensa;athyna;atn;etens;xethens;ya dian;Αθήνα;Αθήναι;Αθηνα;Αθηναι;Атина;Атэны;Афины;Афіни;Аѳины;Աթենք;אתונה;آتن;أثينا;ئافېنا;ܐܬܝܢܐ;अथेन्स;ஏதென்ஸ்;เอเธนส์;ათენი;Ἀθῆναι;アテネ;雅典;아테네;아테네 시; +city::264371;Athens;athenon;ATH;Afina;Afini;Afiny;An Aithin;Ateena;Atehny;Aten;Atena;Atenai;Atenas;Atenas - Athena;Atenas - Αθήνα;Αθηνών;Atene;Atenes;Ateni;Ateno;Atenoj;Ateny;Athen;Athena;Athenae;Athenai;Athene;Athenes;Athens;Atheny;Athina;Athinai;Athinia;Athènes;Athén;Athénes;Athény;Athína;Athínai;Atina;Atény;Atēnas;Atėnai;Aþena;Kota Athena;Lungsod ng Athina;Lungsod ng Athína;atene;atene si;ateni;athensa;athyna;atn;etens;xethens;ya dian;Αθήνα;Αθήναι;Αθηνα;Αθηναι;Атина;Атэны;Афины;Афіни;Аѳины;Աթենք;אתונה;آتن;أثينا;ئافېنا;ܐܬܝܢܐ;अथेन्स;ஏதென்ஸ்;เอเธนส์;ათენი;Ἀθῆναι;アテネ;雅典;아테네;아테네 시; city::265243;Marousi;Amarousio;Amarousion;Amaroúsion;Marousi;Maroussi;Maroúsi;Αμαρούσιον;Μαρούσι; city::265488;Acharnes;Acharnae;Acharnai;Acharne;Acharnes;Akharnai;Akharnaí;Menidhi;Menidhion;Menidi;Menidion;Menioi;Menídhi;Menídhion;Meníoi;Αχαρνές;Αχαρναί;Μενίδι;Μενίδιον; city::265533;Aigaleo;Aegaleo;Aigaleo;Aigáleo;Egaleo;Αιγάλεω; @@ -5210,7 +5209,6 @@ city::2451478;Segou;Segi;Segou;Segu;Segú;Senkou;Ségou;sai gu;segu;sgw;syghw;Σ city::2453348;Mopti;MZI;Mopti;Moptis;mo pu ti;mobti;moputi;mwbty;mwpty;Μοπτί;Мопти;Мопті;موبتي;موپتی;موپٹی;モプティ;莫普提;몹티; city::2453662;Markala;Markala; city::2454268;Koutiala;KTX;Koutiala;Kutiala;ku jia la;Кутиала;庫佳拉; -city::2455518;Kayes;Gorad Kaes;KYS;Kaes;Kagies;Kajes;Kajesas;Kayes;Kayi;ka yi;kai;kays;keseu;kyz;Καγιές;Горад Каес;Каес;Кайес;Каєс;كايس;کایس;کیز;კაესი;カイ;卡伊;케스; city::2457163;Gao;GAQ;Gao;Nkao;gao;gaw;gayw;jaw;jia ao;ka xo;Γκάο;Гао;Ґао;גאו;جاو;گائو;گاو;กาโอ;ガオ;加奥;가오; city::2460596;Bamako;BKO;Bamaco - Bamako;Bamakas;Bamako;Bamaku;Bamakó;Bamakɔ;Bammaco;Bammako;Mpamako;ba ma ke;bamako;bamakw;bmqw;Μπαμάκο;Бамако;Բամակո;במקו;باماكو;باماکو;ባማኮ;バマコ;巴馬科;바마코; city::1285173;Yenangyaung;Yaynangyoung;Yenangyaung;Yenangyoung; @@ -7474,7 +7472,6 @@ city::4177887;West Palm Beach;Litus Palmense Occidentale;Okcidenta Palm Beach;PB city::4178003;Weston;Uehston;Veston;Weston;vestana;wei si dun;wstwn;wstwn flwryda;Вестон;Уэстон;وستون;وستون، فلوریدا;वेस्टन;韦斯顿; city::4179320;Albany;ABY;Albany;City of Opportunity;Olbani;albani;albany;albany jarjya;albany jwrjya;ao er ba ni;olbeoni;orubani;Олбани;Олбані;آلبانی، جورجیا;ألباني;البانی، جارجیا;अल्बानी;オールバニ;奧爾巴尼;올버니; city::4179574;Alpharetta;Al'faretta;Alfareta;Alpharetta;New Prospect Campground;alfarta jwrjya;alfaryta;alfaryta jarjya;alphareta;Алфарета;Альфаретта;آلفارتا، جورجیا;ألفاريتا;الفاریتا، جارجیا;अल्फारेटा; -city::4180386;Athens;AHN;Atens;Atensas;Athens;Athens i Georgia;Athens-Clarke County;Atina;Atuns;Cedar Shoals;aeseonseu;asenzu;athensa;athyna;atn jwrjya;atynz jwrjya;ethensaklarka ka'unti;ya dian;Атенс;Атина;Атънс;אתנס;آتئنز، جورجیا;آتن، جورجیا;أثينا;ایتھنز، جارجیا;अथेन्स;एथेन्सक्लार्क काउन्टी;アセンズ;雅典;애선스; city::4180439;Atlanta;ATL;Atlant;Atlanta;Atlantae;Atlonta;Canebrake;Gorad Atlanta;Marthasdale;Marthasville;Standing Peachtree;Terminus;White Hall;Whitehall;aeteullaenta;arr‌lanra nagaram;atalanta;ateullaenta;ateullanta;atlanta;atoranta;atʼlantʼa;etalanta;etlanta;ya te lan da;Ατλάντα;Атлантæ;Атланта;Горад Атланта;Ատլանտա;אטלאנטא;אטלנטה;آتلانتا;أتلانتا;ئەتڵانتا;اٹلانٹا;اٹلانٹا، جارجیا;अटलांटा;अटलान्टा;एट्लान्टा;एत्लान्ता;আটলান্টা;એટલાન્ટા;அட்லான்டா;అట్లాంటా;ಅಟ್ಲಾಂಟಾ;അറ്റ്‌ലാന്റാ നഗരം;แอตแลนตา;ཨ་ཊི་ལཱན་ཊཱ།;အတ္တလန္တာမြို့;ატლანტა;አትላንታ;アトランタ;亚特兰大;亞特蘭大;아틀란타;아틀랜타;애틀랜타; city::4184530;Brookhaven;Brookhaven;Brookhaven Heights;Nort Atlanta;North Atlanta;brwk hawn jwrjya;brwkhafn;nartha etlanta;Норт Атланта;بروكهافن;بروک هاون، جورجیا;بروک ہیون، جارجیا;नर्थ एट्लान्टा; city::4188985;Columbus;CSG;Columbus;Kolambus;Kolumbas;Kolumbus;Kulumbus;ge lun bu;klmbws jwrjya;kolambasa;kolleombeoseu;kolumbus;koronbasu;kwlmbs jarjya;kwlwmbws;qwlwmbws;Коламбус;Колумбус;Кълъмбъс;קולומבוס;كولومبوس;کلمبوس، جورجیا;کولمبس، جارجیا;कोलम्बस;コロンバス;哥伦布;콜럼버스; @@ -7953,7 +7950,7 @@ city::5258957;La Crosse;Gateway City;LSE;La Crosse;La Kros;La-Kross;Lac Rosse;La city::5261457;Madison;Gorad Madysan;MSN;Madison;Madisonas;Madisonia;Madisons;Madisun;Mantison;Medison;Medisona;Mehdison;madisan;madison;madisoni;madyswn;maediseun;mai di xun;maidisana;mdysn wyskansyn;mdyswn;medisana;metican;Μάντισον;Горад Мадысан;Мадисон;Мадисън;Медисон;Медісон;Мэдисон;Մեդիսոն;מדיסון;ماديسون;مدیسن، ویسکانسین;میڈیسن;میڈیسون، وسکونسن;माडिसन्;मॅडिसन;मेडिसन;मैडिसन;மேடிசன்;მადისონი;マディソン;麦迪逊;매디슨; city::5263045;Milwaukee;Gorad Miluoki;Juneautown;Kilbourntown;MKE;Mahn-a-wau-kee Seepe;Mahn-a-wauk-ee See-pe;Mahn-a-waukee Seepe;Mahn-a-waukie;Mahn-ah-wauk Seepe;Mahnawauk;Man-a-wau-kee;Man-a-wauk-ee;Man-na-wah-kie;Mana'wa;Manawaki;Manawaukee;Manayaukee;Maunahwauke;Mee-lee-waug-ee;Meliki;Melleoki;Melwarik;Meneawkee;Meolaki;Mil-wah-kie;Milgouoki;Milioke;Millewacki;Millicki;Milo-aki;Milouagui;Milouakik;Milowages;Miluoki;Miluokʻi;Milvauchia;Milvoki;Milvokio;Milvokis;Milwacky;Milwahkie;Milwalka;Milwalky;Milwarck;Milwarik;Milwaucki;Milwaukee;Milwaukie;Minewaki;Miniaki;Minnawack;Winnipesaukee;mi er wo ji;mil wxki;mil-woki;mila'oyaki;milavoki;miluokʼi;milvaki;milvakki;milvauki;miruu~oki;mlwaky;mylwaky;mylwaky wyskansyn;mylwwqy;Μιλγουόκι;Горад Мілуокі;Милвоки;Милуоки;Мілуокі;Միլուոքի;מילוואקי;מילווקי;ملواکی;ميلواكي;میلواکی، ویسکانسین;मिलवॉकी;मिल्वौकी;মিলওয়াকি;மில்வாக்கி;మిల్వాకీ;ಮಿಲ್ವಾಕೀ;มิลวอกี;მილუოკი;ミルウォーキー;密尔沃基;密爾沃基;밀워키; city::5264870;North La Crosse;; -city::5265838;Oshkosh;Algoma;Athens;Brooklyn;OSH;Oshkosh;Oskosh;Sawdust City;ashkwsh wyskansyn;ausakosa;awshkwsh;awshkwsh wskwnsn;oshukoshu;Ошкош;אושקוש;أوشكوش;اشکوش، ویسکانسین;اوشکوش، وسکونسن;औशकोश;ოშკოში;オシュコシュ; +city::5265838;Oshkosh;Algoma;Brooklyn;OSH;Oshkosh;Oskosh;Sawdust City;ashkwsh wyskansyn;ausakosa;awshkwsh;awshkwsh wskwnsn;oshukoshu;Ошкош;אושקוש;أوشكوش;اشکوش، ویسکانسین;اوشکوش، وسکونسن;औशकोश;ოშკოში;オシュコシュ; city::5268249;Racine;Kipikawi;Port Gilbert;RAC;Racine;Rasijn;Rasin;Rasinas;la xin;leosin;rashin;rasini;rasyn;rasyn wskwnsn;rysyn wyskansyn;Расийн;Расин;Расін;ראסין;راسين;راسین، وسکونسن;ریسین، ویسکانسین;რასინი;ラシーン;拉辛;러신; city::5278052;Waukesha;Prairieville;UES;Uokesho;Uokisha;Vokesha;Vokisha;Waukesha;u~okisho;wakysha wyskansyn;wawkysha wskwnsn;wkysha;Вокеша;Вокиша;Уокешо;Уокиша;واوکیشا، وسکونسن;واکیشا، ویسکانسین;وكيشا;უოკეშო;ウォキショー; city::5278420;West Allis;Vest Alis;alys ghrby wyskansyn;wyst alys;Вест Алис;آلیس غربی، ویسکانسین;ويست أليس;ویسٹ الیس، وسکونسن;უესტ-ალისი; diff --git a/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/stopwords_gr.txt b/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/stopwords_gr.txt new file mode 100644 index 0000000..25deffb --- /dev/null +++ b/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/stopwords_gr.txt @@ -0,0 +1,847 @@ +ένα +έναν +ένας +αι +ακομα +ακομη +ακριβως +αληθεια +αληθινα +αλλα +αλλαχου +αλλες +αλλη +αλλην +αλλης +αλλιως +αλλιωτικα +αλλο +αλλοι +αλλοιως +αλλοιωτικα +αλλον +αλλος +αλλοτε +αλλου +αλλους +αλλων +αμα +αμεσα +αμεσως +αν +ανα +αναμεσα +αναμεταξυ +ανευ +αντι +αντιπερα +αντις +ανω +ανωτερω +αξαφνα +απ +απεναντι +απο +αποψε +από +αρα +αραγε +αργα +αργοτερο +αριστερα +αρκετα +αρχικα +ας +αυριο +αυτα +αυτες +αυτεσ +αυτη +αυτην +αυτης +αυτο +αυτοι +αυτον +αυτος +αυτοσ +αυτου +αυτους +αυτουσ +αυτων +αφοτου +αφου +αἱ +αἳ +αἵ +αὐτόσ +αὐτὸς +αὖ +α∆ιακοπα +βεβαια +βεβαιοτατα +γάρ +γα +γα^ +γε +γι +για +γοῦν +γρηγορα +γυρω +γὰρ +δ' +δέ +δή +δαί +δαίσ +δαὶ +δαὶς +δε +δεν +δι +δι' +διά +δια +διὰ +δὲ +δὴ +δ’ +εαν +εαυτο +εαυτον +εαυτου +εαυτους +εαυτων +εγκαιρα +εγκαιρως +εγω +ειθε +ειμαι +ειμαστε +ειναι +εις +εισαι +εισαστε +ειστε +ειτε +ειχα +ειχαμε +ειχαν +ειχατε +ειχε +ειχες +ει∆εμη +εκ +εκαστα +εκαστες +εκαστη +εκαστην +εκαστης +εκαστο +εκαστοι +εκαστον +εκαστος +εκαστου +εκαστους +εκαστων +εκει +εκεινα +εκεινες +εκεινεσ +εκεινη +εκεινην +εκεινης +εκεινο +εκεινοι +εκεινον +εκεινος +εκεινοσ +εκεινου +εκεινους +εκεινουσ +εκεινων +εκτος +εμας +εμεις +εμενα +εμπρος +εν +ενα +εναν +ενας +ενος +εντελως +εντος +εντωμεταξυ +ενω +ενός +εξ +εξαφνα +εξης +εξισου +εξω +επ +επί +επανω +επειτα +επει∆η +επι +επισης +επομενως +εσας +εσεις +εσενα +εστω +εσυ +ετερα +ετεραι +ετερας +ετερες +ετερη +ετερης +ετερο +ετεροι +ετερον +ετερος +ετερου +ετερους +ετερων +ετουτα +ετουτες +ετουτη +ετουτην +ετουτης +ετουτο +ετουτοι +ετουτον +ετουτος +ετουτου +ετουτους +ετουτων +ετσι +ευγε +ευθυς +ευτυχως +εφεξης +εχει +εχεις +εχετε +εχθες +εχομε +εχουμε +εχουν +εχτες +εχω +εως +εἰ +εἰμί +εἰμὶ +εἰς +εἰσ +εἴ +εἴμι +εἴτε +ε∆ω +η +ημασταν +ημαστε +ημουν +ησασταν +ησαστε +ησουν +ηταν +ητανε +ητοι +ηττον +η∆η +θα +ι +ιι +ιιι +ισαμε +ισια +ισως +ισωσ +ι∆ια +ι∆ιαν +ι∆ιας +ι∆ιες +ι∆ιο +ι∆ιοι +ι∆ιον +ι∆ιος +ι∆ιου +ι∆ιους +ι∆ιων +ι∆ιως +κ +καί +καίτοι +καθ +καθε +καθεμια +καθεμιας +καθενα +καθενας +καθενος +καθετι +καθολου +καθως +και +κακα +κακως +καλα +καλως +καμια +καμιαν +καμιας +καμποσα +καμποσες +καμποση +καμποσην +καμποσης +καμποσο +καμποσοι +καμποσον +καμποσος +καμποσου +καμποσους +καμποσων +κανεις +κανεν +κανενα +κανεναν +κανενας +κανενος +καποια +καποιαν +καποιας +καποιες +καποιο +καποιοι +καποιον +καποιος +καποιου +καποιους +καποιων +καποτε +καπου +καπως +κατ +κατά +κατα +κατι +κατιτι +κατοπιν +κατω +κατὰ +καὶ +κι +κιολας +κλπ +κοντα +κτλ +κυριως +κἀν +κἂν +λιγακι +λιγο +λιγωτερο +λογω +λοιπα +λοιπον +μέν +μέσα +μή +μήτε +μία +μα +μαζι +μακαρι +μακρυα +μαλιστα +μαλλον +μας +με +μεθ +μεθαυριο +μειον +μελει +μελλεται +μεμιας +μεν +μερικα +μερικες +μερικοι +μερικους +μερικων +μεσα +μετ +μετά +μετα +μεταξυ +μετὰ +μεχρι +μη +μην +μηπως +μητε +μη∆ε +μιά +μια +μιαν +μιας +μολις +μολονοτι +μοναχα +μονες +μονη +μονην +μονης +μονο +μονοι +μονομιας +μονος +μονου +μονους +μονων +μου +μπορει +μπορουν +μπραβο +μπρος +μἐν +μὲν +μὴ +μὴν +να +ναι +νωρις +ξανα +ξαφνικα +ο +οι +ολα +ολες +ολη +ολην +ολης +ολο +ολογυρα +ολοι +ολον +ολονεν +ολος +ολοτελα +ολου +ολους +ολων +ολως +ολως∆ιολου +ομως +ομωσ +οποια +οποιαν +οποιαν∆ηποτε +οποιας +οποιας∆ηποτε +οποια∆ηποτε +οποιες +οποιες∆ηποτε +οποιο +οποιοι +οποιον +οποιον∆ηποτε +οποιος +οποιος∆ηποτε +οποιου +οποιους +οποιους∆ηποτε +οποιου∆ηποτε +οποιο∆ηποτε +οποιων +οποιων∆ηποτε +οποι∆ηποτε +οποτε +οποτε∆ηποτε +οπου +οπου∆ηποτε +οπως +οπωσ +ορισμενα +ορισμενες +ορισμενων +ορισμενως +οσα +οσα∆ηποτε +οσες +οσες∆ηποτε +οση +οσην +οσην∆ηποτε +οσης +οσης∆ηποτε +οση∆ηποτε +οσο +οσοι +οσοι∆ηποτε +οσον +οσον∆ηποτε +οσος +οσος∆ηποτε +οσου +οσους +οσους∆ηποτε +οσου∆ηποτε +οσο∆ηποτε +οσων +οσων∆ηποτε +οταν +οτι +οτι∆ηποτε +οτου +ου +ουτε +ου∆ε +οχι +οἱ +οἳ +οἷς +οὐ +οὐδ +οὐδέ +οὐδείσ +οὐδεὶς +οὐδὲ +οὐδὲν +οὐκ +οὐχ +οὐχὶ +οὓς +οὔτε +οὕτω +οὕτως +οὕτωσ +οὖν +οὗ +οὗτος +οὗτοσ +παλι +παντοτε +παντου +παντως +παρ +παρά +παρα +παρὰ +περί +περα +περι +περιπου +περισσοτερο +περσι +περυσι +περὶ +πια +πιθανον +πιο +πισω +πλαι +πλεον +πλην +ποια +ποιαν +ποιας +ποιες +ποιεσ +ποιο +ποιοι +ποιον +ποιος +ποιοσ +ποιου +ποιους +ποιουσ +ποιων +πολυ +ποσες +ποση +ποσην +ποσης +ποσοι +ποσος +ποσους +ποτε +που +πουθε +πουθενα +ποῦ +πρεπει +πριν +προ +προκειμενου +προκειται +προπερσι +προς +προσ +προτου +προχθες +προχτες +πρωτυτερα +πρόσ +πρὸ +πρὸς +πως +πωσ +σαν +σας +σε +σεις +σημερα +σιγα +σου +στα +στη +στην +στης +στις +στο +στον +στου +στους +στων +συγχρονως +συν +συναμα +συνεπως +συνηθως +συχνα +συχνας +συχνες +συχνη +συχνην +συχνης +συχνο +συχνοι +συχνον +συχνος +συχνου +συχνους +συχνων +συχνως +σχε∆ον +σωστα +σόσ +σύ +σύν +σὸς +σὺ +σὺν +τά +τήν +τί +τίς +τίσ +τα +ταυτα +ταυτες +ταυτη +ταυτην +ταυτης +ταυτο,ταυτον +ταυτος +ταυτου +ταυτων +ταχα +ταχατε +ταῖς +τα∆ε +τε +τελικα +τελικως +τες +τετοια +τετοιαν +τετοιας +τετοιες +τετοιο +τετοιοι +τετοιον +τετοιος +τετοιου +τετοιους +τετοιων +τη +την +της +τησ +τι +τινα +τιποτα +τιποτε +τις +τισ +το +τοί +τοι +τοιοῦτος +τοιοῦτοσ +τον +τος +τοσα +τοσες +τοση +τοσην +τοσης +τοσο +τοσοι +τοσον +τοσος +τοσου +τοσους +τοσων +τοτε +του +τουλαχιστο +τουλαχιστον +τους +τουτα +τουτες +τουτη +τουτην +τουτης +τουτο +τουτοι +τουτοις +τουτον +τουτος +τουτου +τουτους +τουτων +τούσ +τοὺς +τοῖς +τοῦ +τυχον +των +τωρα +τό +τόν +τότε +τὰ +τὰς +τὴν +τὸ +τὸν +τῆς +τῆσ +τῇ +τῶν +τῷ +υπ +υπερ +υπο +υποψη +υποψιν +υπό +υστερα +φετος +χαμηλα +χθες +χτες +χωρις +χωριστα +ψηλα +ω +ωραια +ως +ωσ +ωσαν +ωσοτου +ωσπου +ωστε +ωστοσο +ωχ +ἀλλ' +ἀλλά +ἀλλὰ +ἀλλ’ +ἀπ +ἀπό +ἀπὸ +ἀφ +ἂν +ἃ +ἄλλος +ἄλλοσ +ἄν +ἄρα +ἅμα +ἐάν +ἐγώ +ἐγὼ +ἐκ +ἐμόσ +ἐμὸς +ἐν +ἐξ +ἐπί +ἐπεὶ +ἐπὶ +ἐστι +ἐφ +ἐὰν +ἑαυτοῦ +ἔτι +ἡ +ἢ +ἣ +ἤ +ἥ +ἧς +ἵνα +ὁ +ὃ +ὃν +ὃς +ὅ +ὅδε +ὅθεν +ὅπερ +ὅς +ὅσ +ὅστις +ὅστισ +ὅτε +ὅτι +ὑμόσ +ὑπ +ὑπέρ +ὑπό +ὑπὲρ +ὑπὸ +ὡς +ὡσ +ὥς +ὥστε +ὦ +ᾧ +∆α +∆ε +∆εινα +∆εν +∆εξια +∆ηθεν +∆ηλα∆η +∆ι +∆ια +∆ιαρκως +∆ικα +∆ικο +∆ικοι +∆ικος +∆ικου +∆ικους +∆ιολου +∆ιπλα +∆ιχως \ No newline at end of file diff --git a/dnet-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java b/dnet-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java index 8657b58..0487e5b 100644 --- a/dnet-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java +++ b/dnet-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java @@ -2,12 +2,15 @@ package eu.dnetlib.pace.clustering; import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import com.google.common.collect.Sets; import eu.dnetlib.pace.AbstractPaceTest; import eu.dnetlib.pace.common.AbstractPaceFunctions; import eu.dnetlib.pace.config.DedupConfig; import org.junit.jupiter.api.*; import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; public class ClusteringFunctionTest extends AbstractPaceTest { @@ -187,5 +190,14 @@ public class ClusteringFunctionTest extends AbstractPaceTest { System.out.println("s5 = " + s5); System.out.println(cf.apply(conf, Lists.newArrayList(title(s5)))); + final String s6 = "National and Kapodistrian University of Athens"; + System.out.println("s6 = " + s6); + System.out.println(cf.apply(conf, Lists.newArrayList(title(s6)))); + + final String s7 = "Εθνικό και Καποδιστριακό Πανεπιστήμιο Αθηνών"; + System.out.println("s7 = " + s7); + System.out.println(cf.apply(conf, Lists.newArrayList(title(s7)))); + } + } diff --git a/pom.xml b/pom.xml index a59c543..4a7fb94 100644 --- a/pom.xml +++ b/pom.xml @@ -424,6 +424,12 @@ test + + com.ibm.icu + icu4j + 70.1 + +