diff --git a/dnet-dedup-test/src/main/java/eu/dnetlib/ConnectedComponent.java b/dnet-dedup-test/src/main/java/eu/dnetlib/ConnectedComponent.java index 8ab587b..35149c0 100644 --- a/dnet-dedup-test/src/main/java/eu/dnetlib/ConnectedComponent.java +++ b/dnet-dedup-test/src/main/java/eu/dnetlib/ConnectedComponent.java @@ -1,13 +1,16 @@ package eu.dnetlib; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.pace.model.Field; import eu.dnetlib.pace.model.MapDocument; import eu.dnetlib.pace.util.PaceException; import org.codehaus.jackson.annotate.JsonIgnore; import java.io.IOException; import java.io.Serializable; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -15,13 +18,15 @@ public class ConnectedComponent implements Serializable { private Set docs; private String id; + private Map fieldMap; public ConnectedComponent() { } - public ConnectedComponent(String id, Set docs) { - this.id = id; + public ConnectedComponent(Set docs) { this.docs = docs; + this.id = createID(docs); + this.fieldMap = chooseFields(docs); } public Set getDocs() { @@ -40,14 +45,28 @@ public class ConnectedComponent implements Serializable { this.id = id; } - public void initializeID() { + public Map chooseFields(Set docs) { + + int maxLength = 0; + Map maxFieldMap = new HashMap<>(); + for (MapDocument doc : docs) { + if (doc.toString().length()>maxLength){ + maxFieldMap = doc.getFieldMap(); + maxLength = doc.toString().length(); + } + } + + return maxFieldMap; + } + + public String createID(Set docs) { if (docs.size() > 1) { String ccID = getMin(docs.stream().map(doc -> doc.getIdentifier()).collect(Collectors.toList())); String prefix = ccID.split("\\|")[0]; String id = ccID.split("::")[1]; - this.id = prefix + "|dedup_______::" + id; + return prefix + "|dedup_______::" + id; } else { - this.id = docs.iterator().next().getIdentifier(); + return docs.iterator().next().getIdentifier(); } } @@ -72,4 +91,12 @@ public class ConnectedComponent implements Serializable { throw new PaceException("Failed to create Json: ", e); } } + + public Map getFieldMap() { + return fieldMap; + } + + public void setFieldMap(Map fieldMap) { + this.fieldMap = fieldMap; + } } diff --git a/dnet-dedup-test/src/main/java/eu/dnetlib/SparkLocalTest.java b/dnet-dedup-test/src/main/java/eu/dnetlib/SparkLocalTest.java index 4d3e003..5193a57 100644 --- a/dnet-dedup-test/src/main/java/eu/dnetlib/SparkLocalTest.java +++ b/dnet-dedup-test/src/main/java/eu/dnetlib/SparkLocalTest.java @@ -32,11 +32,11 @@ public class SparkLocalTest { final JavaSparkContext context = new JavaSparkContext(spark.sparkContext()); - final URL dataset = SparkLocalTest.class.getResource("/eu/dnetlib/pace/softwares.json"); + final URL dataset = SparkLocalTest.class.getResource("/eu/dnetlib/pace/organization.to.fix.json"); final JavaRDD dataRDD = context.textFile(dataset.getPath()); //read the configuration from the classpath - final DedupConfig config = DedupConfig.load(Utility.readFromClasspath("/eu/dnetlib/pace/software.test.pace.conf", SparkLocalTest.class)); + final DedupConfig config = DedupConfig.load(Utility.readFromClasspath("/eu/dnetlib/pace/org.curr.conf", SparkLocalTest.class)); Map accumulators = Utility.constructAccumulator(config, context.sc()); @@ -83,15 +83,12 @@ public class SparkLocalTest { //print deduped connectedComponents.foreach(cc -> { - System.out.println("cc = " + cc.getId()); - for (MapDocument doc: cc.getDocs()) { - System.out.println(doc); - } + System.out.println(cc); + }); + //print nondeduped + nonDeduplicated.foreach(cc -> { + System.out.println(cc); }); -// //print nondeduped -// nonDeduplicated.foreach(cc -> { -// System.out.println("nd = " + cc.getId()); -// }); System.out.println("Non duplicates: " + nonDeduplicated.count()); System.out.println("Duplicates: " + connectedComponents.flatMap(cc -> cc.getDocs().iterator()).count()); diff --git a/dnet-dedup-test/src/main/java/eu/dnetlib/graph/GraphProcessor.scala b/dnet-dedup-test/src/main/java/eu/dnetlib/graph/GraphProcessor.scala index f893570..bc63101 100644 --- a/dnet-dedup-test/src/main/java/eu/dnetlib/graph/GraphProcessor.scala +++ b/dnet-dedup-test/src/main/java/eu/dnetlib/graph/GraphProcessor.scala @@ -32,8 +32,7 @@ object GraphProcessor { def asConnectedComponent(group: (VertexId, Iterable[MapDocument])): ConnectedComponent = { val docs = group._2.toSet[MapDocument] - val connectedComponent = new ConnectedComponent("empty", JavaConversions.setAsJavaSet[MapDocument](docs)); - connectedComponent.initializeID(); + val connectedComponent = new ConnectedComponent(JavaConversions.setAsJavaSet[MapDocument](docs)); connectedComponent } diff --git a/dnet-dedup-test/src/main/resources/eu/dnetlib/pace/org.curr.conf b/dnet-dedup-test/src/main/resources/eu/dnetlib/pace/org.curr.conf index fd4fbbe..e848061 100644 --- a/dnet-dedup-test/src/main/resources/eu/dnetlib/pace/org.curr.conf +++ b/dnet-dedup-test/src/main/resources/eu/dnetlib/pace/org.curr.conf @@ -5,7 +5,7 @@ "entityType" : "organization", "orderField" : "legalname", "queueMaxSize" : "2000", - "groupMaxSize" : "10", + "groupMaxSize" : "50", "slidingWindowSize" : "200", "rootBuilder" : [ "organization", "projectOrganization_participation_isParticipant", "datasourceOrganization_provision_isProvidedBy" ], "includeChildren" : "true" @@ -20,13 +20,12 @@ { "name" : "exactMatch", "fields" : [ "gridid" ] } ], "conditions" : [ - { "name" : "exactMatch", "fields" : [ "country" ] }, - { "name" : "DomainExactMatch", "fields" : [ "websiteurl" ] } + { "name" : "DomainExactMatch", "fields" : [ "websiteurl" ] }, + { "name" : "exactMatch", "fields" : [ "country" ] } ], "model" : [ - { "name" : "legalname", "algo" : "Null", "type" : "String", "weight" : "0", "ignoreMissing" : "false", "path" : "organization/metadata/legalname/value" }, - { "name" : "country", "algo" : "Null", "type" : "String", "weight" : "0", "ignoreMissing" : "true", "path" : "organization/metadata/country/classid" }, - { "name" : "legalshortname", "algo" : "JaroWinklerNormalizedName", "type" : "String", "weight" : "0.1", "ignoreMissing" : "true", "path" : "organization/metadata/legalshortname/value" }, + { "name" : "country", "algo" : "Null", "type" : "String", "weight" : "0", "ignoreMissing" : "false", "path" : "organization/metadata/country/classid" }, + { "name" : "legalshortname", "algo" : "JaroWinklerNormalizedName", "type" : "String", "weight" : "0.1", "ignoreMissing" : "false", "path" : "organization/metadata/legalshortname/value" }, { "name" : "legalname", "algo" : "JaroWinklerNormalizedName", "type" : "String", "weight" : "0.9", "ignoreMissing" : "false", "path" : "organization/metadata/legalname/value", "params" : {"windowSize" : 4, "threshold" : 0.5} }, { "name" : "websiteurl", "algo" : "Null", "type" : "URL", "weight" : "0", "ignoreMissing" : "true", "path" : "organization/metadata/websiteurl/value", "params" : { "host" : 0.5, "path" : 0.5 } }, { "name" : "gridid", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "pid[qualifier#classid = {grid}]/value" } diff --git a/dnet-dedup-test/src/main/resources/eu/dnetlib/pace/organization.to.fix.json b/dnet-dedup-test/src/main/resources/eu/dnetlib/pace/organization.to.fix.json index b56a10f..a867577 100644 --- a/dnet-dedup-test/src/main/resources/eu/dnetlib/pace/organization.to.fix.json +++ b/dnet-dedup-test/src/main/resources/eu/dnetlib/pace/organization.to.fix.json @@ -1,24 +1,30 @@ -{"dateoftransformation": "2018-09-13", "originalId": ["opendoar____::Fonds_zur_F\u00f6rderung_der_wissenschaftlichen_Forschung_(Austrian_Science_Fund)"], "collectedfrom": [{"value": "OpenDOAR", "key": "10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "FWF"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.fwf.ac.at/"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Fonds zur F\u00f6rderung der wissenschaftlichen Forschung (Austrian Science Fund)"}, "country": {"classid": "AT", "classname": "Austria", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2015-08-24", "type": 20, "id": "20|opendoar____::77e7cd67c60d0c18aa835ea6ea58122c"} -{"dateoftransformation": "2018-12-15", "originalId": ["corda__h2020::998735960"], "collectedfrom": [{"value": "CORDA - COmmon Research DAta Warehouse - Horizon 2020", "key": "10|openaire____::a55eb91348674d853191f4f4fd73d078"}], "organization": {"metadata": {"eclegalbody": {"value": "true"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "FWF"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "true"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.fwf.ac.at"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "FONDS ZUR F\u00d6RDERUNG DER WISSENSCHAFTLICHEN FORSCHUNG"}, "country": {"classid": "AT", "classname": "Austria", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2018-03-12", "type": 20, "id": "20|corda__h2020::83f579158b682262181b9a8ffdfa1124"} -{"dateoftransformation": "2018-11-20", "originalId": ["corda_______::998735960"], "collectedfrom": [{"value": "CORDA - COmmon Research DAta Warehouse", "key": "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}], "organization": {"metadata": {"eclegalbody": {"value": "true"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "FWF"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "true"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.fwf.ac.at"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "FONDS ZUR F\u00d6RDERUNG DER WISSENSCHAFTLICHEN FORSCHUNG"}, "country": {"classid": "AT", "classname": "Austria", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}}}, "dateofcollection": "2018-03-12", "type": 20, "id": "20|corda_______::83f579158b682262181b9a8ffdfa1124"} -{"dateoftransformation": "2018-09-27", "originalId": ["re3data_____::9f4430cdb5474d6db4bf84834533a7c9"], "collectedfrom": [{"value": "Registry of Research Data Repository", "key": "10|openaire____::21f8a223b9925c2f87c404096080b046"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "FWF"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "https://www.fwf.ac.at/en/"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Fonds zur F\u00f6rderung der wissenschaftlichen Forschung"}, "country": {"classid": "AT", "classname": "Austria", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2018-09-27", "type": 20, "id": "20|re3data_____::a3ac0376cc2a582357d821cec70a3e5b"} -{"dateoftransformation": "2018-12-15", "originalId": ["corda__h2020::999861936"], "collectedfrom": [{"value": "CORDA - COmmon Research DAta Warehouse - Horizon 2020", "key": "10|openaire____::a55eb91348674d853191f4f4fd73d078"}], "organization": {"metadata": {"eclegalbody": {"value": "true"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "UNITO"}, "ecresearchorganization": {"value": "true"}, "ecnonprofit": {"value": "true"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.unito.it"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "UNIVERSITA DEGLI STUDI DI TORINO"}, "country": {"classid": "IT", "classname": "Italy", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "true"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2018-03-12", "type": 20, "id": "20|corda__h2020::ef77a7bbe5796b0b47aa60947a5c6f41"} -{"dateoftransformation": "2018-11-20", "originalId": ["corda_______::999861936"], "collectedfrom": [{"value": "CORDA - COmmon Research DAta Warehouse", "key": "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}], "organization": {"metadata": {"eclegalbody": {"value": "true"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "UNITO"}, "ecresearchorganization": {"value": "true"}, "ecnonprofit": {"value": "true"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.unito.it"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "UNIVERSITA DEGLI STUDI DI TORINO"}, "country": {"classid": "IT", "classname": "Italy", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "true"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2018-03-12", "type": 20, "id": "20|corda_______::ef77a7bbe5796b0b47aa60947a5c6f41"} -{"dateoftransformation": "2018-09-13", "originalId": ["nih_________::UNIVERSITA_DI_TORINO"], "collectedfrom": [{"value": "NIH - National Institutes of Health", "key": "10|openaire____::9e9e8c76d739212c63eff362e321ba33"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "UNIVERSITA DI TORINO"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2016-07-11", "type": 20, "id": "20|nih_________::fdd37fcef9df7c69ae7d620bf21ab272"} -{"dateoftransformation": "2018-09-19", "originalId": ["doajarticles::Universit\u00e0_degli_Studi_di_Torino"], "collectedfrom": [{"value": "DOAJ-Articles", "key": "10|driver______::bee53aa31dc2cbb538c10c2b65fa5824"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "Universit\u00e0 degli Studi di Torino"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Universit\u00e0 degli Studi di Torino"}, "country": {"classid": "IT", "classname": "Italy", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2018-09-19", "type": 20, "id": "20|doajarticles::f7ef827f8fe1d870b6464ef1affc9605"} -{"dateoftransformation": "2018-11-12", "originalId": ["opendoar____::Universit\u00e0_degli_Studi_di_Torino"], "collectedfrom": [{"value": "OpenDOAR", "key": "10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.unito.it/"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Universit\u00e0 degli Studi di Torino"}, "country": {"classid": "IT", "classname": "Italy", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2018-11-12", "type": 20, "id": "20|opendoar____::f7ef827f8fe1d870b6464ef1affc9605"} -{"collectedfrom": [{"value": "GRID - Global Research Identifier Database", "key": "10|openaire____::ff4a008470319a22d9cf3d14af485977"}], "organization": {"metadata": {"legalshortname": {"value": "RPF"}, "websiteurl": {"value": "http://www.research.org.cy/EN/index.html/"}, "country": {"classid": "CY", "classname": "Cyprus", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "legalname": {"value": "RPF"}}}, "pid": [{"qualifier": {"classid": "grid", "classname": "grid", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "grid.14751.36"}], "id": "20|grid________::4f35352983a82950563eadfea49dc867", "type": 20} -{"collectedfrom": [{"value": "GRID - Global Research Identifier Database", "key": "10|openaire____::ff4a008470319a22d9cf3d14af485977"}], "organization": {"metadata": {"legalshortname": {"value": "RPF"}, "websiteurl": {"value": "http://www.research.org.cy/EN/index.html/"}, "country": {"classid": "CY", "classname": "Cyprus", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "legalname": {"value": "Research Promotion Foundation"}}}, "pid": [{"qualifier": {"classid": "grid", "classname": "grid", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "grid.14751.36"}], "id": "20|grid________::a42b3c67ea94b54ee941fb42fefd51d6", "type": 20} -{"dateoftransformation": "2018-08-08", "originalId": ["corda__h2020::999946035"], "collectedfrom": [{"value": "CORDA - COmmon Research DAta Warehouse - Horizon 2020", "key": "10|openaire____::a55eb91348674d853191f4f4fd73d078"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "RPF"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "true"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.research.org.cy"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "IDRYMA PROOTHISIS EREVNAS"}, "country": {"classid": "CY", "classname": "Cyprus", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2016-01-21", "type": 20, "id": "20|corda__h2020::a16918f80d830bf2b6daa5ec304f0e31"} -{"dateoftransformation": "2018-08-08", "originalId": ["corda_______::999946035"], "collectedfrom": [{"value": "CORDA - COmmon Research DAta Warehouse", "key": "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "RPF"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "true"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.research.org.cy"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "RESEARCH PROMOTION FOUNDATION"}, "country": {"classid": "CY", "classname": "Cyprus", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2015-09-10", "type": 20, "id": "20|corda_______::a16918f80d830bf2b6daa5ec304f0e31"} -{"collectedfrom": [{"value": "GRID - Global Research Identifier Database", "key": "10|openaire____::ff4a008470319a22d9cf3d14af485977"}], "organization": {"metadata": {"legalshortname": {"value": "DFG"}, "websiteurl": {"value": "http://www.dfg.de/en/"}, "country": {"classid": "DE", "classname": "Germany", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "legalname": {"value": "Deutsche Forschungsgemeinschaft"}}}, "pid": [{"qualifier": {"classid": "grid", "classname": "grid", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "grid.424150.6"}], "id": "20|grid________::7d83de934ecd5091d83334f752cef22c", "type": 20} -{"dateoftransformation": "2018-08-08", "originalId": ["corda_______::999547462"], "collectedfrom": [{"value": "CORDA - COmmon Research DAta Warehouse", "key": "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}], "organization": {"metadata": {"eclegalbody": {"value": "true"}, "eclegalperson": {"value": "true"}, "country": {"classid": "DE", "classname": "Germany", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "ecnonprofit": {"value": "true"}, "websiteurl": {"value": "http://www.dfg.de"}, "ecnutscode": {"value": "false"}, "legalname": {"value": "DEUTSCHE FORSCHUNGSGEMEINSCHAFT"}}}, "dateofcollection": "2015-09-10", "type": 20, "id": "20|corda_______::3f41cfb7d56cfea69f3ce9792b822eb4"} -{"dateoftransformation": "2018-09-28", "originalId": ["dfgf________::DFG"], "collectedfrom": [{"value": "", "key": ""}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "DFG"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Deutsche Forschungsgemeinschaft"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2018-09-28", "type": 20, "id": "20|dfgf________::3bbe57698e353a2acaa03306316658bb"} -{"dateoftransformation": "2018-09-28", "originalId": ["dfgf________::DFGF"], "collectedfrom": [{"value": "", "key": ""}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "DFG"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Deutsche Forschungsgemeinschaft"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2018-09-28", "type": 20, "id": "20|dfgf________::14a2847759c496334d510ff8fafbd464"} -{"dateoftransformation": "2018-06-04", "originalId": ["re3data_____::bf9c8e5c69ff222e3ee2ff0fc4d2b289"], "collectedfrom": [{"value": "Registry of Research Data Repository", "key": "10|openaire____::21f8a223b9925c2f87c404096080b046"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "German Research Foundation"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.dfg.de/"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Deutsche Forschungsgemeinschaft"}, "country": {"classid": "DE", "classname": "Germany", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2016-01-07", "type": 20, "id": "20|re3data_____::fbb08ab5e8cf8cd1056f61b84ddf05dd"} -{"originalId": ["https://academic.microsoft.com/#/detail/87707601"], "pid": [{"qualifier": {"classid": "urn", "classname": "urn", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "http://en.wikipedia.org/wiki/Deutsche_Forschungsgemeinschaft"}, {"qualifier": {"classid": "mag_id", "classname": "Microsoft Academic Graph Identifier", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "https://academic.microsoft.com/#/detail/87707601"}, {"qualifier": {"classid": "grid", "classname": "grid", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "grid.424150.6"}], "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}], "organization": {"metadata": {"websiteurl": {"value": "http://www.dfg.de/"}, "legalname": {"value": "Deutsche Forschungsgemeinschaft"}}}, "type": 20, "id": "20|microsoft___::e2edddabcc31b692b4ca7b89456e750a"} -{"dateoftransformation": "2018-08-08", "originalId": ["corda__h2020::999547462"], "collectedfrom": [{"value": "CORDA - COmmon Research DAta Warehouse - Horizon 2020", "key": "10|openaire____::a55eb91348674d853191f4f4fd73d078"}], "organization": {"metadata": {"eclegalbody": {"value": "true"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "DFG"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "true"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.dfg.de"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "DEUTSCHE FORSCHUNGSGEMEINSCHAFT"}, "country": {"classid": "DE", "classname": "Germany", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2016-01-21", "type": 20, "id": "20|corda__h2020::3f41cfb7d56cfea69f3ce9792b822eb4"} -{"dateoftransformation": "2018-06-04", "originalId": ["re3data_____::64ef0759fcfccf84cca028ba3c21aa23"], "collectedfrom": [{"value": "Registry of Research Data Repository", "key": "10|openaire____::21f8a223b9925c2f87c404096080b046"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "Deutsche Forschungsgemeinschaft"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.dfg.de/en/index.jsp"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "German Research Foundation"}, "country": {"classid": "DE", "classname": "Germany", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2016-01-07", "type": 20, "id": "20|re3data_____::e029b7e0de6cafc0c7126615c65458f0"} -{"dateoftransformation": "2018-06-04", "originalId": ["re3data_____::37e3bba353f88b4649d459c698483f6e"], "collectedfrom": [{"value": "Registry of Research Data Repository", "key": "10|openaire____::21f8a223b9925c2f87c404096080b046"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "Deutsche Forschungsgemeinschaft"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.dfg.de/en/index.jsp"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "German Research Association"}, "country": {"classid": "DE", "classname": "Germany", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2016-01-07", "type": 20, "id": "20|re3data_____::2080dc170e6cd7c6c06f403f8a08c1be"} -{"collectedfrom": [{"value": "GRID - Global Research Identifier Database", "key": "10|openaire____::ff4a008470319a22d9cf3d14af485977"}], "organization": {"metadata": {"legalshortname": {"value": "DFG"}, "websiteurl": {"value": "http://www.dfg.de/en/"}, "country": {"classid": "DE", "classname": "Germany", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "legalname": {"value": "DFG"}}}, "pid": [{"qualifier": {"classid": "grid", "classname": "grid", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "grid.424150.6"}], "id": "20|grid________::085fd89ec6f3f92c354e0bc027de2a58", "type": 20} -{"collectedfrom": [{"value": "GRID - Global Research Identifier Database", "key": "10|openaire____::ff4a008470319a22d9cf3d14af485977"}], "organization": {"metadata": {"legalshortname": {"value": "DFG"}, "websiteurl": {"value": "http://www.dfg.de/en/"}, "country": {"classid": "DE", "classname": "Germany", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "legalname": {"value": "German Research Foundation"}}}, "pid": [{"qualifier": {"classid": "grid", "classname": "grid", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "grid.424150.6"}], "id": "20|grid________::f0d88189673738d2a565aff99eeb59a2", "type": 20} \ No newline at end of file +{"dateoftransformation":"2018-11-12","originalId":["opendoar____::Humboldt_State_University"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.humboldt.edu/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt State University"},"country":{"classid":"US","classname":"United States","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-11-12","type":20,"id":"20|opendoar____::9c3522c59aef0edab19b8a3f0aeb39ed"} +{"dateoftransformation":"2019-05-19","originalId":["rcuk________::9758583A-FF1E-41C4-9176-B875E8FAC110"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt State University"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-04","type":20,"id":"20|rcuk________::7715018b4838eaf1d57242c788e222d4"} +{"dateoftransformation":"2018-09-13","originalId":["opendoar____::Humboldt-Universität_zu_Berlin"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"HU"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"https://www.hu-berlin.de/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt-Universität zu Berlin"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2015-08-24","type":20,"id":"20|opendoar____::13ab9ef517038d3751f4b0e31aea9ac7"} +{"dateoftransformation":"2018-09-27","originalId":["re3data_____::678d9d5a712331f6e2fce7b7b764090f"],"collectedfrom":[{"value":"Registry of Research Data Repository","key":"10|openaire____::21f8a223b9925c2f87c404096080b046"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"https://www.hu-berlin.de/de/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt-Universität Berlin"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-09-27","type":20,"id":"20|re3data_____::aeb488fd15eb1be77b998b5602450910"} +{"dateoftransformation":"2019-05-19","originalId":["rcuk________::CFF4C944-5CF1-4AE3-8C03-BE361D6DEDC3"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt University Berlin"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-03","type":20,"id":"20|rcuk________::ff1bccdf9520b3fadd2fc26103231de0"} +{"dateoftransformation":"2018-09-27","originalId":["re3data_____::4bda5f07be19914ce8e2e4652a72151c"],"collectedfrom":[{"value":"Registry of Research Data Repository","key":"10|openaire____::21f8a223b9925c2f87c404096080b046"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"https://www.hu-berlin.de/de"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt-Universität zu Berlin"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-09-27","type":20,"id":"20|re3data_____::d72a4f4665f7df9b48a22d4cfde0dd3c"} +{"dateoftransformation":"2018-09-13","originalId":["nih_________::HUMBOLDT_STATE_UNIVERSITY"],"collectedfrom":[{"value":"NIH - National Institutes of Health","key":"10|openaire____::9e9e8c76d739212c63eff362e321ba33"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"HUMBOLDT STATE UNIVERSITY"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2016-07-14","type":20,"id":"20|nih_________::8aec7ec3198fc69ce74e24b8f6aa9a59"} +{"dateoftransformation":"2018-09-19","originalId":["doajarticles::Humboldt-Universität_zu_Berlin"],"collectedfrom":[{"value":"DOAJ-Articles","key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"Humboldt-Universität zu Berlin"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt-Universität zu Berlin"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-09-19","type":20,"id":"20|doajarticles::13ab9ef517038d3751f4b0e31aea9ac7"} +{"dateoftransformation":"2018-11-20","originalId":["corda_______::999850781"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse","key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UBER"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.hu-berlin.de"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"HUMBOLDT-UNIVERSITAT ZU BERLIN"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda_______::d999b4c2dc81ccd40100056ab0543088"} +{"dateoftransformation":"2018-12-15","originalId":["corda__h2020::999850781"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020","key":"10|openaire____::a55eb91348674d853191f4f4fd73d078"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UBER"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.hu-berlin.de"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"HUMBOLDT-UNIVERSITAET ZU BERLIN"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda__h2020::d999b4c2dc81ccd40100056ab0543088"} +{"dateoftransformation": "2019-05-19", "originalId": ["rcuk________::9169966C-E38A-41D7-AF04-F7470963CBED"], "collectedfrom": [{"key": "10|openaire____::ab2d3310741ea80d3b8726f651502858", "value": "Research Councils UK"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Institute of Physics"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2017-11-04", "type": 20, "id": "20|rcuk________::3eb464c9a21582d7dbb3f115710d863c"} +{"dateoftransformation": "2019-05-19", "originalId": ["rcuk________::CEEF86B3-BB98-4CAE-848D-00837C745DEC"], "collectedfrom": [{"key": "10|openaire____::ab2d3310741ea80d3b8726f651502858", "value": "Research Councils UK"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Yerevan Physics Institute"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2017-11-03", "type": 20, "id": "20|rcuk________::f8790ac2aa4eb6fb7cc2980eb7971ee6"} +{"dateoftransformation": "2018-09-13", "originalId": ["opendoar____::Aalto_University"], "collectedfrom": [{"key": "10|openaire____::47ce9e9f4fad46e732cff06419ecaabb", "value": "OpenDOAR"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.aalto.fi/en/"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Aalto University"}, "country": {"classid": "FI", "classname": "Finland", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2015-08-24", "type": 20, "id": "20|opendoar____::98845925f422ef4987294d6bfac525dd"} +{"dateoftransformation": "2018-09-13", "originalId": ["nsf_________::Institute_of_Physics"], "collectedfrom": [{"key": "10|openaire____::dd69b4a1513c9de9f46faf24048da1e8", "value": "NSF - National Science Foundation"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Institute of Physics"}, "country": {"classid": "YU", "classname": "Yugoslavia", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2016-03-10", "type": 20, "id": "20|nsf_________::d540e5e89b1ace31d5dd0b8f658056ec"} +{"dateoftransformation": "2019-05-19", "originalId": ["rcuk________::EE4840D3-84C0-47A3-9109-30F67D0D550F"], "collectedfrom": [{"key": "10|openaire____::ab2d3310741ea80d3b8726f651502858", "value": "Research Councils UK"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Aalto University"}, "country": {"classid": "FI", "classname": "Finland", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2017-11-03", "type": 20, "id": "20|rcuk________::7559c8bbff5125d74919775a1f290496"} +{"dateoftransformation": "2019-05-29", "originalId": ["irb_hr______::Institute of Physics, Zagreb"], "collectedfrom": [{"key": "10|openaire____::db600878200645bd752cf7fd96a37df5", "value": "Rudjer Boskovic Institute Library - Croatian Projects"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Institute of Physics, Zagreb"}, "country": {"classid": "HR", "classname": "Croatia", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2018-06-15", "type": 20, "id": "20|irb_hr______::d0147c5dfa57d00b5bbd8405366d5ed9"} +{"dateoftransformation": "2018-11-20", "originalId": ["corda_______::918297740"], "collectedfrom": [{"key": "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f", "value": "CORDA - COmmon Research DAta Warehouse"}], "organization": {"metadata": {"eclegalbody": {"value": "true"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "INSTITUUT FYSIEKE VEILIGHEID"}, "ecnonprofit": {"value": "true"}, "ecresearchorganization": {"value": "false"}, "websiteurl": {"value": "http://www.ifv.nl"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "INSTITUUT FYSIEKE VEILIGHEID"}, "country": {"classid": "NL", "classname": "Netherlands", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}}}, "dateofcollection": "2018-03-12", "type": 20, "id": "20|corda_______::490e6333fc4b5b2f0bfbb94875b57911"} +{"dateoftransformation": "2018-11-20", "originalId": ["corda_______::987994083"], "collectedfrom": [{"key": "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f", "value": "CORDA - COmmon Research DAta Warehouse"}], "organization": {"metadata": {"eclegalbody": {"value": "true"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "INSTITUUT FYSIEKE VEILIGHEID"}, "ecnonprofit": {"value": "true"}, "ecresearchorganization": {"value": "false"}, "websiteurl": {"value": "http://www.ifv.nl"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "INSTITUUT FYSIEKE VEILIGHEID"}, "country": {"classid": "NL", "classname": "Netherlands", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}}}, "dateofcollection": "2018-03-12", "type": 20, "id": "20|corda_______::3ace7e70172b7ddce2ffc8db335e7cd3"} +{"dateoftransformation": "2018-11-20", "originalId": ["corda_______::999637672"], "collectedfrom": [{"key": "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f", "value": "CORDA - COmmon Research DAta Warehouse"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "ANL"}, "ecnonprofit": {"value": "true"}, "ecresearchorganization": {"value": "true"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "A I ALIKHANYAN NATIONAL SCIENCE LABORATORY"}, "country": {"classid": "AM", "classname": "Armenia", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}}}, "dateofcollection": "2018-03-12", "type": 20, "id": "20|corda_______::f76c86a31f38609cd3b7930279d9c7c6"} +{"dateoftransformation": "2019-04-16", "originalId": ["aka_________::3117bf00abc3330b48bb270494d46ce4"], "collectedfrom": [{"key": "10|openaire____::6ac933301a3933c8a22ceebea7000326", "value": "Academy of Finland"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Aalto University"}, "country": {"classid": "FI", "classname": "Finland", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2019-01-25", "type": 20, "id": "20|aka_________::c32beace3046af7a121b15237b1e4747"} +{"dateoftransformation":"2019-05-04","originalId":["opendoar____::Free_University_of_Bozen_-_Bolzano"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.unibz.it"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Free University of Bozen - Bolzano"},"country":{"classid":"IT","classname":"Italy","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2019-05-04","type":20,"id":"20|opendoar____::c230b60ca1a8a95150c3163e40899e5d"} +{"dateoftransformation":"2019-05-19","originalId":["rcuk________::E7C60D41-51F7-4C46-89DC-4E8F6D7DC64B"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Free University of Bozen-Bolzano"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-07","type":20,"id":"20|rcuk________::47a7d6a960f874fdd7c2678f16276cbf"} +{"dateoftransformation":"2019-05-19","originalId":["rcuk________::69417031-F8F1-4557-BF08-49096CDBF321"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"University of the Free State"},"country":{"classid":"ZA","classname":"South Africa","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-03","type":20,"id":"20|rcuk________::91d972791a1a3945078724a4ede959d4"} +{"dateoftransformation":"2019-05-19","originalId":["rcuk________::9218106A-E8CE-46A5-AABC-B4C8ED148690"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"University of Amsterdam"},"country":{"classid":"NL","classname":"Netherlands","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-03","type":20,"id":"20|rcuk________::77c8206d9739a62c542db14a00d51fc9"} +{"dateoftransformation":"2018-09-13","originalId":["opendoar____::Université_Libre_de_Bruxelles"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.ulb.ac.be/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Université Libre de Bruxelles"},"country":{"classid":"BE","classname":"Belgium","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2015-08-24","type":20,"id":"20|opendoar____::4348f2277945e85ff4fa371c89d5209e"} +{"dateoftransformation":"2018-09-13","originalId":["nsf_________::Liberty_University__Inc_"],"collectedfrom":[{"value":"NSF - National Science Foundation","key":"10|openaire____::dd69b4a1513c9de9f46faf24048da1e8"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Liberty University, Inc."},"country":{"classid":"US","classname":"United States","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2016-03-10","type":20,"id":"20|nsf_________::ef77f1e8314313a4d53ec4f19054b733"} +{"dateoftransformation":"2018-09-13","originalId":["opendoar____::Vrije_Universiteit_Amsterdam"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"VU"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.vu.nl/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Vrije Universiteit Amsterdam"},"country":{"classid":"NL","classname":"Netherlands","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2015-08-24","type":20,"id":"20|opendoar____::40e0928728ca1ea6ebb147ad307fc7db"} +{"dateoftransformation":"2018-11-12","originalId":["opendoar____::Burgas_Free_University"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"Бургаски свободен университет"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.bfu.bg/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Burgas Free University"},"country":{"classid":"BG","classname":"Bulgaria","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-11-12","type":20,"id":"20|opendoar____::28a99bd2330504b0dfb6c44192757bde"} +{"dateoftransformation":"2018-09-13","originalId":["opendoar____::Université_libre_de_Bruxelles"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.ulb.ac.be/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Université libre de Bruxelles"},"country":{"classid":"BE","classname":"Belgium","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2016-07-21","type":20,"id":"20|opendoar____::0e3d292f95a8f13fed04d7b3ac872b9f"} +{"dateoftransformation":"2018-11-12","originalId":["opendoar____::Freie_Universitat_Berlin"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Freie Universitat Berlin"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-11-12","type":20,"id":"20|opendoar____::5054b113a655361d929493a95d29e6f1"} \ No newline at end of file diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/AbstractCondition.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/AbstractCondition.java index cf68e74..c78163d 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/AbstractCondition.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/AbstractCondition.java @@ -35,11 +35,32 @@ public abstract class AbstractCondition extends AbstractPaceFunctions implements final Field va = a.values(fd.getName()); final Field vb = b.values(fd.getName()); - if ((va.isEmpty() || vb.isEmpty()) && fd.isIgnoreMissing()) { - res.put(fd.getName(), new ConditionEval(cond, va, vb, 0)); - } else { + if (fd.isIgnoreMissing()) { res.put(fd.getName(), verify(fd, va, vb)); } + else { + if (va.isEmpty() || vb.isEmpty()) { + res.put(fd.getName(), new ConditionEval(cond, va, vb, -1)); + } + else { + res.put(fd.getName(), verify(fd, va, vb)); + } + } + + + +// // if ignoreMissing=true always return undefined (0) in case of missing +// if ((va.isEmpty() || vb.isEmpty()) && fd.isIgnoreMissing()) { +// res.put(fd.getName(), new ConditionEval(cond, va, vb, 0)); +// } else { +// if (va.isEmpty()&&vb.isEmpty()) { +// res.put(fd.getName(), new ConditionEval(cond, va, vb, -1)); +// } +// else { +// res.put(fd.getName(), verify(fd, va, vb)); +// } +// } + } return res; } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/DomainExactMatch.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/DomainExactMatch.java index e415f15..dffe2ca 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/DomainExactMatch.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/DomainExactMatch.java @@ -21,6 +21,8 @@ public class DomainExactMatch extends ExactMatchIgnoreCase { private URL asUrl(final String value) { try { + if (value.isEmpty()) + return new URL("http://"); return new URL(value); } catch (MalformedURLException e) { // should not happen as checked by pace typing diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ExactMatch.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ExactMatch.java index 2776576..a4cd847 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ExactMatch.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ExactMatch.java @@ -27,7 +27,14 @@ public class ExactMatch extends AbstractCondition { int res; - if (StringUtils.isBlank(fa) && StringUtils.isBlank(fb)) { +// if (StringUtils.isBlank(fa) && StringUtils.isBlank(fb)) { +// res = 0; +// } else { +// res = fa.equals(fb) ? 1 : -1; +// } + + //if there is a blank, undefined result + if (StringUtils.isBlank(fa) || StringUtils.isBlank(fb)) { res = 0; } else { res = fa.equals(fb) ? 1 : -1; diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ExactMatchIgnoreCase.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ExactMatchIgnoreCase.java index 7741f38..e9925ec 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ExactMatchIgnoreCase.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ExactMatchIgnoreCase.java @@ -5,6 +5,7 @@ import java.util.List; import eu.dnetlib.pace.distance.eval.ConditionEval; import eu.dnetlib.pace.model.Field; import eu.dnetlib.pace.model.FieldDef; +import org.apache.commons.lang.StringUtils; /** * The Class ExactMatch. @@ -24,7 +25,15 @@ public class ExactMatchIgnoreCase extends AbstractCondition { final String fa = getValue(a); final String fb = getValue(b); - return new ConditionEval(cond, a, b, fa.equalsIgnoreCase(fb) ? 1 : -1); + int res; + + if (StringUtils.isBlank(fa) || StringUtils.isBlank(fb)) { + res = 0; + } else { + res = fa.equalsIgnoreCase(fb) ? 1 : -1; + } + + return new ConditionEval(cond, a, b, res); } protected String getValue(final Field f) { diff --git a/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/translation_map.csv b/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/translation_map.csv index 55d114c..ef49c2f 100644 --- a/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/translation_map.csv +++ b/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/translation_map.csv @@ -1,4 +1,4 @@ -key::1;university;università;università studi;universitario;universitaria;université;universitaire;universitaires;universidad;universitade;Universität;Uniwersytet;университет;universiteit;πανεπιστήμιο +key::1;university;università;università studi;universitario;universitaria;université;universitaire;universitaires;universidad;universitade;Universität;universitaet;Uniwersytet;университет;universiteit;πανεπιστήμιο key::2;studies;studi;études;estudios;estudos;Studien;studia;исследования;studies;σπουδές key::3;advanced;superiore;supérieur;supérieure;supérieurs;supérieures;avancado;avancados;fortgeschrittene;fortgeschritten;zaawansowany;передовой;gevorderd;gevorderde;προχωρημένος;προχωρημένη;προχωρημένο;προχωρημένες;προχωρημένα key::4;institute;istituto;institut;instituto;instituto;Institut;instytut;институт;instituut;ινστιτούτο diff --git a/dnet-pace-core/src/test/java/eu/dnetlib/pace/distance/DistanceAlgoTest.java b/dnet-pace-core/src/test/java/eu/dnetlib/pace/distance/DistanceAlgoTest.java index c92c6fe..3943e4f 100644 --- a/dnet-pace-core/src/test/java/eu/dnetlib/pace/distance/DistanceAlgoTest.java +++ b/dnet-pace-core/src/test/java/eu/dnetlib/pace/distance/DistanceAlgoTest.java @@ -48,9 +48,10 @@ public class DistanceAlgoTest extends AbstractPaceFunctions { @Test public void testJaroWinklerNormalizedName() { final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params); - double result = jaroWinklerNormalizedName.distance("Universita di Pisa", "Universita di Parma"); + double result = jaroWinklerNormalizedName.distance("Free University of Bozen-Bolzano", "University of the Free State"); - assertEquals(result, 0.0); + System.out.println("result = " + result); + assertEquals(1.0, result); } @Test