The exact match condition gives undefined if a field is missing. The ignoreMissing semantics changed: if true, the comparison is now performed in any case; if false, the result is -1 when a field is missing.

This commit is contained in:
miconis 2019-06-18 14:05:31 +02:00
parent a5526f6254
commit e7d170d0eb
11 changed files with 123 additions and 55 deletions

View File

@ -1,13 +1,16 @@
package eu.dnetlib;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.model.MapDocument;
import eu.dnetlib.pace.util.PaceException;
import org.codehaus.jackson.annotate.JsonIgnore;
import java.io.IOException;
import java.io.Serializable;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
@ -15,13 +18,15 @@ public class ConnectedComponent implements Serializable {
private Set<MapDocument> docs;
private String id;
private Map<String, Field> fieldMap;
/**
 * Creates a connected component without assigning any of its fields.
 * NOTE(review): presumably kept for frameworks that require a no-argument constructor — confirm.
 */
public ConnectedComponent() {
}
public ConnectedComponent(String id, Set<MapDocument> docs) {
this.id = id;
/**
 * Builds a connected component from a set of documents.
 * The set is stored as passed in (no copy is made); the identifier is then
 * obtained from {@code createID(docs)} and the field map from {@code chooseFields(docs)}.
 *
 * @param docs the documents that make up this component
 */
public ConnectedComponent(Set<MapDocument> docs) {
this.docs = docs;
// NOTE(review): createID and chooseFields are public, overridable methods called during
// construction — confirm no subclass override depends on state that is not yet initialized.
this.id = createID(docs);
this.fieldMap = chooseFields(docs);
}
public Set<MapDocument> getDocs() {
@ -40,14 +45,28 @@ public class ConnectedComponent implements Serializable {
this.id = id;
}
public void initializeID() {
/**
 * Selects the field map of the document whose string representation is the longest.
 * On ties the document met first in iteration order wins; if the set is empty
 * (or every document renders as an empty string) a new empty map is returned.
 *
 * @param docs the candidate documents
 * @return the map returned by {@code getFieldMap()} of the document with the longest
 *         {@code toString()}, or an empty map when no document qualifies
 */
public Map<String, Field> chooseFields(Set<MapDocument> docs) {
    int maxLength = 0;
    Map<String, Field> maxFieldMap = new HashMap<>();
    for (MapDocument doc : docs) {
        // Render the document once per iteration; the length is needed for both the
        // comparison and the running maximum.
        final int length = doc.toString().length();
        if (length > maxLength) {
            maxFieldMap = doc.getFieldMap();
            maxLength = length;
        }
    }
    return maxFieldMap;
}
/**
 * Computes the identifier for a group of documents.
 * When the set holds more than one document, the identifier chosen by {@code getMin}
 * (defined outside this view; presumably the minimum — confirm) is taken apart: the text
 * before the first "|" is the prefix and the second "::"-separated segment is the suffix,
 * and the result is prefix + "|dedup_______::" + suffix.
 * Otherwise the identifier of the first document returned by the set's iterator is used as is.
 *
 * NOTE(review): an empty set reaches the else branch, where {@code iterator().next()}
 * throws NoSuchElementException; an identifier without "::" makes {@code split("::")[1]}
 * throw ArrayIndexOutOfBoundsException — confirm callers never pass such input.
 *
 * @param docs the documents of the group
 * @return the identifier built as described above
 */
public String createID(Set<MapDocument> docs) {
if (docs.size() > 1) {
String ccID = getMin(docs.stream().map(doc -> doc.getIdentifier()).collect(Collectors.toList()));
String prefix = ccID.split("\\|")[0];
String id = ccID.split("::")[1];
// NOTE(review): this view shows both an assignment to this.id and a return of the same
// expression in each branch; they look like the before/after lines of the change —
// confirm which statements are present in the actual file.
this.id = prefix + "|dedup_______::" + id;
return prefix + "|dedup_______::" + id;
} else {
this.id = docs.iterator().next().getIdentifier();
return docs.iterator().next().getIdentifier();
}
}
@ -72,4 +91,12 @@ public class ConnectedComponent implements Serializable {
throw new PaceException("Failed to create Json: ", e);
}
}
/**
 * Returns the field map held by this component.
 *
 * @return the current field map reference (no copy is made)
 */
public Map<String, Field> getFieldMap() {
    return this.fieldMap;
}
/**
 * Replaces the field map held by this component.
 *
 * @param fieldMap the map to store; the reference is kept as passed in (no copy is made)
 */
public void setFieldMap(Map<String, Field> fieldMap) {
this.fieldMap = fieldMap;
}
}

View File

@ -32,11 +32,11 @@ public class SparkLocalTest {
final JavaSparkContext context = new JavaSparkContext(spark.sparkContext());
final URL dataset = SparkLocalTest.class.getResource("/eu/dnetlib/pace/softwares.json");
final URL dataset = SparkLocalTest.class.getResource("/eu/dnetlib/pace/organization.to.fix.json");
final JavaRDD<String> dataRDD = context.textFile(dataset.getPath());
//read the configuration from the classpath
final DedupConfig config = DedupConfig.load(Utility.readFromClasspath("/eu/dnetlib/pace/software.test.pace.conf", SparkLocalTest.class));
final DedupConfig config = DedupConfig.load(Utility.readFromClasspath("/eu/dnetlib/pace/org.curr.conf", SparkLocalTest.class));
Map<String, LongAccumulator> accumulators = Utility.constructAccumulator(config, context.sc());
@ -83,15 +83,12 @@ public class SparkLocalTest {
//print deduped
connectedComponents.foreach(cc -> {
System.out.println("cc = " + cc.getId());
for (MapDocument doc: cc.getDocs()) {
System.out.println(doc);
}
System.out.println(cc);
});
//print nondeduped
nonDeduplicated.foreach(cc -> {
System.out.println(cc);
});
// //print nondeduped
// nonDeduplicated.foreach(cc -> {
// System.out.println("nd = " + cc.getId());
// });
System.out.println("Non duplicates: " + nonDeduplicated.count());
System.out.println("Duplicates: " + connectedComponents.flatMap(cc -> cc.getDocs().iterator()).count());

View File

@ -32,8 +32,7 @@ object GraphProcessor {
def asConnectedComponent(group: (VertexId, Iterable[MapDocument])): ConnectedComponent = {
val docs = group._2.toSet[MapDocument]
val connectedComponent = new ConnectedComponent("empty", JavaConversions.setAsJavaSet[MapDocument](docs));
connectedComponent.initializeID();
val connectedComponent = new ConnectedComponent(JavaConversions.setAsJavaSet[MapDocument](docs));
connectedComponent
}

View File

@ -5,7 +5,7 @@
"entityType" : "organization",
"orderField" : "legalname",
"queueMaxSize" : "2000",
"groupMaxSize" : "10",
"groupMaxSize" : "50",
"slidingWindowSize" : "200",
"rootBuilder" : [ "organization", "projectOrganization_participation_isParticipant", "datasourceOrganization_provision_isProvidedBy" ],
"includeChildren" : "true"
@ -20,13 +20,12 @@
{ "name" : "exactMatch", "fields" : [ "gridid" ] }
],
"conditions" : [
{ "name" : "exactMatch", "fields" : [ "country" ] },
{ "name" : "DomainExactMatch", "fields" : [ "websiteurl" ] }
{ "name" : "DomainExactMatch", "fields" : [ "websiteurl" ] },
{ "name" : "exactMatch", "fields" : [ "country" ] }
],
"model" : [
{ "name" : "legalname", "algo" : "Null", "type" : "String", "weight" : "0", "ignoreMissing" : "false", "path" : "organization/metadata/legalname/value" },
{ "name" : "country", "algo" : "Null", "type" : "String", "weight" : "0", "ignoreMissing" : "true", "path" : "organization/metadata/country/classid" },
{ "name" : "legalshortname", "algo" : "JaroWinklerNormalizedName", "type" : "String", "weight" : "0.1", "ignoreMissing" : "true", "path" : "organization/metadata/legalshortname/value" },
{ "name" : "country", "algo" : "Null", "type" : "String", "weight" : "0", "ignoreMissing" : "false", "path" : "organization/metadata/country/classid" },
{ "name" : "legalshortname", "algo" : "JaroWinklerNormalizedName", "type" : "String", "weight" : "0.1", "ignoreMissing" : "false", "path" : "organization/metadata/legalshortname/value" },
{ "name" : "legalname", "algo" : "JaroWinklerNormalizedName", "type" : "String", "weight" : "0.9", "ignoreMissing" : "false", "path" : "organization/metadata/legalname/value", "params" : {"windowSize" : 4, "threshold" : 0.5} },
{ "name" : "websiteurl", "algo" : "Null", "type" : "URL", "weight" : "0", "ignoreMissing" : "true", "path" : "organization/metadata/websiteurl/value", "params" : { "host" : 0.5, "path" : 0.5 } },
{ "name" : "gridid", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "pid[qualifier#classid = {grid}]/value" }

View File

@ -1,24 +1,30 @@
{"dateoftransformation": "2018-09-13", "originalId": ["opendoar____::Fonds_zur_F\u00f6rderung_der_wissenschaftlichen_Forschung_(Austrian_Science_Fund)"], "collectedfrom": [{"value": "OpenDOAR", "key": "10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "FWF"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.fwf.ac.at/"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Fonds zur F\u00f6rderung der wissenschaftlichen Forschung (Austrian Science Fund)"}, "country": {"classid": "AT", "classname": "Austria", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2015-08-24", "type": 20, "id": "20|opendoar____::77e7cd67c60d0c18aa835ea6ea58122c"}
{"dateoftransformation": "2018-12-15", "originalId": ["corda__h2020::998735960"], "collectedfrom": [{"value": "CORDA - COmmon Research DAta Warehouse - Horizon 2020", "key": "10|openaire____::a55eb91348674d853191f4f4fd73d078"}], "organization": {"metadata": {"eclegalbody": {"value": "true"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "FWF"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "true"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.fwf.ac.at"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "FONDS ZUR F\u00d6RDERUNG DER WISSENSCHAFTLICHEN FORSCHUNG"}, "country": {"classid": "AT", "classname": "Austria", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2018-03-12", "type": 20, "id": "20|corda__h2020::83f579158b682262181b9a8ffdfa1124"}
{"dateoftransformation": "2018-11-20", "originalId": ["corda_______::998735960"], "collectedfrom": [{"value": "CORDA - COmmon Research DAta Warehouse", "key": "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}], "organization": {"metadata": {"eclegalbody": {"value": "true"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "FWF"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "true"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.fwf.ac.at"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "FONDS ZUR F\u00d6RDERUNG DER WISSENSCHAFTLICHEN FORSCHUNG"}, "country": {"classid": "AT", "classname": "Austria", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}}}, "dateofcollection": "2018-03-12", "type": 20, "id": "20|corda_______::83f579158b682262181b9a8ffdfa1124"}
{"dateoftransformation": "2018-09-27", "originalId": ["re3data_____::9f4430cdb5474d6db4bf84834533a7c9"], "collectedfrom": [{"value": "Registry of Research Data Repository", "key": "10|openaire____::21f8a223b9925c2f87c404096080b046"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "FWF"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "https://www.fwf.ac.at/en/"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Fonds zur F\u00f6rderung der wissenschaftlichen Forschung"}, "country": {"classid": "AT", "classname": "Austria", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2018-09-27", "type": 20, "id": "20|re3data_____::a3ac0376cc2a582357d821cec70a3e5b"}
{"dateoftransformation": "2018-12-15", "originalId": ["corda__h2020::999861936"], "collectedfrom": [{"value": "CORDA - COmmon Research DAta Warehouse - Horizon 2020", "key": "10|openaire____::a55eb91348674d853191f4f4fd73d078"}], "organization": {"metadata": {"eclegalbody": {"value": "true"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "UNITO"}, "ecresearchorganization": {"value": "true"}, "ecnonprofit": {"value": "true"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.unito.it"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "UNIVERSITA DEGLI STUDI DI TORINO"}, "country": {"classid": "IT", "classname": "Italy", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "true"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2018-03-12", "type": 20, "id": "20|corda__h2020::ef77a7bbe5796b0b47aa60947a5c6f41"}
{"dateoftransformation": "2018-11-20", "originalId": ["corda_______::999861936"], "collectedfrom": [{"value": "CORDA - COmmon Research DAta Warehouse", "key": "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}], "organization": {"metadata": {"eclegalbody": {"value": "true"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "UNITO"}, "ecresearchorganization": {"value": "true"}, "ecnonprofit": {"value": "true"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.unito.it"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "UNIVERSITA DEGLI STUDI DI TORINO"}, "country": {"classid": "IT", "classname": "Italy", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "true"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2018-03-12", "type": 20, "id": "20|corda_______::ef77a7bbe5796b0b47aa60947a5c6f41"}
{"dateoftransformation": "2018-09-13", "originalId": ["nih_________::UNIVERSITA_DI_TORINO"], "collectedfrom": [{"value": "NIH - National Institutes of Health", "key": "10|openaire____::9e9e8c76d739212c63eff362e321ba33"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "UNIVERSITA DI TORINO"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2016-07-11", "type": 20, "id": "20|nih_________::fdd37fcef9df7c69ae7d620bf21ab272"}
{"dateoftransformation": "2018-09-19", "originalId": ["doajarticles::Universit\u00e0_degli_Studi_di_Torino"], "collectedfrom": [{"value": "DOAJ-Articles", "key": "10|driver______::bee53aa31dc2cbb538c10c2b65fa5824"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "Universit\u00e0 degli Studi di Torino"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Universit\u00e0 degli Studi di Torino"}, "country": {"classid": "IT", "classname": "Italy", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2018-09-19", "type": 20, "id": "20|doajarticles::f7ef827f8fe1d870b6464ef1affc9605"}
{"dateoftransformation": "2018-11-12", "originalId": ["opendoar____::Universit\u00e0_degli_Studi_di_Torino"], "collectedfrom": [{"value": "OpenDOAR", "key": "10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.unito.it/"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Universit\u00e0 degli Studi di Torino"}, "country": {"classid": "IT", "classname": "Italy", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2018-11-12", "type": 20, "id": "20|opendoar____::f7ef827f8fe1d870b6464ef1affc9605"}
{"collectedfrom": [{"value": "GRID - Global Research Identifier Database", "key": "10|openaire____::ff4a008470319a22d9cf3d14af485977"}], "organization": {"metadata": {"legalshortname": {"value": "RPF"}, "websiteurl": {"value": "http://www.research.org.cy/EN/index.html/"}, "country": {"classid": "CY", "classname": "Cyprus", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "legalname": {"value": "RPF"}}}, "pid": [{"qualifier": {"classid": "grid", "classname": "grid", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "grid.14751.36"}], "id": "20|grid________::4f35352983a82950563eadfea49dc867", "type": 20}
{"collectedfrom": [{"value": "GRID - Global Research Identifier Database", "key": "10|openaire____::ff4a008470319a22d9cf3d14af485977"}], "organization": {"metadata": {"legalshortname": {"value": "RPF"}, "websiteurl": {"value": "http://www.research.org.cy/EN/index.html/"}, "country": {"classid": "CY", "classname": "Cyprus", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "legalname": {"value": "Research Promotion Foundation"}}}, "pid": [{"qualifier": {"classid": "grid", "classname": "grid", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "grid.14751.36"}], "id": "20|grid________::a42b3c67ea94b54ee941fb42fefd51d6", "type": 20}
{"dateoftransformation": "2018-08-08", "originalId": ["corda__h2020::999946035"], "collectedfrom": [{"value": "CORDA - COmmon Research DAta Warehouse - Horizon 2020", "key": "10|openaire____::a55eb91348674d853191f4f4fd73d078"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "RPF"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "true"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.research.org.cy"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "IDRYMA PROOTHISIS EREVNAS"}, "country": {"classid": "CY", "classname": "Cyprus", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2016-01-21", "type": 20, "id": "20|corda__h2020::a16918f80d830bf2b6daa5ec304f0e31"}
{"dateoftransformation": "2018-08-08", "originalId": ["corda_______::999946035"], "collectedfrom": [{"value": "CORDA - COmmon Research DAta Warehouse", "key": "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "RPF"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "true"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.research.org.cy"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "RESEARCH PROMOTION FOUNDATION"}, "country": {"classid": "CY", "classname": "Cyprus", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2015-09-10", "type": 20, "id": "20|corda_______::a16918f80d830bf2b6daa5ec304f0e31"}
{"collectedfrom": [{"value": "GRID - Global Research Identifier Database", "key": "10|openaire____::ff4a008470319a22d9cf3d14af485977"}], "organization": {"metadata": {"legalshortname": {"value": "DFG"}, "websiteurl": {"value": "http://www.dfg.de/en/"}, "country": {"classid": "DE", "classname": "Germany", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "legalname": {"value": "Deutsche Forschungsgemeinschaft"}}}, "pid": [{"qualifier": {"classid": "grid", "classname": "grid", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "grid.424150.6"}], "id": "20|grid________::7d83de934ecd5091d83334f752cef22c", "type": 20}
{"dateoftransformation": "2018-08-08", "originalId": ["corda_______::999547462"], "collectedfrom": [{"value": "CORDA - COmmon Research DAta Warehouse", "key": "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}], "organization": {"metadata": {"eclegalbody": {"value": "true"}, "eclegalperson": {"value": "true"}, "country": {"classid": "DE", "classname": "Germany", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "ecnonprofit": {"value": "true"}, "websiteurl": {"value": "http://www.dfg.de"}, "ecnutscode": {"value": "false"}, "legalname": {"value": "DEUTSCHE FORSCHUNGSGEMEINSCHAFT"}}}, "dateofcollection": "2015-09-10", "type": 20, "id": "20|corda_______::3f41cfb7d56cfea69f3ce9792b822eb4"}
{"dateoftransformation": "2018-09-28", "originalId": ["dfgf________::DFG"], "collectedfrom": [{"value": "", "key": ""}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "DFG"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Deutsche Forschungsgemeinschaft"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2018-09-28", "type": 20, "id": "20|dfgf________::3bbe57698e353a2acaa03306316658bb"}
{"dateoftransformation": "2018-09-28", "originalId": ["dfgf________::DFGF"], "collectedfrom": [{"value": "", "key": ""}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "DFG"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Deutsche Forschungsgemeinschaft"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2018-09-28", "type": 20, "id": "20|dfgf________::14a2847759c496334d510ff8fafbd464"}
{"dateoftransformation": "2018-06-04", "originalId": ["re3data_____::bf9c8e5c69ff222e3ee2ff0fc4d2b289"], "collectedfrom": [{"value": "Registry of Research Data Repository", "key": "10|openaire____::21f8a223b9925c2f87c404096080b046"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "German Research Foundation"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.dfg.de/"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Deutsche Forschungsgemeinschaft"}, "country": {"classid": "DE", "classname": "Germany", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2016-01-07", "type": 20, "id": "20|re3data_____::fbb08ab5e8cf8cd1056f61b84ddf05dd"}
{"originalId": ["https://academic.microsoft.com/#/detail/87707601"], "pid": [{"qualifier": {"classid": "urn", "classname": "urn", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "http://en.wikipedia.org/wiki/Deutsche_Forschungsgemeinschaft"}, {"qualifier": {"classid": "mag_id", "classname": "Microsoft Academic Graph Identifier", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "https://academic.microsoft.com/#/detail/87707601"}, {"qualifier": {"classid": "grid", "classname": "grid", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "grid.424150.6"}], "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}], "organization": {"metadata": {"websiteurl": {"value": "http://www.dfg.de/"}, "legalname": {"value": "Deutsche Forschungsgemeinschaft"}}}, "type": 20, "id": "20|microsoft___::e2edddabcc31b692b4ca7b89456e750a"}
{"dateoftransformation": "2018-08-08", "originalId": ["corda__h2020::999547462"], "collectedfrom": [{"value": "CORDA - COmmon Research DAta Warehouse - Horizon 2020", "key": "10|openaire____::a55eb91348674d853191f4f4fd73d078"}], "organization": {"metadata": {"eclegalbody": {"value": "true"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "DFG"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "true"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.dfg.de"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "DEUTSCHE FORSCHUNGSGEMEINSCHAFT"}, "country": {"classid": "DE", "classname": "Germany", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2016-01-21", "type": 20, "id": "20|corda__h2020::3f41cfb7d56cfea69f3ce9792b822eb4"}
{"dateoftransformation": "2018-06-04", "originalId": ["re3data_____::64ef0759fcfccf84cca028ba3c21aa23"], "collectedfrom": [{"value": "Registry of Research Data Repository", "key": "10|openaire____::21f8a223b9925c2f87c404096080b046"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "Deutsche Forschungsgemeinschaft"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.dfg.de/en/index.jsp"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "German Research Foundation"}, "country": {"classid": "DE", "classname": "Germany", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2016-01-07", "type": 20, "id": "20|re3data_____::e029b7e0de6cafc0c7126615c65458f0"}
{"dateoftransformation": "2018-06-04", "originalId": ["re3data_____::37e3bba353f88b4649d459c698483f6e"], "collectedfrom": [{"value": "Registry of Research Data Repository", "key": "10|openaire____::21f8a223b9925c2f87c404096080b046"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "Deutsche Forschungsgemeinschaft"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.dfg.de/en/index.jsp"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "German Research Association"}, "country": {"classid": "DE", "classname": "Germany", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2016-01-07", "type": 20, "id": "20|re3data_____::2080dc170e6cd7c6c06f403f8a08c1be"}
{"collectedfrom": [{"value": "GRID - Global Research Identifier Database", "key": "10|openaire____::ff4a008470319a22d9cf3d14af485977"}], "organization": {"metadata": {"legalshortname": {"value": "DFG"}, "websiteurl": {"value": "http://www.dfg.de/en/"}, "country": {"classid": "DE", "classname": "Germany", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "legalname": {"value": "DFG"}}}, "pid": [{"qualifier": {"classid": "grid", "classname": "grid", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "grid.424150.6"}], "id": "20|grid________::085fd89ec6f3f92c354e0bc027de2a58", "type": 20}
{"collectedfrom": [{"value": "GRID - Global Research Identifier Database", "key": "10|openaire____::ff4a008470319a22d9cf3d14af485977"}], "organization": {"metadata": {"legalshortname": {"value": "DFG"}, "websiteurl": {"value": "http://www.dfg.de/en/"}, "country": {"classid": "DE", "classname": "Germany", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "legalname": {"value": "German Research Foundation"}}}, "pid": [{"qualifier": {"classid": "grid", "classname": "grid", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "grid.424150.6"}], "id": "20|grid________::f0d88189673738d2a565aff99eeb59a2", "type": 20}
{"dateoftransformation":"2018-11-12","originalId":["opendoar____::Humboldt_State_University"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.humboldt.edu/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt State University"},"country":{"classid":"US","classname":"United States","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-11-12","type":20,"id":"20|opendoar____::9c3522c59aef0edab19b8a3f0aeb39ed"}
{"dateoftransformation":"2019-05-19","originalId":["rcuk________::9758583A-FF1E-41C4-9176-B875E8FAC110"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt State University"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-04","type":20,"id":"20|rcuk________::7715018b4838eaf1d57242c788e222d4"}
{"dateoftransformation":"2018-09-13","originalId":["opendoar____::Humboldt-Universität_zu_Berlin"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"HU"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"https://www.hu-berlin.de/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt-Universität zu Berlin"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2015-08-24","type":20,"id":"20|opendoar____::13ab9ef517038d3751f4b0e31aea9ac7"}
{"dateoftransformation":"2018-09-27","originalId":["re3data_____::678d9d5a712331f6e2fce7b7b764090f"],"collectedfrom":[{"value":"Registry of Research Data Repository","key":"10|openaire____::21f8a223b9925c2f87c404096080b046"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"https://www.hu-berlin.de/de/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt-Universität Berlin"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-09-27","type":20,"id":"20|re3data_____::aeb488fd15eb1be77b998b5602450910"}
{"dateoftransformation":"2019-05-19","originalId":["rcuk________::CFF4C944-5CF1-4AE3-8C03-BE361D6DEDC3"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt University Berlin"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-03","type":20,"id":"20|rcuk________::ff1bccdf9520b3fadd2fc26103231de0"}
{"dateoftransformation":"2018-09-27","originalId":["re3data_____::4bda5f07be19914ce8e2e4652a72151c"],"collectedfrom":[{"value":"Registry of Research Data Repository","key":"10|openaire____::21f8a223b9925c2f87c404096080b046"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"https://www.hu-berlin.de/de"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt-Universität zu Berlin"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-09-27","type":20,"id":"20|re3data_____::d72a4f4665f7df9b48a22d4cfde0dd3c"}
{"dateoftransformation":"2018-09-13","originalId":["nih_________::HUMBOLDT_STATE_UNIVERSITY"],"collectedfrom":[{"value":"NIH - National Institutes of Health","key":"10|openaire____::9e9e8c76d739212c63eff362e321ba33"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"HUMBOLDT STATE UNIVERSITY"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2016-07-14","type":20,"id":"20|nih_________::8aec7ec3198fc69ce74e24b8f6aa9a59"}
{"dateoftransformation":"2018-09-19","originalId":["doajarticles::Humboldt-Universität_zu_Berlin"],"collectedfrom":[{"value":"DOAJ-Articles","key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"Humboldt-Universität zu Berlin"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt-Universität zu Berlin"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-09-19","type":20,"id":"20|doajarticles::13ab9ef517038d3751f4b0e31aea9ac7"}
{"dateoftransformation":"2018-11-20","originalId":["corda_______::999850781"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse","key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UBER"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.hu-berlin.de"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"HUMBOLDT-UNIVERSITAT ZU BERLIN"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda_______::d999b4c2dc81ccd40100056ab0543088"}
{"dateoftransformation":"2018-12-15","originalId":["corda__h2020::999850781"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020","key":"10|openaire____::a55eb91348674d853191f4f4fd73d078"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UBER"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.hu-berlin.de"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"HUMBOLDT-UNIVERSITAET ZU BERLIN"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda__h2020::d999b4c2dc81ccd40100056ab0543088"}
{"dateoftransformation": "2019-05-19", "originalId": ["rcuk________::9169966C-E38A-41D7-AF04-F7470963CBED"], "collectedfrom": [{"key": "10|openaire____::ab2d3310741ea80d3b8726f651502858", "value": "Research Councils UK"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Institute of Physics"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2017-11-04", "type": 20, "id": "20|rcuk________::3eb464c9a21582d7dbb3f115710d863c"}
{"dateoftransformation": "2019-05-19", "originalId": ["rcuk________::CEEF86B3-BB98-4CAE-848D-00837C745DEC"], "collectedfrom": [{"key": "10|openaire____::ab2d3310741ea80d3b8726f651502858", "value": "Research Councils UK"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Yerevan Physics Institute"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2017-11-03", "type": 20, "id": "20|rcuk________::f8790ac2aa4eb6fb7cc2980eb7971ee6"}
{"dateoftransformation": "2018-09-13", "originalId": ["opendoar____::Aalto_University"], "collectedfrom": [{"key": "10|openaire____::47ce9e9f4fad46e732cff06419ecaabb", "value": "OpenDOAR"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.aalto.fi/en/"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Aalto University"}, "country": {"classid": "FI", "classname": "Finland", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2015-08-24", "type": 20, "id": "20|opendoar____::98845925f422ef4987294d6bfac525dd"}
{"dateoftransformation": "2018-09-13", "originalId": ["nsf_________::Institute_of_Physics"], "collectedfrom": [{"key": "10|openaire____::dd69b4a1513c9de9f46faf24048da1e8", "value": "NSF - National Science Foundation"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Institute of Physics"}, "country": {"classid": "YU", "classname": "Yugoslavia", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2016-03-10", "type": 20, "id": "20|nsf_________::d540e5e89b1ace31d5dd0b8f658056ec"}
{"dateoftransformation": "2019-05-19", "originalId": ["rcuk________::EE4840D3-84C0-47A3-9109-30F67D0D550F"], "collectedfrom": [{"key": "10|openaire____::ab2d3310741ea80d3b8726f651502858", "value": "Research Councils UK"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Aalto University"}, "country": {"classid": "FI", "classname": "Finland", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2017-11-03", "type": 20, "id": "20|rcuk________::7559c8bbff5125d74919775a1f290496"}
{"dateoftransformation": "2019-05-29", "originalId": ["irb_hr______::Institute of Physics, Zagreb"], "collectedfrom": [{"key": "10|openaire____::db600878200645bd752cf7fd96a37df5", "value": "Rudjer Boskovic Institute Library - Croatian Projects"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Institute of Physics, Zagreb"}, "country": {"classid": "HR", "classname": "Croatia", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2018-06-15", "type": 20, "id": "20|irb_hr______::d0147c5dfa57d00b5bbd8405366d5ed9"}
{"dateoftransformation": "2018-11-20", "originalId": ["corda_______::918297740"], "collectedfrom": [{"key": "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f", "value": "CORDA - COmmon Research DAta Warehouse"}], "organization": {"metadata": {"eclegalbody": {"value": "true"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "INSTITUUT FYSIEKE VEILIGHEID"}, "ecnonprofit": {"value": "true"}, "ecresearchorganization": {"value": "false"}, "websiteurl": {"value": "http://www.ifv.nl"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "INSTITUUT FYSIEKE VEILIGHEID"}, "country": {"classid": "NL", "classname": "Netherlands", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}}}, "dateofcollection": "2018-03-12", "type": 20, "id": "20|corda_______::490e6333fc4b5b2f0bfbb94875b57911"}
{"dateoftransformation": "2018-11-20", "originalId": ["corda_______::987994083"], "collectedfrom": [{"key": "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f", "value": "CORDA - COmmon Research DAta Warehouse"}], "organization": {"metadata": {"eclegalbody": {"value": "true"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "INSTITUUT FYSIEKE VEILIGHEID"}, "ecnonprofit": {"value": "true"}, "ecresearchorganization": {"value": "false"}, "websiteurl": {"value": "http://www.ifv.nl"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "INSTITUUT FYSIEKE VEILIGHEID"}, "country": {"classid": "NL", "classname": "Netherlands", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}}}, "dateofcollection": "2018-03-12", "type": 20, "id": "20|corda_______::3ace7e70172b7ddce2ffc8db335e7cd3"}
{"dateoftransformation": "2018-11-20", "originalId": ["corda_______::999637672"], "collectedfrom": [{"key": "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f", "value": "CORDA - COmmon Research DAta Warehouse"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "ANL"}, "ecnonprofit": {"value": "true"}, "ecresearchorganization": {"value": "true"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "A I ALIKHANYAN NATIONAL SCIENCE LABORATORY"}, "country": {"classid": "AM", "classname": "Armenia", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}}}, "dateofcollection": "2018-03-12", "type": 20, "id": "20|corda_______::f76c86a31f38609cd3b7930279d9c7c6"}
{"dateoftransformation": "2019-04-16", "originalId": ["aka_________::3117bf00abc3330b48bb270494d46ce4"], "collectedfrom": [{"key": "10|openaire____::6ac933301a3933c8a22ceebea7000326", "value": "Academy of Finland"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Aalto University"}, "country": {"classid": "FI", "classname": "Finland", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2019-01-25", "type": 20, "id": "20|aka_________::c32beace3046af7a121b15237b1e4747"}
{"dateoftransformation":"2019-05-04","originalId":["opendoar____::Free_University_of_Bozen_-_Bolzano"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.unibz.it"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Free University of Bozen - Bolzano"},"country":{"classid":"IT","classname":"Italy","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2019-05-04","type":20,"id":"20|opendoar____::c230b60ca1a8a95150c3163e40899e5d"}
{"dateoftransformation":"2019-05-19","originalId":["rcuk________::E7C60D41-51F7-4C46-89DC-4E8F6D7DC64B"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Free University of Bozen-Bolzano"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-07","type":20,"id":"20|rcuk________::47a7d6a960f874fdd7c2678f16276cbf"}
{"dateoftransformation":"2019-05-19","originalId":["rcuk________::69417031-F8F1-4557-BF08-49096CDBF321"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"University of the Free State"},"country":{"classid":"ZA","classname":"South Africa","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-03","type":20,"id":"20|rcuk________::91d972791a1a3945078724a4ede959d4"}
{"dateoftransformation":"2019-05-19","originalId":["rcuk________::9218106A-E8CE-46A5-AABC-B4C8ED148690"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"University of Amsterdam"},"country":{"classid":"NL","classname":"Netherlands","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-03","type":20,"id":"20|rcuk________::77c8206d9739a62c542db14a00d51fc9"}
{"dateoftransformation":"2018-09-13","originalId":["opendoar____::Université_Libre_de_Bruxelles"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.ulb.ac.be/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Université Libre de Bruxelles"},"country":{"classid":"BE","classname":"Belgium","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2015-08-24","type":20,"id":"20|opendoar____::4348f2277945e85ff4fa371c89d5209e"}
{"dateoftransformation":"2018-09-13","originalId":["nsf_________::Liberty_University__Inc_"],"collectedfrom":[{"value":"NSF - National Science Foundation","key":"10|openaire____::dd69b4a1513c9de9f46faf24048da1e8"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Liberty University, Inc."},"country":{"classid":"US","classname":"United States","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2016-03-10","type":20,"id":"20|nsf_________::ef77f1e8314313a4d53ec4f19054b733"}
{"dateoftransformation":"2018-09-13","originalId":["opendoar____::Vrije_Universiteit_Amsterdam"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"VU"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.vu.nl/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Vrije Universiteit Amsterdam"},"country":{"classid":"NL","classname":"Netherlands","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2015-08-24","type":20,"id":"20|opendoar____::40e0928728ca1ea6ebb147ad307fc7db"}
{"dateoftransformation":"2018-11-12","originalId":["opendoar____::Burgas_Free_University"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"Бургаски свободен университет"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.bfu.bg/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Burgas Free University"},"country":{"classid":"BG","classname":"Bulgaria","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-11-12","type":20,"id":"20|opendoar____::28a99bd2330504b0dfb6c44192757bde"}
{"dateoftransformation":"2018-09-13","originalId":["opendoar____::Université_libre_de_Bruxelles"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.ulb.ac.be/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Université libre de Bruxelles"},"country":{"classid":"BE","classname":"Belgium","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2016-07-21","type":20,"id":"20|opendoar____::0e3d292f95a8f13fed04d7b3ac872b9f"}
{"dateoftransformation":"2018-11-12","originalId":["opendoar____::Freie_Universitat_Berlin"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Freie Universitat Berlin"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-11-12","type":20,"id":"20|opendoar____::5054b113a655361d929493a95d29e6f1"}

View File

@ -35,11 +35,32 @@ public abstract class AbstractCondition extends AbstractPaceFunctions implements
final Field va = a.values(fd.getName());
final Field vb = b.values(fd.getName());
if ((va.isEmpty() || vb.isEmpty()) && fd.isIgnoreMissing()) {
res.put(fd.getName(), new ConditionEval(cond, va, vb, 0));
} else {
if (fd.isIgnoreMissing()) {
res.put(fd.getName(), verify(fd, va, vb));
}
else {
if (va.isEmpty() || vb.isEmpty()) {
res.put(fd.getName(), new ConditionEval(cond, va, vb, -1));
}
else {
res.put(fd.getName(), verify(fd, va, vb));
}
}
// // if ignoreMissing=true always return undefined (0) in case of missing
// if ((va.isEmpty() || vb.isEmpty()) && fd.isIgnoreMissing()) {
// res.put(fd.getName(), new ConditionEval(cond, va, vb, 0));
// } else {
// if (va.isEmpty()&&vb.isEmpty()) {
// res.put(fd.getName(), new ConditionEval(cond, va, vb, -1));
// }
// else {
// res.put(fd.getName(), verify(fd, va, vb));
// }
// }
}
return res;
}

View File

@ -21,6 +21,8 @@ public class DomainExactMatch extends ExactMatchIgnoreCase {
private URL asUrl(final String value) {
try {
if (value.isEmpty())
return new URL("http://");
return new URL(value);
} catch (MalformedURLException e) {
// should not happen as checked by pace typing

View File

@ -27,7 +27,14 @@ public class ExactMatch extends AbstractCondition {
int res;
if (StringUtils.isBlank(fa) && StringUtils.isBlank(fb)) {
// if (StringUtils.isBlank(fa) && StringUtils.isBlank(fb)) {
// res = 0;
// } else {
// res = fa.equals(fb) ? 1 : -1;
// }
// if either value is blank, the result is undefined (0)
if (StringUtils.isBlank(fa) || StringUtils.isBlank(fb)) {
res = 0;
} else {
res = fa.equals(fb) ? 1 : -1;

View File

@ -5,6 +5,7 @@ import java.util.List;
import eu.dnetlib.pace.distance.eval.ConditionEval;
import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.model.FieldDef;
import org.apache.commons.lang.StringUtils;
/**
* The Class ExactMatchIgnoreCase.
@ -24,7 +25,15 @@ public class ExactMatchIgnoreCase extends AbstractCondition {
final String fa = getValue(a);
final String fb = getValue(b);
return new ConditionEval(cond, a, b, fa.equalsIgnoreCase(fb) ? 1 : -1);
int res;
if (StringUtils.isBlank(fa) || StringUtils.isBlank(fb)) {
res = 0;
} else {
res = fa.equalsIgnoreCase(fb) ? 1 : -1;
}
return new ConditionEval(cond, a, b, res);
}
protected String getValue(final Field f) {

View File

@ -1,4 +1,4 @@
key::1;university;università;università studi;universitario;universitaria;université;universitaire;universitaires;universidad;universitade;Universität;Uniwersytet;университет;universiteit;πανεπιστήμιο
key::1;university;università;università studi;universitario;universitaria;université;universitaire;universitaires;universidad;universitade;Universität;universitaet;Uniwersytet;университет;universiteit;πανεπιστήμιο
key::2;studies;studi;études;estudios;estudos;Studien;studia;исследования;studies;σπουδές
key::3;advanced;superiore;supérieur;supérieure;supérieurs;supérieures;avancado;avancados;fortgeschrittene;fortgeschritten;zaawansowany;передовой;gevorderd;gevorderde;προχωρημένος;προχωρημένη;προχωρημένο;προχωρημένες;προχωρημένα
key::4;institute;istituto;institut;instituto;instituto;Institut;instytut;институт;instituut;ινστιτούτο

1 key::1;university;università;università studi;universitario;universitaria;université;universitaire;universitaires;universidad;universitade;Universität;Uniwersytet;университет;universiteit;πανεπιστήμιο key::1;university;università;università studi;universitario;universitaria;université;universitaire;universitaires;universidad;universitade;Universität;universitaet;Uniwersytet;университет;universiteit;πανεπιστήμιο
2 key::2;studies;studi;études;estudios;estudos;Studien;studia;исследования;studies;σπουδές
3 key::3;advanced;superiore;supérieur;supérieure;supérieurs;supérieures;avancado;avancados;fortgeschrittene;fortgeschritten;zaawansowany;передовой;gevorderd;gevorderde;προχωρημένος;προχωρημένη;προχωρημένο;προχωρημένες;προχωρημένα
4 key::4;institute;istituto;institut;instituto;instituto;Institut;instytut;институт;instituut;ινστιτούτο

View File

@ -48,9 +48,10 @@ public class DistanceAlgoTest extends AbstractPaceFunctions {
// Builds a JaroWinklerNormalizedName from `params`, computes the distance between two name
// strings, prints the value and asserts on it.
// NOTE(review): this span contains two declarations of `result` and two assertions with
// different expectations (0.0 and 1.0); it looks like removed and added diff lines shown
// together -- confirm which pair belongs to the current version of the test.
@Test
public void testJaroWinklerNormalizedName() {
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
double result = jaroWinklerNormalizedName.distance("Universita di Pisa", "Universita di Parma");
double result = jaroWinklerNormalizedName.distance("Free University of Bozen-Bolzano", "University of the Free State");
assertEquals(result, 0.0);
System.out.println("result = " + result);
assertEquals(1.0, result);
}
@Test