diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java index 4dc633d55..0f8350eee 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java @@ -18,12 +18,12 @@ class JsonPathTest { @Test void jsonToModelTest() throws IOException { DedupConfig conf = DedupConfig - .load( - IOUtils - .toString( - SparkOpenorgsDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); + .load( + IOUtils + .toString( + SparkOpenorgsDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); final String org = IOUtils.toString(getClass().getResourceAsStream("organization_example1.json")); @@ -58,7 +58,7 @@ class JsonPathTest { void testJPath2() throws IOException { DedupConfig conf = DedupConfig - .load(IOUtils.toString(getClass().getResourceAsStream("dedup_conf_dataset.json"))); + .load(IOUtils.toString(getClass().getResourceAsStream("dedup_conf_dataset.json"))); final String dat = IOUtils.toString(getClass().getResourceAsStream("dataset_example1.json")); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/local/DedupLocalTestUtils.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/local/DedupLocalTestUtils.java index e584142a9..1ab620062 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/local/DedupLocalTestUtils.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/local/DedupLocalTestUtils.java @@ -1,12 +1,11 @@ + package eu.dnetlib.dhp.oa.dedup.local; -import com.cloudera.com.fasterxml.jackson.core.JsonFactory; -import com.cloudera.com.fasterxml.jackson.databind.JsonNode; -import com.cloudera.com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.OafEntity; -import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.pace.config.DedupConfig; +import java.io.*; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; + import org.apache.commons.beanutils.BeanUtils; import org.apache.commons.collections4.IteratorUtils; import org.apache.commons.io.IOUtils; @@ -16,178 +15,206 @@ import org.apache.hadoop.fs.Path; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.spark_project.guava.hash.Hashing; + +import com.cloudera.com.fasterxml.jackson.core.JsonFactory; +import com.cloudera.com.fasterxml.jackson.databind.JsonNode; +import com.cloudera.com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.pace.config.DedupConfig; import scala.collection.JavaConverters; import scala.collection.convert.Wrappers; import scala.collection.mutable.ArrayBuffer; -import java.io.*; -import java.util.ArrayList; -import java.util.List; -import java.util.stream.Collectors; - public abstract class DedupLocalTestUtils { - public static String prepareTable(Row doc) { - StringBuilder ret = new StringBuilder(""); + public static String prepareTable(Row doc) { + StringBuilder ret = new StringBuilder("
"); - for(String fieldName: doc.schema().fieldNames()) { - Object value = doc.getAs(fieldName); - if(value.getClass() == String.class){ - ret.append(""); - } - else if(value.getClass() == Wrappers.JListWrapper.class) { - List values = IteratorUtils.toList(JavaConverters.asJavaIteratorConverter(((Wrappers.JListWrapper) value).iterator()).asJava()) - .stream() - .map(DedupLocalTestUtils::takeValue) - .collect(Collectors.toList()); - ret.append(""); - } - else if(value.getClass() == ArrayBuffer.class){ - List values = new ArrayList<>(IteratorUtils.toList(JavaConverters.asJavaIteratorConverter(((ArrayBuffer) value).iterator()).asJava())); - ret.append(""); - } + for (String fieldName : doc.schema().fieldNames()) { + Object value = doc.getAs(fieldName); + if (value.getClass() == String.class) { + ret.append(""); + } else if (value.getClass() == Wrappers.JListWrapper.class) { + List values = IteratorUtils + .toList( + JavaConverters + .asJavaIteratorConverter(((Wrappers.JListWrapper) value).iterator()) + .asJava()) + .stream() + .map(DedupLocalTestUtils::takeValue) + .collect(Collectors.toList()); + ret + .append(""); + } else if (value.getClass() == ArrayBuffer.class) { + List values = new ArrayList<>(IteratorUtils + .toList(JavaConverters.asJavaIteratorConverter(((ArrayBuffer) value).iterator()).asJava())); + ret + .append(""); + } - } + } - ret.append("
").append(fieldName).append("").append(value).append("
").append(fieldName).append("[").append(String.join(";", values)).append("]
").append(fieldName).append("[").append(String.join(";", values)).append("]
").append(fieldName).append("").append(value).append("
") + .append(fieldName) + .append("[") + .append(String.join(";", values)) + .append("]
") + .append(fieldName) + .append("[") + .append(String.join(";", values)) + .append("]
"); - return ret.toString(); + ret.append(""); + return ret.toString(); - } + } - protected static String fileToString(String filePath) throws IOException { + protected static String fileToString(String filePath) throws IOException { - Path path=new Path(filePath); - FileSystem fs = FileSystem.get(new Configuration()); - BufferedReader br=new BufferedReader(new InputStreamReader(fs.open(path))); - try { - return String.join("", br.lines().collect(Collectors.toList())); - } finally { - br.close(); - } - } + Path path = new Path(filePath); + FileSystem fs = FileSystem.get(new Configuration()); + BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path))); + try { + return String.join("", br.lines().collect(Collectors.toList())); + } finally { + br.close(); + } + } - public static void prepareGraphParams(Dataset entities, Dataset simRels, String filePath, String templateFilePath) { + public static void prepareGraphParams(Dataset entities, Dataset simRels, String filePath, + String templateFilePath) { - List vertexes = entities.toJavaRDD().map(r -> r.getAs("identifier").toString()).collect(); + List vertexes = entities.toJavaRDD().map(r -> r.getAs("identifier").toString()).collect(); - List nodes = entities.toJavaRDD().map(e -> new Node(e.getAs("identifier").toString(), vertexes.indexOf(e.getAs("identifier").toString()), prepareTable(e))).collect(); + List nodes = entities + .toJavaRDD() + .map( + e -> new Node(e.getAs("identifier").toString(), vertexes.indexOf(e.getAs("identifier").toString()), + prepareTable(e))) + .collect(); - List edges = simRels.toJavaRDD().collect().stream().map(sr -> new Edge(vertexes.indexOf(sr.getSource()), vertexes.indexOf(sr.getTarget()))).collect(Collectors.toList()); + List edges = simRels + .toJavaRDD() + .collect() + .stream() + .map(sr -> new Edge(vertexes.indexOf(sr.getSource()), vertexes.indexOf(sr.getTarget()))) + .collect(Collectors.toList()); - try(FileWriter fw = new FileWriter(filePath)) { - String fullText = IOUtils.toString(new FileReader(templateFilePath)); + try (FileWriter fw = new FileWriter(filePath)) { + String fullText = IOUtils.toString(new FileReader(templateFilePath)); - String s = fullText - .replaceAll("%nodes%", new ObjectMapper().writeValueAsString(nodes)) - .replaceAll("%edges%", new ObjectMapper().writeValueAsString(edges)); + String s = fullText + .replaceAll("%nodes%", new ObjectMapper().writeValueAsString(nodes)) + .replaceAll("%edges%", new ObjectMapper().writeValueAsString(edges)); - IOUtils.write(s, fw); - } catch (IOException e) { - e.printStackTrace(); - } + IOUtils.write(s, fw); + } catch (IOException e) { + e.printStackTrace(); + } - } + } - public static long hash(final String id) { - return Hashing.murmur3_128().hashString(id).asLong(); - } + public static long hash(final String id) { + return Hashing.murmur3_128().hashString(id).asLong(); + } - public static Relation createRel(String source, String target, String relClass, DedupConfig dedupConf) { + public static Relation createRel(String source, String target, String relClass, DedupConfig dedupConf) { - String entityType = dedupConf.getWf().getEntityType(); + String entityType = dedupConf.getWf().getEntityType(); - Relation r = new Relation(); - r.setSource(source); - r.setTarget(target); - r.setRelClass(relClass); - r.setRelType(entityType + entityType.substring(0, 1).toUpperCase() + entityType.substring(1)); - r.setSubRelType(ModelConstants.DEDUP); - return r; - } + Relation r = new Relation(); + r.setSource(source); + r.setTarget(target); + r.setRelClass(relClass); + r.setRelType(entityType + entityType.substring(0, 1).toUpperCase() + entityType.substring(1)); + r.setSubRelType(ModelConstants.DEDUP); + return r; + } - public static OafEntity createOafEntity(String id, OafEntity base, long ts) { - try { - OafEntity res = (OafEntity) BeanUtils.cloneBean(base); - res.setId(id); - res.setLastupdatetimestamp(ts); - return res; - } catch (Exception e) { - throw new RuntimeException(e); - } - } + public static OafEntity createOafEntity(String id, OafEntity base, long ts) { + try { + OafEntity res = (OafEntity) BeanUtils.cloneBean(base); + res.setId(id); + res.setLastupdatetimestamp(ts); + return res; + } catch (Exception e) { + throw new RuntimeException(e); + } + } - public static String takeValue(String json) { - ObjectMapper mapper = new ObjectMapper(new JsonFactory()); - try { - JsonNode rootNode = mapper.readTree(json); - return rootNode.get("value").toString().replaceAll("\"", ""); + public static String takeValue(String json) { + ObjectMapper mapper = new ObjectMapper(new JsonFactory()); + try { + JsonNode rootNode = mapper.readTree(json); + return rootNode.get("value").toString().replaceAll("\"", ""); - } catch (Exception e) { - return json; - } + } catch (Exception e) { + return json; + } - } + } } -class Node implements Serializable{ - String label; - int id; - String title; +class Node implements Serializable { + String label; + int id; + String title; - public Node(String label, int id, String title) { - this.label = label; - this.id = id; - this.title = title; - } + public Node(String label, int id, String title) { + this.label = label; + this.id = id; + this.title = title; + } - public String getLabel() { - return label; - } + public String getLabel() { + return label; + } - public void setLabel(String label) { - this.label = label; - } + public void setLabel(String label) { + this.label = label; + } - public int getId() { - return id; - } + public int getId() { + return id; + } - public void setId(int id) { - this.id = id; - } + public void setId(int id) { + this.id = id; + } - public String getTitle() { - return title; - } + public String getTitle() { + return title; + } - public void setTitle(String title) { - this.title = title; - } + public void setTitle(String title) { + this.title = title; + } } -class Edge implements Serializable{ - int from; - int to; +class Edge implements Serializable { + int from; + int to; - public Edge(int from, int to) { - this.from = from; - this.to = to; - } + public Edge(int from, int to) { + this.from = from; + this.to = to; + } - public int getFrom() { - return from; - } + public int getFrom() { + return from; + } - public void setFrom(int from) { - this.from = from; - } + public void setFrom(int from) { + this.from = from; + } - public int getTo() { - return to; - } + public int getTo() { + return to; + } - public void setTo(int to) { - this.to = to; - } + public void setTo(int to) { + this.to = to; + } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/local/SparkDedupLocalTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/local/SparkDedupLocalTest.java index 490b55481..662c468e0 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/local/SparkDedupLocalTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/local/SparkDedupLocalTest.java @@ -1,19 +1,15 @@ + package eu.dnetlib.dhp.oa.dedup.local; -import com.google.common.collect.Lists; -import com.kwartile.lib.cc.ConnectedComponent; -import eu.dnetlib.dhp.oa.dedup.DedupUtility; -import eu.dnetlib.dhp.schema.common.EntityType; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.OafEntity; -import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.schema.oaf.Result; -import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils; -import eu.dnetlib.pace.config.DedupConfig; -import eu.dnetlib.pace.model.SparkDeduper; -import eu.dnetlib.pace.model.SparkModel; -import eu.dnetlib.pace.tree.support.TreeProcessor; +import java.awt.*; +import java.io.File; +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.file.Paths; +import java.util.*; +import java.util.List; +import java.util.stream.Stream; + import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.api.java.function.FlatMapGroupsFunction; @@ -26,281 +22,309 @@ import org.junit.jupiter.api.*; import org.junit.jupiter.api.extension.ExtendWith; import org.junit.platform.commons.util.StringUtils; import org.mockito.junit.jupiter.MockitoExtension; + +import com.google.common.collect.Lists; +import com.kwartile.lib.cc.ConnectedComponent; + +import eu.dnetlib.dhp.oa.dedup.DedupUtility; +import eu.dnetlib.dhp.schema.common.EntityType; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils; +import eu.dnetlib.pace.config.DedupConfig; +import eu.dnetlib.pace.model.SparkDeduper; +import eu.dnetlib.pace.model.SparkModel; +import eu.dnetlib.pace.tree.support.TreeProcessor; import scala.Tuple2; import scala.Tuple3; import scala.collection.JavaConversions; import scala.collection.mutable.WrappedArray; -import java.awt.*; -import java.io.File; -import java.io.IOException; -import java.net.URISyntaxException; -import java.nio.file.Paths; -import java.util.List; -import java.util.*; -import java.util.stream.Stream; - @ExtendWith(MockitoExtension.class) @TestMethodOrder(MethodOrderer.OrderAnnotation.class) @TestInstance(TestInstance.Lifecycle.PER_CLASS) public class SparkDedupLocalTest extends DedupLocalTestUtils { - static SparkSession spark; - static DedupConfig config; - static JavaSparkContext context; + static SparkSession spark; + static DedupConfig config; + static JavaSparkContext context; + + final String entitiesPath = Paths + .get( + Objects + .requireNonNull(SparkDedupLocalTest.class.getResource("/eu/dnetlib/dhp/dedup/entities/publication")) + .toURI()) + .toFile() + .getAbsolutePath(); + + final String dedupConfPath = Paths + .get( + Objects + .requireNonNull(SparkDedupLocalTest.class.getResource("/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json")) + .toURI()) + .toFile() + .getAbsolutePath(); + + final static int MAX_ACCEPTANCE_DATE = 20; + + private static SparkDeduper deduper; + private static SparkModel model; + + public SparkDedupLocalTest() throws URISyntaxException { + } + + @BeforeAll + public void setup() throws IOException { + + config = DedupConfig.load(fileToString(dedupConfPath)); + + spark = SparkSession + .builder() + .appName("Deduplication") + .master("local[*]") + .getOrCreate(); + context = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + deduper = new SparkDeduper(config); + + model = new SparkModel(config); + + } + + @Test // full deduplication workflow test + @Disabled + public void deduplicationTest() { + + long before_simrels = System.currentTimeMillis(); + + Dataset entities = spark + .read() + .textFile(entitiesPath) + .transform(deduper.model().parseJsonDataset()); + + Dataset simRels = entities + .transform(deduper.dedup()) + .distinct() + .map( + (MapFunction) t -> DedupUtility + .createSimRel( + t.getStruct(0).getString(0), t.getStruct(0).getString(1), config.getWf().getEntityType()), + Encoders.bean(Relation.class)); + + long simrels_time = System.currentTimeMillis() - before_simrels; + + long simrels_number = simRels.count(); + + long before_mergerels = System.currentTimeMillis(); + + UserDefinedFunction hashUDF = functions + .udf( + (String s) -> hash(s), DataTypes.LongType); + + // + Dataset vertexIdMap = simRels + .select("source", "target") + .selectExpr("source as id") + .union(simRels.selectExpr("target as id")) + .distinct() + .withColumn("vertexId", hashUDF.apply(functions.col("id"))); + + // transform simrels into pairs of numeric ids + final Dataset edges = simRels + .select("source", "target") + .withColumn("source", hashUDF.apply(functions.col("source"))) + .withColumn("target", hashUDF.apply(functions.col("target"))); + + for (Relation r : simRels.toJavaRDD().collect()) { + System.out.println(r.getSource() + " ---> " + r.getTarget()); + } + + // resolve connected components + // ("vertexId", "groupId") + Dataset cliques = ConnectedComponent + .runOnPairs(edges, 50, spark); + + // transform "vertexId" back to its original string value + // groupId is kept numeric as its string value is not used + // ("id", "groupId") + Dataset mergeRels = cliques + .join(vertexIdMap, JavaConversions.asScalaBuffer(Collections.singletonList("vertexId")), "inner") + .drop("vertexId") + .distinct() + .flatMap((FlatMapFunction) (Row r) -> { + ArrayList res = new ArrayList<>(); + + String id = r.getAs("id"); + String groupId = r.getAs("groupId").toString(); + res.add(createRel(groupId, id, ModelConstants.MERGES, config)); + res.add(createRel(id, groupId, ModelConstants.IS_MERGED_IN, config)); + + return res.iterator(); + }, Encoders.bean(Relation.class)); + + long mergerels_time = System.currentTimeMillis() - before_mergerels; + + long mergerels_number = mergeRels.count(); + + long before_dedupentity = System.currentTimeMillis(); + + final Class clazz = ModelSupport.entityTypes + .get(EntityType.valueOf(config.getWf().getSubEntityValue())); + final Encoder beanEncoder = Encoders.bean(clazz); + final Encoder kryoEncoder = Encoders.kryo(clazz); + + Dataset kryoEntities = spark + .read() + .schema(Encoders.bean(clazz).schema()) + .json(entitiesPath) + .as(beanEncoder) + .map( + (MapFunction>) entity -> { + return new Tuple2<>(entity.getId(), entity); + }, + Encoders.tuple(Encoders.STRING(), kryoEncoder)) + .selectExpr("_1 AS id", "_2 AS kryoObject"); + + // : source is the dedup_id, target is the id of the mergedIn + Dataset mergeRelsRow = mergeRels + .where("relClass == 'merges'") + .selectExpr("source as dedupId", "target as id"); + + Dataset dedupRecords = mergeRelsRow + .join(kryoEntities, JavaConversions.asScalaBuffer(Collections.singletonList("id")), "left") + .select("dedupId", "id", "kryoObject") + .as(Encoders.tuple(Encoders.STRING(), Encoders.STRING(), kryoEncoder)) + .groupByKey((MapFunction, String>) Tuple3::_1, Encoders.STRING()) + .flatMapGroups( + (FlatMapGroupsFunction, OafEntity>) (dedupId, it) -> { + if (!it.hasNext()) + return Collections.emptyIterator(); + + final ArrayList cliques_ = new ArrayList<>(); + + final ArrayList aliases = new ArrayList<>(); + + final HashSet acceptanceDate = new HashSet<>(); + + while (it.hasNext()) { + Tuple3 t = it.next(); + OafEntity entity = t._3(); + + if (entity == null) { + aliases.add(t._2()); + } else { + cliques_.add(entity); + + if (acceptanceDate.size() < MAX_ACCEPTANCE_DATE) { // max acceptance date + if (Result.class.isAssignableFrom(entity.getClass())) { + Result result = (Result) entity; + if (result.getDateofacceptance() != null + && StringUtils.isNotBlank(result.getDateofacceptance().getValue())) { + acceptanceDate.add(result.getDateofacceptance().getValue()); + } + } + } + } + + } + + if (acceptanceDate.size() >= MAX_ACCEPTANCE_DATE || cliques_.isEmpty()) { + return Collections.emptyIterator(); + } + + OafEntity mergedEntity = MergeUtils.mergeGroup(dedupId, cliques_.iterator()); + // dedup records do not have date of transformation attribute + mergedEntity.setDateoftransformation(null); + + return Stream + .concat( + Stream + .of(dedupId) + .map(id -> DedupLocalTestUtils.createOafEntity(id, mergedEntity, before_dedupentity)), + aliases + .stream() + .map(id -> DedupLocalTestUtils.createOafEntity(id, mergedEntity, before_dedupentity))) + .iterator(); + + }, beanEncoder); + + long dedupentity_time = System.currentTimeMillis() - before_dedupentity; + + long dedupentity_number = dedupRecords.count(); + + System.out.println("Number of simRels : " + simrels_number); + System.out.println("Number of mergeRels : " + mergerels_number); + System.out.println("Number of dedupEntities : " + dedupentity_number); + System.out.println("Total time for simRels creation : " + simrels_time); + System.out.println("Total time for mergeRels creation : " + mergerels_time); + System.out.println("Total time for dedupEntities creation : " + dedupentity_time); + + showGraph(entities, simRels); + + } + + @Test // test the match between two JSON + @Disabled + public void matchTest() { + + String json1 = "{\"geolocation\": [], \"dataInfo\": {\"deletedbyinference\": true, \"provenanceaction\": {\"classid\": \"sysimport:dedup\", \"classname\": \"Inferred by OpenAIRE\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": true, \"inferenceprovenance\": \"dedup-result-decisiontree-v4\", \"invisible\": false, \"trust\": \"0.8\"}, \"resourcetype\": {\"classid\": \"UNKNOWN\", \"classname\": \"UNKNOWN\", \"schemename\": \"dnet:dataCite_resource\", \"schemeid\": \"dnet:dataCite_resource\"}, \"pid\": [{\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/bq0h-znc\"}, {\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/a5v4-hfh\"}, {\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/cg41-q6u\"}], \"bestaccessright\": {\"classid\": \"OPEN\", \"classname\": \"Open Access\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"relevantdate\": [{\"qualifier\": {\"classid\": \"issued\", \"classname\": \"issued\", \"schemename\": \"dnet:dataCite_date\", \"schemeid\": \"dnet:dataCite_date\"}, \"value\": \"2018-05-22\"}, {\"qualifier\": {\"classid\": \"available\", \"classname\": \"available\", \"schemename\": \"dnet:dataCite_date\", \"schemeid\": \"dnet:dataCite_date\"}, \"value\": \"2018-03-31\"}], \"contributor\": [{\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"Amunts, Katrin\"}], \"id\": \"50|dedup_wf_002::639e90885a3cccbe75746679ff5492c7\", \"description\": [{\"value\": \"This dataset contains the distinct probabilistic cytoarchitectonic map of Area 5Ci (SPL) in the individual, single subject template of the MNI Colin 27 reference space. As part of the Julich-Brain cytoarchitectonic atlas, the area was identified using classical histological criteria and quantitative cytoarchitectonic analysis on cell-body-stained histological sections of 10 human postmortem brains obtained from the body donor program of the University of D\\\\u00fcsseldorf. The results of the cytoarchitectonic analysis were then mapped to the reference space, where each voxel was assigned the probability to belong to Area 5Ci (SPL). The probability map of Area 5Ci (SPL) is provided in NifTi format for each hemisphere in the reference space. The Julich-Brain atlas relies on a modular, flexible and adaptive framework containing workflows to create the probabilistic brain maps for these structures. Note that methodological improvements and updated probability estimates for new brain structures may in some cases lead to measurable but negligible deviations of existing probability maps, as compared to earlier released datasets. Other available data versions of Area 5Ci (SPL): Scheperjans et al. (2019) [Data set, v8.4] [DOI: 10.25493/SQVP-GK1](https://doi.org/10.25493%2FSQVP-GK1) The most probable delineation of Area 5Ci (SPL) derived from the calculation of a maximum probability map of all currently released Julich-Brain brain structures can be found here: Amunts et al. (2019) [Data set, v1.13] [DOI: 10.25493/Q3ZS-NV6](https://doi.org/10.25493%2FQ3ZS-NV6) Amunts et al. (2019) [Data set, v1.18] [DOI: 10.25493/8EGG-ZAR](https://doi.org/10.25493%2F8EGG-ZAR) Amunts et al. (2020) [Data set, v2.2] [DOI: 10.25493/TAKY-64D](https://doi.org/10.25493%2FTAKY-64D)\"}], \"embargoenddate\": {\"value\": \"2018-03-31\"}, \"lastupdatetimestamp\": 1725556617730, \"author\": [{\"surname\": \"Scheperjans\", \"name\": \"F.\", \"pid\": [], \"rank\": 1, \"affiliation\": [], \"fullname\": \"Scheperjans, F.\"}, {\"surname\": \"Hermann\", \"name\": \"K.\", \"pid\": [], \"rank\": 2, \"affiliation\": [], \"fullname\": \"Hermann, K.\"}, {\"surname\": \"Eickhoff\", \"name\": \"S. B.\", \"pid\": [], \"rank\": 3, \"affiliation\": [], \"fullname\": \"Eickhoff, S. B.\"}, {\"surname\": \"Amunts\", \"name\": \"K.\", \"pid\": [], \"rank\": 4, \"affiliation\": [], \"fullname\": \"Amunts, K.\"}, {\"surname\": \"Schleicher\", \"name\": \"A.\", \"pid\": [], \"rank\": 5, \"affiliation\": [], \"fullname\": \"Schleicher, A.\"}, {\"surname\": \"Hoemke\", \"name\": \"L.\", \"pid\": [], \"rank\": 6, \"affiliation\": [], \"fullname\": \"Hoemke, L.\"}, {\"surname\": \"Mohlberg\", \"name\": \"H.\", \"pid\": [], \"rank\": 7, \"affiliation\": [], \"fullname\": \"Mohlberg, H.\"}, {\"surname\": \"Zilles\", \"name\": \"K.\", \"pid\": [], \"rank\": 8, \"affiliation\": [], \"fullname\": \"Zilles, K.\"}], \"collectedfrom\": [{\"value\": \"Datacite\", \"key\": \"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254\"}, {\"value\": \"Sygma\", \"key\": \"10|openaire____::a8db6f6b2ce4fe72e8b2314a9a93e7d9\"}], \"instance\": [{\"refereed\": {\"classid\": \"0002\", \"classname\": \"nonPeerReviewed\", \"schemename\": \"dnet:review_levels\", \"schemeid\": \"dnet:review_levels\"}, \"hostedby\": {\"value\": \"EBRAINS\", \"key\": \"10|fairsharing_::2ef3e50fd7c1091dda165f25be7f64fd\"}, \"license\": {\"value\": \"CC BY NC SA\"}, \"url\": [\"https://dx.doi.org/10.25493/bq0h-znc\"], \"pid\": [{\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/bq0h-znc\"}], \"instanceTypeMapping\": [{\"originalType\": \"Dataset\", \"typeLabel\": \"dataset\", \"vocabularyName\": \"openaire::coar_resource_types_3_1\", \"typeCode\": \"http://purl.org/coar/resource_type/c_ddb1\"}], \"dateofacceptance\": {\"value\": \"2018-05-22\"}, \"collectedfrom\": {\"value\": \"Datacite\", \"key\": \"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254\"}, \"accessright\": {\"classid\": \"UNKNOWN\", \"classname\": \"not available\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"instancetype\": {\"classid\": \"0021\", \"classname\": \"Dataset\", \"schemename\": \"dnet:publication_resource\", \"schemeid\": \"dnet:publication_resource\"}}, {\"refereed\": {\"classid\": \"0002\", \"classname\": \"nonPeerReviewed\", \"schemename\": \"dnet:review_levels\", \"schemeid\": \"dnet:review_levels\"}, \"hostedby\": {\"value\": \"EBRAINS\", \"key\": \"10|fairsharing_::2ef3e50fd7c1091dda165f25be7f64fd\"}, \"license\": {\"value\": \"CC BY NC SA\"}, \"url\": [\"https://dx.doi.org/10.25493/a5v4-hfh\"], \"pid\": [{\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/a5v4-hfh\"}], \"instanceTypeMapping\": [{\"originalType\": \"Dataset\", \"typeLabel\": \"dataset\", \"vocabularyName\": \"openaire::coar_resource_types_3_1\", \"typeCode\": \"http://purl.org/coar/resource_type/c_ddb1\"}], \"dateofacceptance\": {\"value\": \"2018-05-22\"}, \"collectedfrom\": {\"value\": \"Datacite\", \"key\": \"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254\"}, \"accessright\": {\"classid\": \"UNKNOWN\", \"classname\": \"not available\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"instancetype\": {\"classid\": \"0021\", \"classname\": \"Dataset\", \"schemename\": \"dnet:publication_resource\", \"schemeid\": \"dnet:publication_resource\"}}, {\"refereed\": {\"classid\": \"0002\", \"classname\": \"nonPeerReviewed\", \"schemename\": \"dnet:review_levels\", \"schemeid\": \"dnet:review_levels\"}, \"hostedby\": {\"value\": \"EBRAINS\", \"key\": \"10|fairsharing_::2ef3e50fd7c1091dda165f25be7f64fd\"}, \"license\": {\"value\": \"CC BY NC SA\"}, \"url\": [\"https://dx.doi.org/10.25493/cg41-q6u\"], \"pid\": [{\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/cg41-q6u\"}], \"instanceTypeMapping\": [{\"originalType\": \"Dataset\", \"typeLabel\": \"dataset\", \"vocabularyName\": \"openaire::coar_resource_types_3_1\", \"typeCode\": \"http://purl.org/coar/resource_type/c_ddb1\"}], \"dateofacceptance\": {\"value\": \"2018-05-22\"}, \"collectedfrom\": {\"value\": \"Datacite\", \"key\": \"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254\"}, \"accessright\": {\"classid\": \"UNKNOWN\", \"classname\": \"not available\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"instancetype\": {\"classid\": \"0021\", \"classname\": \"Dataset\", \"schemename\": \"dnet:publication_resource\", \"schemeid\": \"dnet:publication_resource\"}}, {\"refereed\": {\"classid\": \"0002\", \"classname\": \"nonPeerReviewed\", \"schemename\": \"dnet:review_levels\", \"schemeid\": \"dnet:review_levels\"}, \"hostedby\": {\"value\": \"Unknown Repository\", \"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\"}, \"license\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"CC BY NC SA\"}, \"url\": [\"https://doi.org/10.25493/bq0h-znc\"], \"pid\": [], \"instanceTypeMapping\": [{\"originalType\": \"Dataset\", \"typeLabel\": \"dataset\", \"vocabularyName\": \"openaire::coar_resource_types_3_1\", \"typeCode\": \"http://purl.org/coar/resource_type/c_ddb1\"}], \"alternateIdentifier\": [{\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/bq0h-znc\"}], \"dateofacceptance\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"2018-05-22\"}, \"collectedfrom\": {\"value\": \"Sygma\", \"key\": \"10|openaire____::a8db6f6b2ce4fe72e8b2314a9a93e7d9\"}, \"accessright\": {\"classid\": \"OPEN\", \"classname\": \"Open Access\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"instancetype\": {\"classid\": \"0021\", \"classname\": \"Dataset\", \"schemename\": \"dnet:publication_resource\", \"schemeid\": \"dnet:publication_resource\"}}, {\"refereed\": {\"classid\": \"0002\", \"classname\": \"nonPeerReviewed\", \"schemename\": \"dnet:review_levels\", \"schemeid\": \"dnet:review_levels\"}, \"hostedby\": {\"value\": \"Unknown Repository\", \"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\"}, \"license\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"CC BY NC SA\"}, \"url\": [\"https://doi.org/10.25493/a5v4-hfh\"], \"pid\": [], \"instanceTypeMapping\": [{\"originalType\": \"Dataset\", \"typeLabel\": \"dataset\", \"vocabularyName\": \"openaire::coar_resource_types_3_1\", \"typeCode\": \"http://purl.org/coar/resource_type/c_ddb1\"}], \"alternateIdentifier\": [{\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/a5v4-hfh\"}], \"dateofacceptance\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"2018-05-22\"}, \"collectedfrom\": {\"value\": \"Sygma\", \"key\": \"10|openaire____::a8db6f6b2ce4fe72e8b2314a9a93e7d9\"}, \"accessright\": {\"classid\": \"OPEN\", \"classname\": \"Open Access\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"instancetype\": {\"classid\": \"0021\", \"classname\": \"Dataset\", \"schemename\": \"dnet:publication_resource\", \"schemeid\": \"dnet:publication_resource\"}}, {\"refereed\": {\"classid\": \"0002\", \"classname\": \"nonPeerReviewed\", \"schemename\": \"dnet:review_levels\", \"schemeid\": \"dnet:review_levels\"}, \"hostedby\": {\"value\": \"Unknown Repository\", \"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\"}, \"license\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"CC BY NC SA\"}, \"url\": [\"https://doi.org/10.25493/cg41-q6u\"], \"pid\": [], \"instanceTypeMapping\": [{\"originalType\": \"Dataset\", \"typeLabel\": \"dataset\", \"vocabularyName\": \"openaire::coar_resource_types_3_1\", \"typeCode\": \"http://purl.org/coar/resource_type/c_ddb1\"}], \"alternateIdentifier\": [{\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/cg41-q6u\"}], \"dateofacceptance\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"2018-05-22\"}, \"collectedfrom\": {\"value\": \"Sygma\", \"key\": \"10|openaire____::a8db6f6b2ce4fe72e8b2314a9a93e7d9\"}, \"accessright\": {\"classid\": \"OPEN\", \"classname\": \"Open Access\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"instancetype\": {\"classid\": \"0021\", \"classname\": \"Dataset\", \"schemename\": \"dnet:publication_resource\", \"schemeid\": \"dnet:publication_resource\"}}], \"version\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"null\"}, \"storagedate\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"2018-05-22\"}, \"dateofcollection\": \"2024-09-05T17:19:05.982\", \"metaResourceType\": {\"classid\": \"Research Data\", \"classname\": \"Research Data\", \"schemename\": \"openaire::meta_resource_types\", \"schemeid\": \"openaire::meta_resource_types\"}, \"dateofacceptance\": {\"value\": \"2018-05-22\"}, \"format\": [], \"subject\": [{\"qualifier\": {\"classid\": \"keyword\", \"classname\": \"keyword\", \"schemename\": \"dnet:subject_classification_typologies\", \"schemeid\": \"dnet:subject_classification_typologies\"}, \"value\": \"Neuroscience\"}], \"coverage\": [], \"externalReference\": [], \"publisher\": {\"value\": \"Human Brain Project Neuroinformatics Platform\"}, \"eoscifguidelines\": [], \"language\": {\"classid\": \"und\", \"classname\": \"Undetermined\", \"schemename\": \"dnet:languages\", \"schemeid\": \"dnet:languages\"}, \"resulttype\": {\"classid\": \"dataset\", \"classname\": \"dataset\", \"schemename\": \"dnet:result_typologies\", \"schemeid\": \"dnet:result_typologies\"}, \"country\": [], \"extraInfo\": [], \"originalId\": [\"50|datacite____::7f98d6c254b0d743c4634412ed94817b\", \"10.25493/bq0h-znc\", \"50|datacite____::4bc8e949f3238624d64980b21cbf8441\", \"10.25493/a5v4-hfh\", \"50|datacite____::098cabeb288616a6a4ae7e950f2a372e\", \"10.25493/cg41-q6u\", \"50|sygma_______::7f98d6c254b0d743c4634412ed94817b\", \"50|sygma_______::4bc8e949f3238624d64980b21cbf8441\", \"50|sygma_______::098cabeb288616a6a4ae7e950f2a372e\"], \"source\": [], \"context\": [{\"dataInfo\": [{\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"iis\", \"classname\": \"Inferred by OpenAIRE\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": true, \"inferenceprovenance\": \"iis::document_referencedProjects\", \"invisible\": false, \"trust\": \"0.9\"}, {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"iis\", \"classname\": \"Inferred by OpenAIRE\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": true, \"inferenceprovenance\": \"iis::document_referencedProjects\", \"invisible\": false, \"trust\": \"0.9\"}, {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"iis\", \"classname\": \"Inferred by OpenAIRE\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": true, \"inferenceprovenance\": \"iis::document_referencedProjects\", \"invisible\": false, \"trust\": \"0.9\"}], \"id\": \"fet-h2020::flag::85792436c02c::785907\"}, {\"dataInfo\": [{\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"iis\", \"classname\": \"Inferred by OpenAIRE\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": true, \"inferenceprovenance\": \"iis::document_referencedProjects\", \"invisible\": false, \"trust\": \"0.9\"}, {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"iis\", \"classname\": \"Inferred by OpenAIRE\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": true, \"inferenceprovenance\": \"iis::document_referencedProjects\", \"invisible\": false, \"trust\": \"0.9\"}, {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"iis\", \"classname\": \"Inferred by OpenAIRE\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": true, \"inferenceprovenance\": \"iis::document_referencedProjects\", \"invisible\": false, \"trust\": \"0.9\"}], \"id\": \"fet-h2020::flag::91670347c2a7::945539\"}], \"title\": [{\"qualifier\": {\"classid\": \"main title\", \"classname\": \"main title\", \"schemename\": \"dnet:dataCite_title\", \"schemeid\": \"dnet:dataCite_title\"}, \"value\": \"Probabilistic cytoarchitectonic map of Area 5Ci (SPL) (v8.2)\"}, {\"qualifier\": {\"classid\": \"main title\", \"classname\": \"main title\", \"schemename\": \"dnet:dataCite_title\", \"schemeid\": \"dnet:dataCite_title\"}, \"value\": \"Probabilistic cytoarchitectonic map of Area 5L (SPL) (v8.2)\"}, {\"qualifier\": {\"classid\": \"main title\", \"classname\": \"main title\", \"schemename\": \"dnet:dataCite_title\", \"schemeid\": \"dnet:dataCite_title\"}, \"value\": \"Probabilistic cytoarchitectonic map of Area 5M (SPL) (v8.2)\"}]}"; + String json2 = "{\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": true, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"resourcetype\": {\"classid\": \"UNKNOWN\", \"classname\": \"UNKNOWN\", \"schemename\": \"dnet:dataCite_resource\", \"schemeid\": \"dnet:dataCite_resource\"}, \"pid\": [{\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/bq0h-znc\"}], \"bestaccessright\": {\"classid\": \"UNKNOWN\", \"classname\": \"not available\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"relevantdate\": [{\"qualifier\": {\"classid\": \"issued\", \"classname\": \"issued\", \"schemename\": \"dnet:dataCite_date\", \"schemeid\": \"dnet:dataCite_date\"}, \"value\": \"2018-05-22\"}, {\"qualifier\": {\"classid\": \"available\", \"classname\": \"available\", \"schemename\": \"dnet:dataCite_date\", \"schemeid\": \"dnet:dataCite_date\"}, \"value\": \"2018-03-31\"}], \"collectedfrom\": [{\"value\": \"Datacite\", \"key\": \"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254\"}], \"id\": \"50|doi_________::7f98d6c254b0d743c4634412ed94817b\", \"subject\": [{\"qualifier\": {\"classid\": \"keyword\", \"classname\": \"keyword\", \"schemename\": \"dnet:subject_classification_typologies\", \"schemeid\": \"dnet:subject_classification_typologies\"}, \"value\": \"Neuroscience\"}], \"embargoenddate\": {\"value\": \"2018-03-31\"}, \"author\": [{\"surname\": \"Scheperjans\", \"name\": \"F.\", \"pid\": [], \"rank\": 1, \"affiliation\": [], \"fullname\": \"Scheperjans, F.\"}, {\"surname\": \"Hermann\", \"name\": \"K.\", \"pid\": [], \"rank\": 2, \"affiliation\": [], \"fullname\": \"Hermann, K.\"}, {\"surname\": \"Eickhoff\", \"name\": \"S. B.\", \"pid\": [], \"rank\": 3, \"affiliation\": [], \"fullname\": \"Eickhoff, S. B.\"}, {\"surname\": \"Amunts\", \"name\": \"K.\", \"pid\": [], \"rank\": 4, \"affiliation\": [], \"fullname\": \"Amunts, K.\"}, {\"surname\": \"Schleicher\", \"name\": \"A.\", \"pid\": [], \"rank\": 5, \"affiliation\": [], \"fullname\": \"Schleicher, A.\"}, {\"surname\": \"Hoemke\", \"name\": \"L.\", \"pid\": [], \"rank\": 6, \"affiliation\": [], \"fullname\": \"Hoemke, L.\"}, {\"surname\": \"Mohlberg\", \"name\": \"H.\", \"pid\": [], \"rank\": 7, \"affiliation\": [], \"fullname\": \"Mohlberg, H.\"}, {\"surname\": \"Zilles\", \"name\": \"K.\", \"pid\": [], \"rank\": 8, \"affiliation\": [], \"fullname\": \"Zilles, K.\"}], \"description\": [{\"value\": \"This dataset contains the distinct probabilistic cytoarchitectonic map of Area 5Ci (SPL) in the individual, single subject template of the MNI Colin 27 reference space. As part of the Julich-Brain cytoarchitectonic atlas, the area was identified using classical histological criteria and quantitative cytoarchitectonic analysis on cell-body-stained histological sections of 10 human postmortem brains obtained from the body donor program of the University of D\\\\u00fcsseldorf. The results of the cytoarchitectonic analysis were then mapped to the reference space, where each voxel was assigned the probability to belong to Area 5Ci (SPL). The probability map of Area 5Ci (SPL) is provided in NifTi format for each hemisphere in the reference space. The Julich-Brain atlas relies on a modular, flexible and adaptive framework containing workflows to create the probabilistic brain maps for these structures. Note that methodological improvements and updated probability estimates for new brain structures may in some cases lead to measurable but negligible deviations of existing probability maps, as compared to earlier released datasets. Other available data versions of Area 5Ci (SPL): Scheperjans et al. (2019) [Data set, v8.4] [DOI: 10.25493/SQVP-GK1](https://doi.org/10.25493%2FSQVP-GK1) The most probable delineation of Area 5Ci (SPL) derived from the calculation of a maximum probability map of all currently released Julich-Brain brain structures can be found here: Amunts et al. (2019) [Data set, v1.13] [DOI: 10.25493/Q3ZS-NV6](https://doi.org/10.25493%2FQ3ZS-NV6) Amunts et al. (2019) [Data set, v1.18] [DOI: 10.25493/8EGG-ZAR](https://doi.org/10.25493%2F8EGG-ZAR) Amunts et al. (2020) [Data set, v2.2] [DOI: 10.25493/TAKY-64D](https://doi.org/10.25493%2FTAKY-64D)\"}], \"instance\": [{\"refereed\": {\"classid\": \"0002\", \"classname\": \"nonPeerReviewed\", \"schemename\": \"dnet:review_levels\", \"schemeid\": \"dnet:review_levels\"}, \"hostedby\": {\"value\": \"EBRAINS\", \"key\": \"10|fairsharing_::2ef3e50fd7c1091dda165f25be7f64fd\"}, \"license\": {\"value\": \"CC BY NC SA\"}, \"url\": [\"https://dx.doi.org/10.25493/bq0h-znc\"], \"pid\": [{\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/bq0h-znc\"}], \"instanceTypeMapping\": [{\"originalType\": \"Dataset\", \"typeLabel\": \"dataset\", \"vocabularyName\": \"openaire::coar_resource_types_3_1\", \"typeCode\": \"http://purl.org/coar/resource_type/c_ddb1\"}], \"dateofacceptance\": {\"value\": \"2018-05-22\"}, \"collectedfrom\": {\"value\": \"Datacite\", \"key\": \"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254\"}, \"accessright\": {\"classid\": \"UNKNOWN\", \"classname\": \"not available\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"instancetype\": {\"classid\": \"0021\", \"classname\": \"Dataset\", \"schemename\": \"dnet:publication_resource\", \"schemeid\": \"dnet:publication_resource\"}}], \"dateofcollection\": \"2021-07-20T01:01:03+0000\", \"metaResourceType\": {\"classid\": \"Research Data\", \"classname\": \"Research Data\", \"schemename\": \"openaire::meta_resource_types\", \"schemeid\": \"openaire::meta_resource_types\"}, \"dateoftransformation\": \"2021-07-20T01:01:03+0000\", \"dateofacceptance\": {\"value\": \"2018-05-22\"}, \"publisher\": {\"value\": \"Human Brain Project Neuroinformatics Platform\"}, \"language\": {\"classid\": \"und\", \"classname\": \"Undetermined\", \"schemename\": \"dnet:languages\", \"schemeid\": \"dnet:languages\"}, \"resulttype\": {\"classid\": \"dataset\", \"classname\": \"dataset\", \"schemename\": \"dnet:result_typologies\", \"schemeid\": \"dnet:result_typologies\"}, \"title\": [{\"qualifier\": {\"classid\": \"main title\", \"classname\": \"main title\", \"schemename\": \"dnet:dataCite_title\", \"schemeid\": \"dnet:dataCite_title\"}, \"value\": \"Probabilistic cytoarchitectonic map of Area 5Ci (SPL) (v8.2)\"}], \"originalId\": [\"50|datacite____::7f98d6c254b0d743c4634412ed94817b\", \"10.25493/bq0h-znc\"], \"context\": []}"; + + Row a = model.rowFromJson(json1); + Row b = model.rowFromJson(json2); + + boolean result = new TreeProcessor(config).compare(a, b); + + System.out.println("Tree Processor Result = " + result); + + } + + @Test // test the keys between two JSON + @Disabled + public void blockingTest() { + + String json1 = "{\"geolocation\": [], \"dataInfo\": {\"deletedbyinference\": true, \"provenanceaction\": {\"classid\": \"sysimport:dedup\", \"classname\": \"Inferred by OpenAIRE\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": true, \"inferenceprovenance\": \"dedup-result-decisiontree-v4\", \"invisible\": false, \"trust\": \"0.8\"}, \"resourcetype\": {\"classid\": \"UNKNOWN\", \"classname\": \"UNKNOWN\", \"schemename\": \"dnet:dataCite_resource\", \"schemeid\": \"dnet:dataCite_resource\"}, \"pid\": [{\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/bq0h-znc\"}, {\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/a5v4-hfh\"}, {\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/cg41-q6u\"}], \"bestaccessright\": {\"classid\": \"OPEN\", \"classname\": \"Open Access\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"relevantdate\": [{\"qualifier\": {\"classid\": \"issued\", \"classname\": \"issued\", \"schemename\": \"dnet:dataCite_date\", \"schemeid\": \"dnet:dataCite_date\"}, \"value\": \"2018-05-22\"}, {\"qualifier\": {\"classid\": \"available\", \"classname\": \"available\", \"schemename\": \"dnet:dataCite_date\", \"schemeid\": \"dnet:dataCite_date\"}, \"value\": \"2018-03-31\"}], \"contributor\": [{\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"Amunts, Katrin\"}], \"id\": \"50|dedup_wf_002::639e90885a3cccbe75746679ff5492c7\", \"description\": [{\"value\": \"This dataset contains the distinct probabilistic cytoarchitectonic map of Area 5Ci (SPL) in the individual, single subject template of the MNI Colin 27 reference space. As part of the Julich-Brain cytoarchitectonic atlas, the area was identified using classical histological criteria and quantitative cytoarchitectonic analysis on cell-body-stained histological sections of 10 human postmortem brains obtained from the body donor program of the University of D\\\\u00fcsseldorf. The results of the cytoarchitectonic analysis were then mapped to the reference space, where each voxel was assigned the probability to belong to Area 5Ci (SPL). The probability map of Area 5Ci (SPL) is provided in NifTi format for each hemisphere in the reference space. The Julich-Brain atlas relies on a modular, flexible and adaptive framework containing workflows to create the probabilistic brain maps for these structures. Note that methodological improvements and updated probability estimates for new brain structures may in some cases lead to measurable but negligible deviations of existing probability maps, as compared to earlier released datasets. Other available data versions of Area 5Ci (SPL): Scheperjans et al. (2019) [Data set, v8.4] [DOI: 10.25493/SQVP-GK1](https://doi.org/10.25493%2FSQVP-GK1) The most probable delineation of Area 5Ci (SPL) derived from the calculation of a maximum probability map of all currently released Julich-Brain brain structures can be found here: Amunts et al. (2019) [Data set, v1.13] [DOI: 10.25493/Q3ZS-NV6](https://doi.org/10.25493%2FQ3ZS-NV6) Amunts et al. (2019) [Data set, v1.18] [DOI: 10.25493/8EGG-ZAR](https://doi.org/10.25493%2F8EGG-ZAR) Amunts et al. (2020) [Data set, v2.2] [DOI: 10.25493/TAKY-64D](https://doi.org/10.25493%2FTAKY-64D)\"}], \"embargoenddate\": {\"value\": \"2018-03-31\"}, \"lastupdatetimestamp\": 1725556617730, \"author\": [{\"surname\": \"Scheperjans\", \"name\": \"F.\", \"pid\": [], \"rank\": 1, \"affiliation\": [], \"fullname\": \"Scheperjans, F.\"}, {\"surname\": \"Hermann\", \"name\": \"K.\", \"pid\": [], \"rank\": 2, \"affiliation\": [], \"fullname\": \"Hermann, K.\"}, {\"surname\": \"Eickhoff\", \"name\": \"S. B.\", \"pid\": [], \"rank\": 3, \"affiliation\": [], \"fullname\": \"Eickhoff, S. B.\"}, {\"surname\": \"Amunts\", \"name\": \"K.\", \"pid\": [], \"rank\": 4, \"affiliation\": [], \"fullname\": \"Amunts, K.\"}, {\"surname\": \"Schleicher\", \"name\": \"A.\", \"pid\": [], \"rank\": 5, \"affiliation\": [], \"fullname\": \"Schleicher, A.\"}, {\"surname\": \"Hoemke\", \"name\": \"L.\", \"pid\": [], \"rank\": 6, \"affiliation\": [], \"fullname\": \"Hoemke, L.\"}, {\"surname\": \"Mohlberg\", \"name\": \"H.\", \"pid\": [], \"rank\": 7, \"affiliation\": [], \"fullname\": \"Mohlberg, H.\"}, {\"surname\": \"Zilles\", \"name\": \"K.\", \"pid\": [], \"rank\": 8, \"affiliation\": [], \"fullname\": \"Zilles, K.\"}], \"collectedfrom\": [{\"value\": \"Datacite\", \"key\": \"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254\"}, {\"value\": \"Sygma\", \"key\": \"10|openaire____::a8db6f6b2ce4fe72e8b2314a9a93e7d9\"}], \"instance\": [{\"refereed\": {\"classid\": \"0002\", \"classname\": \"nonPeerReviewed\", \"schemename\": \"dnet:review_levels\", \"schemeid\": \"dnet:review_levels\"}, \"hostedby\": {\"value\": \"EBRAINS\", \"key\": \"10|fairsharing_::2ef3e50fd7c1091dda165f25be7f64fd\"}, \"license\": {\"value\": \"CC BY NC SA\"}, \"url\": [\"https://dx.doi.org/10.25493/bq0h-znc\"], \"pid\": [{\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/bq0h-znc\"}], \"instanceTypeMapping\": [{\"originalType\": \"Dataset\", \"typeLabel\": \"dataset\", \"vocabularyName\": \"openaire::coar_resource_types_3_1\", \"typeCode\": \"http://purl.org/coar/resource_type/c_ddb1\"}], \"dateofacceptance\": {\"value\": \"2018-05-22\"}, \"collectedfrom\": {\"value\": \"Datacite\", \"key\": \"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254\"}, \"accessright\": {\"classid\": \"UNKNOWN\", \"classname\": \"not available\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"instancetype\": {\"classid\": \"0021\", \"classname\": \"Dataset\", \"schemename\": \"dnet:publication_resource\", \"schemeid\": \"dnet:publication_resource\"}}, {\"refereed\": {\"classid\": \"0002\", \"classname\": \"nonPeerReviewed\", \"schemename\": \"dnet:review_levels\", \"schemeid\": \"dnet:review_levels\"}, \"hostedby\": {\"value\": \"EBRAINS\", \"key\": \"10|fairsharing_::2ef3e50fd7c1091dda165f25be7f64fd\"}, \"license\": {\"value\": \"CC BY NC SA\"}, \"url\": [\"https://dx.doi.org/10.25493/a5v4-hfh\"], \"pid\": [{\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/a5v4-hfh\"}], \"instanceTypeMapping\": [{\"originalType\": \"Dataset\", \"typeLabel\": \"dataset\", \"vocabularyName\": \"openaire::coar_resource_types_3_1\", \"typeCode\": \"http://purl.org/coar/resource_type/c_ddb1\"}], \"dateofacceptance\": {\"value\": \"2018-05-22\"}, \"collectedfrom\": {\"value\": \"Datacite\", \"key\": \"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254\"}, \"accessright\": {\"classid\": \"UNKNOWN\", \"classname\": \"not available\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"instancetype\": {\"classid\": \"0021\", \"classname\": \"Dataset\", \"schemename\": \"dnet:publication_resource\", \"schemeid\": \"dnet:publication_resource\"}}, {\"refereed\": {\"classid\": \"0002\", \"classname\": \"nonPeerReviewed\", \"schemename\": \"dnet:review_levels\", \"schemeid\": \"dnet:review_levels\"}, \"hostedby\": {\"value\": \"EBRAINS\", \"key\": \"10|fairsharing_::2ef3e50fd7c1091dda165f25be7f64fd\"}, \"license\": {\"value\": \"CC BY NC SA\"}, \"url\": [\"https://dx.doi.org/10.25493/cg41-q6u\"], \"pid\": [{\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/cg41-q6u\"}], \"instanceTypeMapping\": [{\"originalType\": \"Dataset\", \"typeLabel\": \"dataset\", \"vocabularyName\": \"openaire::coar_resource_types_3_1\", \"typeCode\": \"http://purl.org/coar/resource_type/c_ddb1\"}], \"dateofacceptance\": {\"value\": \"2018-05-22\"}, \"collectedfrom\": {\"value\": \"Datacite\", \"key\": \"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254\"}, \"accessright\": {\"classid\": \"UNKNOWN\", \"classname\": \"not available\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"instancetype\": {\"classid\": \"0021\", \"classname\": \"Dataset\", \"schemename\": \"dnet:publication_resource\", \"schemeid\": \"dnet:publication_resource\"}}, {\"refereed\": {\"classid\": \"0002\", \"classname\": \"nonPeerReviewed\", \"schemename\": \"dnet:review_levels\", \"schemeid\": \"dnet:review_levels\"}, \"hostedby\": {\"value\": \"Unknown Repository\", \"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\"}, \"license\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"CC BY NC SA\"}, \"url\": [\"https://doi.org/10.25493/bq0h-znc\"], \"pid\": [], \"instanceTypeMapping\": [{\"originalType\": \"Dataset\", \"typeLabel\": \"dataset\", \"vocabularyName\": \"openaire::coar_resource_types_3_1\", \"typeCode\": \"http://purl.org/coar/resource_type/c_ddb1\"}], \"alternateIdentifier\": [{\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/bq0h-znc\"}], \"dateofacceptance\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"2018-05-22\"}, \"collectedfrom\": {\"value\": \"Sygma\", \"key\": \"10|openaire____::a8db6f6b2ce4fe72e8b2314a9a93e7d9\"}, \"accessright\": {\"classid\": \"OPEN\", \"classname\": \"Open Access\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"instancetype\": {\"classid\": \"0021\", \"classname\": \"Dataset\", \"schemename\": \"dnet:publication_resource\", \"schemeid\": \"dnet:publication_resource\"}}, {\"refereed\": {\"classid\": \"0002\", \"classname\": \"nonPeerReviewed\", \"schemename\": \"dnet:review_levels\", \"schemeid\": \"dnet:review_levels\"}, \"hostedby\": {\"value\": \"Unknown Repository\", \"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\"}, \"license\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"CC BY NC SA\"}, \"url\": [\"https://doi.org/10.25493/a5v4-hfh\"], \"pid\": [], \"instanceTypeMapping\": [{\"originalType\": \"Dataset\", \"typeLabel\": \"dataset\", \"vocabularyName\": \"openaire::coar_resource_types_3_1\", \"typeCode\": \"http://purl.org/coar/resource_type/c_ddb1\"}], \"alternateIdentifier\": [{\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/a5v4-hfh\"}], \"dateofacceptance\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"2018-05-22\"}, \"collectedfrom\": {\"value\": \"Sygma\", \"key\": \"10|openaire____::a8db6f6b2ce4fe72e8b2314a9a93e7d9\"}, \"accessright\": {\"classid\": \"OPEN\", \"classname\": \"Open Access\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"instancetype\": {\"classid\": \"0021\", \"classname\": \"Dataset\", \"schemename\": \"dnet:publication_resource\", \"schemeid\": \"dnet:publication_resource\"}}, {\"refereed\": {\"classid\": \"0002\", \"classname\": \"nonPeerReviewed\", \"schemename\": \"dnet:review_levels\", \"schemeid\": \"dnet:review_levels\"}, \"hostedby\": {\"value\": \"Unknown Repository\", \"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\"}, \"license\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"CC BY NC SA\"}, \"url\": [\"https://doi.org/10.25493/cg41-q6u\"], \"pid\": [], \"instanceTypeMapping\": [{\"originalType\": \"Dataset\", \"typeLabel\": \"dataset\", \"vocabularyName\": \"openaire::coar_resource_types_3_1\", \"typeCode\": \"http://purl.org/coar/resource_type/c_ddb1\"}], \"alternateIdentifier\": [{\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/cg41-q6u\"}], \"dateofacceptance\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"2018-05-22\"}, \"collectedfrom\": {\"value\": \"Sygma\", \"key\": \"10|openaire____::a8db6f6b2ce4fe72e8b2314a9a93e7d9\"}, \"accessright\": {\"classid\": \"OPEN\", \"classname\": \"Open Access\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"instancetype\": {\"classid\": \"0021\", \"classname\": \"Dataset\", \"schemename\": \"dnet:publication_resource\", \"schemeid\": \"dnet:publication_resource\"}}], \"version\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"null\"}, \"storagedate\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"2018-05-22\"}, \"dateofcollection\": \"2024-09-05T17:19:05.982\", \"metaResourceType\": {\"classid\": \"Research Data\", \"classname\": \"Research Data\", \"schemename\": \"openaire::meta_resource_types\", \"schemeid\": \"openaire::meta_resource_types\"}, \"dateofacceptance\": {\"value\": \"2018-05-22\"}, \"format\": [], \"subject\": [{\"qualifier\": {\"classid\": \"keyword\", \"classname\": \"keyword\", \"schemename\": \"dnet:subject_classification_typologies\", \"schemeid\": \"dnet:subject_classification_typologies\"}, \"value\": \"Neuroscience\"}], \"coverage\": [], \"externalReference\": [], \"publisher\": {\"value\": \"Human Brain Project Neuroinformatics Platform\"}, \"eoscifguidelines\": [], \"language\": {\"classid\": \"und\", \"classname\": \"Undetermined\", \"schemename\": \"dnet:languages\", \"schemeid\": \"dnet:languages\"}, \"resulttype\": {\"classid\": \"dataset\", \"classname\": \"dataset\", \"schemename\": \"dnet:result_typologies\", \"schemeid\": \"dnet:result_typologies\"}, \"country\": [], \"extraInfo\": [], \"originalId\": [\"50|datacite____::7f98d6c254b0d743c4634412ed94817b\", \"10.25493/bq0h-znc\", \"50|datacite____::4bc8e949f3238624d64980b21cbf8441\", \"10.25493/a5v4-hfh\", \"50|datacite____::098cabeb288616a6a4ae7e950f2a372e\", \"10.25493/cg41-q6u\", \"50|sygma_______::7f98d6c254b0d743c4634412ed94817b\", \"50|sygma_______::4bc8e949f3238624d64980b21cbf8441\", \"50|sygma_______::098cabeb288616a6a4ae7e950f2a372e\"], \"source\": [], \"context\": [{\"dataInfo\": [{\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"iis\", \"classname\": \"Inferred by OpenAIRE\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": true, \"inferenceprovenance\": \"iis::document_referencedProjects\", \"invisible\": false, \"trust\": \"0.9\"}, {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"iis\", \"classname\": \"Inferred by OpenAIRE\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": true, \"inferenceprovenance\": \"iis::document_referencedProjects\", \"invisible\": false, \"trust\": \"0.9\"}, {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"iis\", \"classname\": \"Inferred by OpenAIRE\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": true, \"inferenceprovenance\": \"iis::document_referencedProjects\", \"invisible\": false, \"trust\": \"0.9\"}], \"id\": \"fet-h2020::flag::85792436c02c::785907\"}, {\"dataInfo\": [{\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"iis\", \"classname\": \"Inferred by OpenAIRE\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": true, \"inferenceprovenance\": \"iis::document_referencedProjects\", \"invisible\": false, \"trust\": \"0.9\"}, {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"iis\", \"classname\": \"Inferred by OpenAIRE\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": true, \"inferenceprovenance\": \"iis::document_referencedProjects\", \"invisible\": false, \"trust\": \"0.9\"}, {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"iis\", \"classname\": \"Inferred by OpenAIRE\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": true, \"inferenceprovenance\": \"iis::document_referencedProjects\", \"invisible\": false, \"trust\": \"0.9\"}], \"id\": \"fet-h2020::flag::91670347c2a7::945539\"}], \"title\": [{\"qualifier\": {\"classid\": \"main title\", \"classname\": \"main title\", \"schemename\": \"dnet:dataCite_title\", \"schemeid\": \"dnet:dataCite_title\"}, \"value\": \"Probabilistic cytoarchitectonic map of Area 5Ci (SPL) (v8.2)\"}, {\"qualifier\": {\"classid\": \"main title\", \"classname\": \"main title\", \"schemename\": \"dnet:dataCite_title\", \"schemeid\": \"dnet:dataCite_title\"}, \"value\": \"Probabilistic cytoarchitectonic map of Area 5L (SPL) (v8.2)\"}, {\"qualifier\": {\"classid\": \"main title\", \"classname\": \"main title\", \"schemename\": \"dnet:dataCite_title\", \"schemeid\": \"dnet:dataCite_title\"}, \"value\": \"Probabilistic cytoarchitectonic map of Area 5M (SPL) (v8.2)\"}]}"; + String json2 = "{\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": true, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"resourcetype\": {\"classid\": \"UNKNOWN\", \"classname\": \"UNKNOWN\", \"schemename\": \"dnet:dataCite_resource\", \"schemeid\": \"dnet:dataCite_resource\"}, \"pid\": [{\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/bq0h-znc\"}], \"bestaccessright\": {\"classid\": \"UNKNOWN\", \"classname\": \"not available\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"relevantdate\": [{\"qualifier\": {\"classid\": \"issued\", \"classname\": \"issued\", \"schemename\": \"dnet:dataCite_date\", \"schemeid\": \"dnet:dataCite_date\"}, \"value\": \"2018-05-22\"}, {\"qualifier\": {\"classid\": \"available\", \"classname\": \"available\", \"schemename\": \"dnet:dataCite_date\", \"schemeid\": \"dnet:dataCite_date\"}, \"value\": \"2018-03-31\"}], \"collectedfrom\": [{\"value\": \"Datacite\", \"key\": \"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254\"}], \"id\": \"50|doi_________::7f98d6c254b0d743c4634412ed94817b\", \"subject\": [{\"qualifier\": {\"classid\": \"keyword\", \"classname\": \"keyword\", \"schemename\": \"dnet:subject_classification_typologies\", \"schemeid\": \"dnet:subject_classification_typologies\"}, \"value\": \"Neuroscience\"}], \"embargoenddate\": {\"value\": \"2018-03-31\"}, \"author\": [{\"surname\": \"Scheperjans\", \"name\": \"F.\", \"pid\": [], \"rank\": 1, \"affiliation\": [], \"fullname\": \"Scheperjans, F.\"}, {\"surname\": \"Hermann\", \"name\": \"K.\", \"pid\": [], \"rank\": 2, \"affiliation\": [], \"fullname\": \"Hermann, K.\"}, {\"surname\": \"Eickhoff\", \"name\": \"S. B.\", \"pid\": [], \"rank\": 3, \"affiliation\": [], \"fullname\": \"Eickhoff, S. B.\"}, {\"surname\": \"Amunts\", \"name\": \"K.\", \"pid\": [], \"rank\": 4, \"affiliation\": [], \"fullname\": \"Amunts, K.\"}, {\"surname\": \"Schleicher\", \"name\": \"A.\", \"pid\": [], \"rank\": 5, \"affiliation\": [], \"fullname\": \"Schleicher, A.\"}, {\"surname\": \"Hoemke\", \"name\": \"L.\", \"pid\": [], \"rank\": 6, \"affiliation\": [], \"fullname\": \"Hoemke, L.\"}, {\"surname\": \"Mohlberg\", \"name\": \"H.\", \"pid\": [], \"rank\": 7, \"affiliation\": [], \"fullname\": \"Mohlberg, H.\"}, {\"surname\": \"Zilles\", \"name\": \"K.\", \"pid\": [], \"rank\": 8, \"affiliation\": [], \"fullname\": \"Zilles, K.\"}], \"description\": [{\"value\": \"This dataset contains the distinct probabilistic cytoarchitectonic map of Area 5Ci (SPL) in the individual, single subject template of the MNI Colin 27 reference space. As part of the Julich-Brain cytoarchitectonic atlas, the area was identified using classical histological criteria and quantitative cytoarchitectonic analysis on cell-body-stained histological sections of 10 human postmortem brains obtained from the body donor program of the University of D\\\\u00fcsseldorf. The results of the cytoarchitectonic analysis were then mapped to the reference space, where each voxel was assigned the probability to belong to Area 5Ci (SPL). The probability map of Area 5Ci (SPL) is provided in NifTi format for each hemisphere in the reference space. The Julich-Brain atlas relies on a modular, flexible and adaptive framework containing workflows to create the probabilistic brain maps for these structures. Note that methodological improvements and updated probability estimates for new brain structures may in some cases lead to measurable but negligible deviations of existing probability maps, as compared to earlier released datasets. Other available data versions of Area 5Ci (SPL): Scheperjans et al. (2019) [Data set, v8.4] [DOI: 10.25493/SQVP-GK1](https://doi.org/10.25493%2FSQVP-GK1) The most probable delineation of Area 5Ci (SPL) derived from the calculation of a maximum probability map of all currently released Julich-Brain brain structures can be found here: Amunts et al. (2019) [Data set, v1.13] [DOI: 10.25493/Q3ZS-NV6](https://doi.org/10.25493%2FQ3ZS-NV6) Amunts et al. (2019) [Data set, v1.18] [DOI: 10.25493/8EGG-ZAR](https://doi.org/10.25493%2F8EGG-ZAR) Amunts et al. (2020) [Data set, v2.2] [DOI: 10.25493/TAKY-64D](https://doi.org/10.25493%2FTAKY-64D)\"}], \"instance\": [{\"refereed\": {\"classid\": \"0002\", \"classname\": \"nonPeerReviewed\", \"schemename\": \"dnet:review_levels\", \"schemeid\": \"dnet:review_levels\"}, \"hostedby\": {\"value\": \"EBRAINS\", \"key\": \"10|fairsharing_::2ef3e50fd7c1091dda165f25be7f64fd\"}, \"license\": {\"value\": \"CC BY NC SA\"}, \"url\": [\"https://dx.doi.org/10.25493/bq0h-znc\"], \"pid\": [{\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/bq0h-znc\"}], \"instanceTypeMapping\": [{\"originalType\": \"Dataset\", \"typeLabel\": \"dataset\", \"vocabularyName\": \"openaire::coar_resource_types_3_1\", \"typeCode\": \"http://purl.org/coar/resource_type/c_ddb1\"}], \"dateofacceptance\": {\"value\": \"2018-05-22\"}, \"collectedfrom\": {\"value\": \"Datacite\", \"key\": \"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254\"}, \"accessright\": {\"classid\": \"UNKNOWN\", \"classname\": \"not available\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"instancetype\": {\"classid\": \"0021\", \"classname\": \"Dataset\", \"schemename\": \"dnet:publication_resource\", \"schemeid\": \"dnet:publication_resource\"}}], \"dateofcollection\": \"2021-07-20T01:01:03+0000\", \"metaResourceType\": {\"classid\": \"Research Data\", \"classname\": \"Research Data\", \"schemename\": \"openaire::meta_resource_types\", \"schemeid\": \"openaire::meta_resource_types\"}, \"dateoftransformation\": \"2021-07-20T01:01:03+0000\", \"dateofacceptance\": {\"value\": \"2018-05-22\"}, \"publisher\": {\"value\": \"Human Brain Project Neuroinformatics Platform\"}, \"language\": {\"classid\": \"und\", \"classname\": \"Undetermined\", \"schemename\": \"dnet:languages\", \"schemeid\": \"dnet:languages\"}, \"resulttype\": {\"classid\": \"dataset\", \"classname\": \"dataset\", \"schemename\": \"dnet:result_typologies\", \"schemeid\": \"dnet:result_typologies\"}, \"title\": [{\"qualifier\": {\"classid\": \"main title\", \"classname\": \"main title\", \"schemename\": \"dnet:dataCite_title\", \"schemeid\": \"dnet:dataCite_title\"}, \"value\": \"Probabilistic cytoarchitectonic map of Area 5Ci (SPL) (v8.2)\"}], \"originalId\": [\"50|datacite____::7f98d6c254b0d743c4634412ed94817b\", \"10.25493/bq0h-znc\"], \"context\": []}"; + + Row a = model.rowFromJson(json1); + Row b = model.rowFromJson(json2); + + List rows = Lists.newArrayList(a, b); + + Dataset rowsDS = spark + .createDataset(rows, RowEncoder.apply(model.schema())) + .transform(deduper.filterAndCleanup()) + .transform(deduper.generateClustersWithCollect()); + + for (Row r : rowsDS.toJavaRDD().collect()) { + System.out.println("block key = " + r.get(0)); + System.out.println("block size = " + r.get(1)); + } + + } + + private void showGraph(Dataset entities, Dataset simRels) { + try { + prepareGraphParams( + entities, + simRels, + "/tmp/graph.html", + Paths + .get( + Objects + .requireNonNull( + SparkDedupLocalTest.class + .getResource("/eu/dnetlib/dhp/dedup/visualization_tools/graph_template.html")) + .toURI()) + .toFile() + .getAbsolutePath()); + Desktop.getDesktop().browse(new File("/tmp/graph.html").toURI()); + } catch (Exception e) { + e.printStackTrace(); + } + } - final String entitiesPath = Paths - .get(Objects.requireNonNull(SparkDedupLocalTest.class.getResource("/eu/dnetlib/dhp/dedup/entities/publication")).toURI()) - .toFile() - .getAbsolutePath(); - - final String dedupConfPath = Paths - .get(Objects.requireNonNull(SparkDedupLocalTest.class.getResource("/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json")).toURI()) - .toFile() - .getAbsolutePath(); - - final static int MAX_ACCEPTANCE_DATE = 20; - - private static SparkDeduper deduper; - private static SparkModel model; - - public SparkDedupLocalTest() throws URISyntaxException { - } - - @BeforeAll - public void setup() throws IOException { - - config = DedupConfig.load(fileToString(dedupConfPath)); - - spark = SparkSession - .builder() - .appName("Deduplication") - .master("local[*]") - .getOrCreate(); - context = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - deduper = new SparkDeduper(config); - - model = new SparkModel(config); - - } - - @Test //full deduplication workflow test - @Disabled - public void deduplicationTest() { - - long before_simrels = System.currentTimeMillis(); - - Dataset entities = spark - .read() - .textFile(entitiesPath) - .transform(deduper.model().parseJsonDataset()); - - Dataset simRels = entities - .transform(deduper.dedup()) - .distinct() - .map((MapFunction) t -> - DedupUtility.createSimRel(t.getStruct(0).getString(0), t.getStruct(0).getString(1), config.getWf().getEntityType()), Encoders.bean(Relation.class) - ); - - long simrels_time = System.currentTimeMillis() - before_simrels; - - long simrels_number = simRels.count(); - - long before_mergerels = System.currentTimeMillis(); - - UserDefinedFunction hashUDF = functions - .udf( - (String s) -> hash(s), DataTypes.LongType); - - // - Dataset vertexIdMap = simRels - .select("source", "target") - .selectExpr("source as id") - .union(simRels.selectExpr("target as id")) - .distinct() - .withColumn("vertexId", hashUDF.apply(functions.col("id"))); - - // transform simrels into pairs of numeric ids - final Dataset edges = simRels - .select("source", "target") - .withColumn("source", hashUDF.apply(functions.col("source"))) - .withColumn("target", hashUDF.apply(functions.col("target"))); - - for(Relation r: simRels.toJavaRDD().collect()) { - System.out.println(r.getSource() + " ---> " + r.getTarget()); - } - - - // resolve connected components - // ("vertexId", "groupId") - Dataset cliques = ConnectedComponent - .runOnPairs(edges, 50, spark); - - // transform "vertexId" back to its original string value - // groupId is kept numeric as its string value is not used - // ("id", "groupId") - Dataset mergeRels = cliques - .join(vertexIdMap, JavaConversions.asScalaBuffer(Collections.singletonList("vertexId")), "inner") - .drop("vertexId") - .distinct() - .flatMap((FlatMapFunction) (Row r) -> { - ArrayList res = new ArrayList<>(); - - String id = r.getAs("id"); - String groupId = r.getAs("groupId").toString(); - res.add(createRel(groupId, id, ModelConstants.MERGES, config)); - res.add(createRel(id, groupId, ModelConstants.IS_MERGED_IN, config)); - - return res.iterator(); - }, Encoders.bean(Relation.class)); - - - long mergerels_time = System.currentTimeMillis() - before_mergerels; - - long mergerels_number = mergeRels.count(); - - long before_dedupentity = System.currentTimeMillis(); - - final Class clazz = ModelSupport.entityTypes.get(EntityType.valueOf(config.getWf().getSubEntityValue())); - final Encoder beanEncoder = Encoders.bean(clazz); - final Encoder kryoEncoder = Encoders.kryo(clazz); - - Dataset kryoEntities = spark.read().schema(Encoders.bean(clazz).schema()).json(entitiesPath).as(beanEncoder).map( - (MapFunction>) entity -> { - return new Tuple2<>(entity.getId(), entity); - }, - Encoders.tuple(Encoders.STRING(), kryoEncoder)) - .selectExpr("_1 AS id", "_2 AS kryoObject"); - - // : source is the dedup_id, target is the id of the mergedIn - Dataset mergeRelsRow = mergeRels - .where("relClass == 'merges'") - .selectExpr("source as dedupId", "target as id"); - - Dataset dedupRecords = mergeRelsRow - .join(kryoEntities, JavaConversions.asScalaBuffer(Collections.singletonList("id")), "left") - .select("dedupId", "id", "kryoObject") - .as(Encoders.tuple(Encoders.STRING(), Encoders.STRING(), kryoEncoder)) - .groupByKey((MapFunction, String>) Tuple3::_1, Encoders.STRING()) - .flatMapGroups( - (FlatMapGroupsFunction, OafEntity>) (dedupId, it) -> { - if (!it.hasNext()) - return Collections.emptyIterator(); - - final ArrayList cliques_ = new ArrayList<>(); - - final ArrayList aliases = new ArrayList<>(); - - final HashSet acceptanceDate = new HashSet<>(); - - while (it.hasNext()) { - Tuple3 t = it.next(); - OafEntity entity = t._3(); - - if (entity == null) { - aliases.add(t._2()); - } else { - cliques_.add(entity); - - if (acceptanceDate.size() < MAX_ACCEPTANCE_DATE) { //max acceptance date - if (Result.class.isAssignableFrom(entity.getClass())) { - Result result = (Result) entity; - if (result.getDateofacceptance() != null - && StringUtils.isNotBlank(result.getDateofacceptance().getValue())) { - acceptanceDate.add(result.getDateofacceptance().getValue()); - } - } - } - } - - } - - if (acceptanceDate.size() >= MAX_ACCEPTANCE_DATE || cliques_.isEmpty()) { - return Collections.emptyIterator(); - } - - OafEntity mergedEntity = MergeUtils.mergeGroup(dedupId, cliques_.iterator()); - // dedup records do not have date of transformation attribute - mergedEntity.setDateoftransformation(null); - - return Stream - .concat( - Stream - .of(dedupId) - .map(id -> DedupLocalTestUtils.createOafEntity(id, mergedEntity, before_dedupentity)), - aliases - .stream() - .map(id -> DedupLocalTestUtils.createOafEntity(id, mergedEntity, before_dedupentity))) - .iterator(); - - }, beanEncoder); - - long dedupentity_time = System.currentTimeMillis() - before_dedupentity; - - long dedupentity_number = dedupRecords.count(); - - System.out.println("Number of simRels : " + simrels_number); - System.out.println("Number of mergeRels : " + mergerels_number); - System.out.println("Number of dedupEntities : " + dedupentity_number); - System.out.println("Total time for simRels creation : " + simrels_time); - System.out.println("Total time for mergeRels creation : " + mergerels_time); - System.out.println("Total time for dedupEntities creation : " + dedupentity_time); - - showGraph(entities, simRels); - - } - - @Test //test the match between two JSON - @Disabled - public void matchTest() { - - String json1 = "{\"geolocation\": [], \"dataInfo\": {\"deletedbyinference\": true, \"provenanceaction\": {\"classid\": \"sysimport:dedup\", \"classname\": \"Inferred by OpenAIRE\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": true, \"inferenceprovenance\": \"dedup-result-decisiontree-v4\", \"invisible\": false, \"trust\": \"0.8\"}, \"resourcetype\": {\"classid\": \"UNKNOWN\", \"classname\": \"UNKNOWN\", \"schemename\": \"dnet:dataCite_resource\", \"schemeid\": \"dnet:dataCite_resource\"}, \"pid\": [{\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/bq0h-znc\"}, {\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/a5v4-hfh\"}, {\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/cg41-q6u\"}], \"bestaccessright\": {\"classid\": \"OPEN\", \"classname\": \"Open Access\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"relevantdate\": [{\"qualifier\": {\"classid\": \"issued\", \"classname\": \"issued\", \"schemename\": \"dnet:dataCite_date\", \"schemeid\": \"dnet:dataCite_date\"}, \"value\": \"2018-05-22\"}, {\"qualifier\": {\"classid\": \"available\", \"classname\": \"available\", \"schemename\": \"dnet:dataCite_date\", \"schemeid\": \"dnet:dataCite_date\"}, \"value\": \"2018-03-31\"}], \"contributor\": [{\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"Amunts, Katrin\"}], \"id\": \"50|dedup_wf_002::639e90885a3cccbe75746679ff5492c7\", \"description\": [{\"value\": \"This dataset contains the distinct probabilistic cytoarchitectonic map of Area 5Ci (SPL) in the individual, single subject template of the MNI Colin 27 reference space. As part of the Julich-Brain cytoarchitectonic atlas, the area was identified using classical histological criteria and quantitative cytoarchitectonic analysis on cell-body-stained histological sections of 10 human postmortem brains obtained from the body donor program of the University of D\\\\u00fcsseldorf. The results of the cytoarchitectonic analysis were then mapped to the reference space, where each voxel was assigned the probability to belong to Area 5Ci (SPL). The probability map of Area 5Ci (SPL) is provided in NifTi format for each hemisphere in the reference space. The Julich-Brain atlas relies on a modular, flexible and adaptive framework containing workflows to create the probabilistic brain maps for these structures. Note that methodological improvements and updated probability estimates for new brain structures may in some cases lead to measurable but negligible deviations of existing probability maps, as compared to earlier released datasets. Other available data versions of Area 5Ci (SPL): Scheperjans et al. (2019) [Data set, v8.4] [DOI: 10.25493/SQVP-GK1](https://doi.org/10.25493%2FSQVP-GK1) The most probable delineation of Area 5Ci (SPL) derived from the calculation of a maximum probability map of all currently released Julich-Brain brain structures can be found here: Amunts et al. (2019) [Data set, v1.13] [DOI: 10.25493/Q3ZS-NV6](https://doi.org/10.25493%2FQ3ZS-NV6) Amunts et al. (2019) [Data set, v1.18] [DOI: 10.25493/8EGG-ZAR](https://doi.org/10.25493%2F8EGG-ZAR) Amunts et al. (2020) [Data set, v2.2] [DOI: 10.25493/TAKY-64D](https://doi.org/10.25493%2FTAKY-64D)\"}], \"embargoenddate\": {\"value\": \"2018-03-31\"}, \"lastupdatetimestamp\": 1725556617730, \"author\": [{\"surname\": \"Scheperjans\", \"name\": \"F.\", \"pid\": [], \"rank\": 1, \"affiliation\": [], \"fullname\": \"Scheperjans, F.\"}, {\"surname\": \"Hermann\", \"name\": \"K.\", \"pid\": [], \"rank\": 2, \"affiliation\": [], \"fullname\": \"Hermann, K.\"}, {\"surname\": \"Eickhoff\", \"name\": \"S. B.\", \"pid\": [], \"rank\": 3, \"affiliation\": [], \"fullname\": \"Eickhoff, S. B.\"}, {\"surname\": \"Amunts\", \"name\": \"K.\", \"pid\": [], \"rank\": 4, \"affiliation\": [], \"fullname\": \"Amunts, K.\"}, {\"surname\": \"Schleicher\", \"name\": \"A.\", \"pid\": [], \"rank\": 5, \"affiliation\": [], \"fullname\": \"Schleicher, A.\"}, {\"surname\": \"Hoemke\", \"name\": \"L.\", \"pid\": [], \"rank\": 6, \"affiliation\": [], \"fullname\": \"Hoemke, L.\"}, {\"surname\": \"Mohlberg\", \"name\": \"H.\", \"pid\": [], \"rank\": 7, \"affiliation\": [], \"fullname\": \"Mohlberg, H.\"}, {\"surname\": \"Zilles\", \"name\": \"K.\", \"pid\": [], \"rank\": 8, \"affiliation\": [], \"fullname\": \"Zilles, K.\"}], \"collectedfrom\": [{\"value\": \"Datacite\", \"key\": \"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254\"}, {\"value\": \"Sygma\", \"key\": \"10|openaire____::a8db6f6b2ce4fe72e8b2314a9a93e7d9\"}], \"instance\": [{\"refereed\": {\"classid\": \"0002\", \"classname\": \"nonPeerReviewed\", \"schemename\": \"dnet:review_levels\", \"schemeid\": \"dnet:review_levels\"}, \"hostedby\": {\"value\": \"EBRAINS\", \"key\": \"10|fairsharing_::2ef3e50fd7c1091dda165f25be7f64fd\"}, \"license\": {\"value\": \"CC BY NC SA\"}, \"url\": [\"https://dx.doi.org/10.25493/bq0h-znc\"], \"pid\": [{\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/bq0h-znc\"}], \"instanceTypeMapping\": [{\"originalType\": \"Dataset\", \"typeLabel\": \"dataset\", \"vocabularyName\": \"openaire::coar_resource_types_3_1\", \"typeCode\": \"http://purl.org/coar/resource_type/c_ddb1\"}], \"dateofacceptance\": {\"value\": \"2018-05-22\"}, \"collectedfrom\": {\"value\": \"Datacite\", \"key\": \"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254\"}, \"accessright\": {\"classid\": \"UNKNOWN\", \"classname\": \"not available\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"instancetype\": {\"classid\": \"0021\", \"classname\": \"Dataset\", \"schemename\": \"dnet:publication_resource\", \"schemeid\": \"dnet:publication_resource\"}}, {\"refereed\": {\"classid\": \"0002\", \"classname\": \"nonPeerReviewed\", \"schemename\": \"dnet:review_levels\", \"schemeid\": \"dnet:review_levels\"}, \"hostedby\": {\"value\": \"EBRAINS\", \"key\": \"10|fairsharing_::2ef3e50fd7c1091dda165f25be7f64fd\"}, \"license\": {\"value\": \"CC BY NC SA\"}, \"url\": [\"https://dx.doi.org/10.25493/a5v4-hfh\"], \"pid\": [{\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/a5v4-hfh\"}], \"instanceTypeMapping\": [{\"originalType\": \"Dataset\", \"typeLabel\": \"dataset\", \"vocabularyName\": \"openaire::coar_resource_types_3_1\", \"typeCode\": \"http://purl.org/coar/resource_type/c_ddb1\"}], \"dateofacceptance\": {\"value\": \"2018-05-22\"}, \"collectedfrom\": {\"value\": \"Datacite\", \"key\": \"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254\"}, \"accessright\": {\"classid\": \"UNKNOWN\", \"classname\": \"not available\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"instancetype\": {\"classid\": \"0021\", \"classname\": \"Dataset\", \"schemename\": \"dnet:publication_resource\", \"schemeid\": \"dnet:publication_resource\"}}, {\"refereed\": {\"classid\": \"0002\", \"classname\": \"nonPeerReviewed\", \"schemename\": \"dnet:review_levels\", \"schemeid\": \"dnet:review_levels\"}, \"hostedby\": {\"value\": \"EBRAINS\", \"key\": \"10|fairsharing_::2ef3e50fd7c1091dda165f25be7f64fd\"}, \"license\": {\"value\": \"CC BY NC SA\"}, \"url\": [\"https://dx.doi.org/10.25493/cg41-q6u\"], \"pid\": [{\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/cg41-q6u\"}], \"instanceTypeMapping\": [{\"originalType\": \"Dataset\", \"typeLabel\": \"dataset\", \"vocabularyName\": \"openaire::coar_resource_types_3_1\", \"typeCode\": \"http://purl.org/coar/resource_type/c_ddb1\"}], \"dateofacceptance\": {\"value\": \"2018-05-22\"}, \"collectedfrom\": {\"value\": \"Datacite\", \"key\": \"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254\"}, \"accessright\": {\"classid\": \"UNKNOWN\", \"classname\": \"not available\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"instancetype\": {\"classid\": \"0021\", \"classname\": \"Dataset\", \"schemename\": \"dnet:publication_resource\", \"schemeid\": \"dnet:publication_resource\"}}, {\"refereed\": {\"classid\": \"0002\", \"classname\": \"nonPeerReviewed\", \"schemename\": \"dnet:review_levels\", \"schemeid\": \"dnet:review_levels\"}, \"hostedby\": {\"value\": \"Unknown Repository\", \"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\"}, \"license\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"CC BY NC SA\"}, \"url\": [\"https://doi.org/10.25493/bq0h-znc\"], \"pid\": [], \"instanceTypeMapping\": [{\"originalType\": \"Dataset\", \"typeLabel\": \"dataset\", \"vocabularyName\": \"openaire::coar_resource_types_3_1\", \"typeCode\": \"http://purl.org/coar/resource_type/c_ddb1\"}], \"alternateIdentifier\": [{\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/bq0h-znc\"}], \"dateofacceptance\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"2018-05-22\"}, \"collectedfrom\": {\"value\": \"Sygma\", \"key\": \"10|openaire____::a8db6f6b2ce4fe72e8b2314a9a93e7d9\"}, \"accessright\": {\"classid\": \"OPEN\", \"classname\": \"Open Access\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"instancetype\": {\"classid\": \"0021\", \"classname\": \"Dataset\", \"schemename\": \"dnet:publication_resource\", \"schemeid\": \"dnet:publication_resource\"}}, {\"refereed\": {\"classid\": \"0002\", \"classname\": \"nonPeerReviewed\", \"schemename\": \"dnet:review_levels\", \"schemeid\": \"dnet:review_levels\"}, \"hostedby\": {\"value\": \"Unknown Repository\", \"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\"}, \"license\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"CC BY NC SA\"}, \"url\": [\"https://doi.org/10.25493/a5v4-hfh\"], \"pid\": [], \"instanceTypeMapping\": [{\"originalType\": \"Dataset\", \"typeLabel\": \"dataset\", \"vocabularyName\": \"openaire::coar_resource_types_3_1\", \"typeCode\": \"http://purl.org/coar/resource_type/c_ddb1\"}], \"alternateIdentifier\": [{\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/a5v4-hfh\"}], \"dateofacceptance\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"2018-05-22\"}, \"collectedfrom\": {\"value\": \"Sygma\", \"key\": \"10|openaire____::a8db6f6b2ce4fe72e8b2314a9a93e7d9\"}, \"accessright\": {\"classid\": \"OPEN\", \"classname\": \"Open Access\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"instancetype\": {\"classid\": \"0021\", \"classname\": \"Dataset\", \"schemename\": \"dnet:publication_resource\", \"schemeid\": \"dnet:publication_resource\"}}, {\"refereed\": {\"classid\": \"0002\", \"classname\": \"nonPeerReviewed\", \"schemename\": \"dnet:review_levels\", \"schemeid\": \"dnet:review_levels\"}, \"hostedby\": {\"value\": \"Unknown Repository\", \"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\"}, \"license\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"CC BY NC SA\"}, \"url\": [\"https://doi.org/10.25493/cg41-q6u\"], \"pid\": [], \"instanceTypeMapping\": [{\"originalType\": \"Dataset\", \"typeLabel\": \"dataset\", \"vocabularyName\": \"openaire::coar_resource_types_3_1\", \"typeCode\": \"http://purl.org/coar/resource_type/c_ddb1\"}], \"alternateIdentifier\": [{\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/cg41-q6u\"}], \"dateofacceptance\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"2018-05-22\"}, \"collectedfrom\": {\"value\": \"Sygma\", \"key\": \"10|openaire____::a8db6f6b2ce4fe72e8b2314a9a93e7d9\"}, \"accessright\": {\"classid\": \"OPEN\", \"classname\": \"Open Access\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"instancetype\": {\"classid\": \"0021\", \"classname\": \"Dataset\", \"schemename\": \"dnet:publication_resource\", \"schemeid\": \"dnet:publication_resource\"}}], \"version\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"null\"}, \"storagedate\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"2018-05-22\"}, \"dateofcollection\": \"2024-09-05T17:19:05.982\", \"metaResourceType\": {\"classid\": \"Research Data\", \"classname\": \"Research Data\", \"schemename\": \"openaire::meta_resource_types\", \"schemeid\": \"openaire::meta_resource_types\"}, \"dateofacceptance\": {\"value\": \"2018-05-22\"}, \"format\": [], \"subject\": [{\"qualifier\": {\"classid\": \"keyword\", \"classname\": \"keyword\", \"schemename\": \"dnet:subject_classification_typologies\", \"schemeid\": \"dnet:subject_classification_typologies\"}, \"value\": \"Neuroscience\"}], \"coverage\": [], \"externalReference\": [], \"publisher\": {\"value\": \"Human Brain Project Neuroinformatics Platform\"}, \"eoscifguidelines\": [], \"language\": {\"classid\": \"und\", \"classname\": \"Undetermined\", \"schemename\": \"dnet:languages\", \"schemeid\": \"dnet:languages\"}, \"resulttype\": {\"classid\": \"dataset\", \"classname\": \"dataset\", \"schemename\": \"dnet:result_typologies\", \"schemeid\": \"dnet:result_typologies\"}, \"country\": [], \"extraInfo\": [], \"originalId\": [\"50|datacite____::7f98d6c254b0d743c4634412ed94817b\", \"10.25493/bq0h-znc\", \"50|datacite____::4bc8e949f3238624d64980b21cbf8441\", \"10.25493/a5v4-hfh\", \"50|datacite____::098cabeb288616a6a4ae7e950f2a372e\", \"10.25493/cg41-q6u\", \"50|sygma_______::7f98d6c254b0d743c4634412ed94817b\", \"50|sygma_______::4bc8e949f3238624d64980b21cbf8441\", \"50|sygma_______::098cabeb288616a6a4ae7e950f2a372e\"], \"source\": [], \"context\": [{\"dataInfo\": [{\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"iis\", \"classname\": \"Inferred by OpenAIRE\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": true, \"inferenceprovenance\": \"iis::document_referencedProjects\", \"invisible\": false, \"trust\": \"0.9\"}, {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"iis\", \"classname\": \"Inferred by OpenAIRE\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": true, \"inferenceprovenance\": \"iis::document_referencedProjects\", \"invisible\": false, \"trust\": \"0.9\"}, {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"iis\", \"classname\": \"Inferred by OpenAIRE\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": true, \"inferenceprovenance\": \"iis::document_referencedProjects\", \"invisible\": false, \"trust\": \"0.9\"}], \"id\": \"fet-h2020::flag::85792436c02c::785907\"}, {\"dataInfo\": [{\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"iis\", \"classname\": \"Inferred by OpenAIRE\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": true, \"inferenceprovenance\": \"iis::document_referencedProjects\", \"invisible\": false, \"trust\": \"0.9\"}, {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"iis\", \"classname\": \"Inferred by OpenAIRE\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": true, \"inferenceprovenance\": \"iis::document_referencedProjects\", \"invisible\": false, \"trust\": \"0.9\"}, {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"iis\", \"classname\": \"Inferred by OpenAIRE\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": true, \"inferenceprovenance\": \"iis::document_referencedProjects\", \"invisible\": false, \"trust\": \"0.9\"}], \"id\": \"fet-h2020::flag::91670347c2a7::945539\"}], \"title\": [{\"qualifier\": {\"classid\": \"main title\", \"classname\": \"main title\", \"schemename\": \"dnet:dataCite_title\", \"schemeid\": \"dnet:dataCite_title\"}, \"value\": \"Probabilistic cytoarchitectonic map of Area 5Ci (SPL) (v8.2)\"}, {\"qualifier\": {\"classid\": \"main title\", \"classname\": \"main title\", \"schemename\": \"dnet:dataCite_title\", \"schemeid\": \"dnet:dataCite_title\"}, \"value\": \"Probabilistic cytoarchitectonic map of Area 5L (SPL) (v8.2)\"}, {\"qualifier\": {\"classid\": \"main title\", \"classname\": \"main title\", \"schemename\": \"dnet:dataCite_title\", \"schemeid\": \"dnet:dataCite_title\"}, \"value\": \"Probabilistic cytoarchitectonic map of Area 5M (SPL) (v8.2)\"}]}"; - String json2 = "{\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": true, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"resourcetype\": {\"classid\": \"UNKNOWN\", \"classname\": \"UNKNOWN\", \"schemename\": \"dnet:dataCite_resource\", \"schemeid\": \"dnet:dataCite_resource\"}, \"pid\": [{\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/bq0h-znc\"}], \"bestaccessright\": {\"classid\": \"UNKNOWN\", \"classname\": \"not available\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"relevantdate\": [{\"qualifier\": {\"classid\": \"issued\", \"classname\": \"issued\", \"schemename\": \"dnet:dataCite_date\", \"schemeid\": \"dnet:dataCite_date\"}, \"value\": \"2018-05-22\"}, {\"qualifier\": {\"classid\": \"available\", \"classname\": \"available\", \"schemename\": \"dnet:dataCite_date\", \"schemeid\": \"dnet:dataCite_date\"}, \"value\": \"2018-03-31\"}], \"collectedfrom\": [{\"value\": \"Datacite\", \"key\": \"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254\"}], \"id\": \"50|doi_________::7f98d6c254b0d743c4634412ed94817b\", \"subject\": [{\"qualifier\": {\"classid\": \"keyword\", \"classname\": \"keyword\", \"schemename\": \"dnet:subject_classification_typologies\", \"schemeid\": \"dnet:subject_classification_typologies\"}, \"value\": \"Neuroscience\"}], \"embargoenddate\": {\"value\": \"2018-03-31\"}, \"author\": [{\"surname\": \"Scheperjans\", \"name\": \"F.\", \"pid\": [], \"rank\": 1, \"affiliation\": [], \"fullname\": \"Scheperjans, F.\"}, {\"surname\": \"Hermann\", \"name\": \"K.\", \"pid\": [], \"rank\": 2, \"affiliation\": [], \"fullname\": \"Hermann, K.\"}, {\"surname\": \"Eickhoff\", \"name\": \"S. B.\", \"pid\": [], \"rank\": 3, \"affiliation\": [], \"fullname\": \"Eickhoff, S. B.\"}, {\"surname\": \"Amunts\", \"name\": \"K.\", \"pid\": [], \"rank\": 4, \"affiliation\": [], \"fullname\": \"Amunts, K.\"}, {\"surname\": \"Schleicher\", \"name\": \"A.\", \"pid\": [], \"rank\": 5, \"affiliation\": [], \"fullname\": \"Schleicher, A.\"}, {\"surname\": \"Hoemke\", \"name\": \"L.\", \"pid\": [], \"rank\": 6, \"affiliation\": [], \"fullname\": \"Hoemke, L.\"}, {\"surname\": \"Mohlberg\", \"name\": \"H.\", \"pid\": [], \"rank\": 7, \"affiliation\": [], \"fullname\": \"Mohlberg, H.\"}, {\"surname\": \"Zilles\", \"name\": \"K.\", \"pid\": [], \"rank\": 8, \"affiliation\": [], \"fullname\": \"Zilles, K.\"}], \"description\": [{\"value\": \"This dataset contains the distinct probabilistic cytoarchitectonic map of Area 5Ci (SPL) in the individual, single subject template of the MNI Colin 27 reference space. As part of the Julich-Brain cytoarchitectonic atlas, the area was identified using classical histological criteria and quantitative cytoarchitectonic analysis on cell-body-stained histological sections of 10 human postmortem brains obtained from the body donor program of the University of D\\\\u00fcsseldorf. The results of the cytoarchitectonic analysis were then mapped to the reference space, where each voxel was assigned the probability to belong to Area 5Ci (SPL). The probability map of Area 5Ci (SPL) is provided in NifTi format for each hemisphere in the reference space. The Julich-Brain atlas relies on a modular, flexible and adaptive framework containing workflows to create the probabilistic brain maps for these structures. Note that methodological improvements and updated probability estimates for new brain structures may in some cases lead to measurable but negligible deviations of existing probability maps, as compared to earlier released datasets. Other available data versions of Area 5Ci (SPL): Scheperjans et al. (2019) [Data set, v8.4] [DOI: 10.25493/SQVP-GK1](https://doi.org/10.25493%2FSQVP-GK1) The most probable delineation of Area 5Ci (SPL) derived from the calculation of a maximum probability map of all currently released Julich-Brain brain structures can be found here: Amunts et al. (2019) [Data set, v1.13] [DOI: 10.25493/Q3ZS-NV6](https://doi.org/10.25493%2FQ3ZS-NV6) Amunts et al. (2019) [Data set, v1.18] [DOI: 10.25493/8EGG-ZAR](https://doi.org/10.25493%2F8EGG-ZAR) Amunts et al. (2020) [Data set, v2.2] [DOI: 10.25493/TAKY-64D](https://doi.org/10.25493%2FTAKY-64D)\"}], \"instance\": [{\"refereed\": {\"classid\": \"0002\", \"classname\": \"nonPeerReviewed\", \"schemename\": \"dnet:review_levels\", \"schemeid\": \"dnet:review_levels\"}, \"hostedby\": {\"value\": \"EBRAINS\", \"key\": \"10|fairsharing_::2ef3e50fd7c1091dda165f25be7f64fd\"}, \"license\": {\"value\": \"CC BY NC SA\"}, \"url\": [\"https://dx.doi.org/10.25493/bq0h-znc\"], \"pid\": [{\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/bq0h-znc\"}], \"instanceTypeMapping\": [{\"originalType\": \"Dataset\", \"typeLabel\": \"dataset\", \"vocabularyName\": \"openaire::coar_resource_types_3_1\", \"typeCode\": \"http://purl.org/coar/resource_type/c_ddb1\"}], \"dateofacceptance\": {\"value\": \"2018-05-22\"}, \"collectedfrom\": {\"value\": \"Datacite\", \"key\": \"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254\"}, \"accessright\": {\"classid\": \"UNKNOWN\", \"classname\": \"not available\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"instancetype\": {\"classid\": \"0021\", \"classname\": \"Dataset\", \"schemename\": \"dnet:publication_resource\", \"schemeid\": \"dnet:publication_resource\"}}], \"dateofcollection\": \"2021-07-20T01:01:03+0000\", \"metaResourceType\": {\"classid\": \"Research Data\", \"classname\": \"Research Data\", \"schemename\": \"openaire::meta_resource_types\", \"schemeid\": \"openaire::meta_resource_types\"}, \"dateoftransformation\": \"2021-07-20T01:01:03+0000\", \"dateofacceptance\": {\"value\": \"2018-05-22\"}, \"publisher\": {\"value\": \"Human Brain Project Neuroinformatics Platform\"}, \"language\": {\"classid\": \"und\", \"classname\": \"Undetermined\", \"schemename\": \"dnet:languages\", \"schemeid\": \"dnet:languages\"}, \"resulttype\": {\"classid\": \"dataset\", \"classname\": \"dataset\", \"schemename\": \"dnet:result_typologies\", \"schemeid\": \"dnet:result_typologies\"}, \"title\": [{\"qualifier\": {\"classid\": \"main title\", \"classname\": \"main title\", \"schemename\": \"dnet:dataCite_title\", \"schemeid\": \"dnet:dataCite_title\"}, \"value\": \"Probabilistic cytoarchitectonic map of Area 5Ci (SPL) (v8.2)\"}], \"originalId\": [\"50|datacite____::7f98d6c254b0d743c4634412ed94817b\", \"10.25493/bq0h-znc\"], \"context\": []}"; - - Row a = model.rowFromJson(json1); - Row b = model.rowFromJson(json2); - - boolean result = new TreeProcessor(config).compare(a,b); - - System.out.println("Tree Processor Result = " + result); - - } - - @Test //test the keys between two JSON - @Disabled - public void blockingTest() { - - String json1 = "{\"geolocation\": [], \"dataInfo\": {\"deletedbyinference\": true, \"provenanceaction\": {\"classid\": \"sysimport:dedup\", \"classname\": \"Inferred by OpenAIRE\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": true, \"inferenceprovenance\": \"dedup-result-decisiontree-v4\", \"invisible\": false, \"trust\": \"0.8\"}, \"resourcetype\": {\"classid\": \"UNKNOWN\", \"classname\": \"UNKNOWN\", \"schemename\": \"dnet:dataCite_resource\", \"schemeid\": \"dnet:dataCite_resource\"}, \"pid\": [{\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/bq0h-znc\"}, {\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/a5v4-hfh\"}, {\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/cg41-q6u\"}], \"bestaccessright\": {\"classid\": \"OPEN\", \"classname\": \"Open Access\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"relevantdate\": [{\"qualifier\": {\"classid\": \"issued\", \"classname\": \"issued\", \"schemename\": \"dnet:dataCite_date\", \"schemeid\": \"dnet:dataCite_date\"}, \"value\": \"2018-05-22\"}, {\"qualifier\": {\"classid\": \"available\", \"classname\": \"available\", \"schemename\": \"dnet:dataCite_date\", \"schemeid\": \"dnet:dataCite_date\"}, \"value\": \"2018-03-31\"}], \"contributor\": [{\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"Amunts, Katrin\"}], \"id\": \"50|dedup_wf_002::639e90885a3cccbe75746679ff5492c7\", \"description\": [{\"value\": \"This dataset contains the distinct probabilistic cytoarchitectonic map of Area 5Ci (SPL) in the individual, single subject template of the MNI Colin 27 reference space. As part of the Julich-Brain cytoarchitectonic atlas, the area was identified using classical histological criteria and quantitative cytoarchitectonic analysis on cell-body-stained histological sections of 10 human postmortem brains obtained from the body donor program of the University of D\\\\u00fcsseldorf. The results of the cytoarchitectonic analysis were then mapped to the reference space, where each voxel was assigned the probability to belong to Area 5Ci (SPL). The probability map of Area 5Ci (SPL) is provided in NifTi format for each hemisphere in the reference space. The Julich-Brain atlas relies on a modular, flexible and adaptive framework containing workflows to create the probabilistic brain maps for these structures. Note that methodological improvements and updated probability estimates for new brain structures may in some cases lead to measurable but negligible deviations of existing probability maps, as compared to earlier released datasets. Other available data versions of Area 5Ci (SPL): Scheperjans et al. (2019) [Data set, v8.4] [DOI: 10.25493/SQVP-GK1](https://doi.org/10.25493%2FSQVP-GK1) The most probable delineation of Area 5Ci (SPL) derived from the calculation of a maximum probability map of all currently released Julich-Brain brain structures can be found here: Amunts et al. (2019) [Data set, v1.13] [DOI: 10.25493/Q3ZS-NV6](https://doi.org/10.25493%2FQ3ZS-NV6) Amunts et al. (2019) [Data set, v1.18] [DOI: 10.25493/8EGG-ZAR](https://doi.org/10.25493%2F8EGG-ZAR) Amunts et al. (2020) [Data set, v2.2] [DOI: 10.25493/TAKY-64D](https://doi.org/10.25493%2FTAKY-64D)\"}], \"embargoenddate\": {\"value\": \"2018-03-31\"}, \"lastupdatetimestamp\": 1725556617730, \"author\": [{\"surname\": \"Scheperjans\", \"name\": \"F.\", \"pid\": [], \"rank\": 1, \"affiliation\": [], \"fullname\": \"Scheperjans, F.\"}, {\"surname\": \"Hermann\", \"name\": \"K.\", \"pid\": [], \"rank\": 2, \"affiliation\": [], \"fullname\": \"Hermann, K.\"}, {\"surname\": \"Eickhoff\", \"name\": \"S. B.\", \"pid\": [], \"rank\": 3, \"affiliation\": [], \"fullname\": \"Eickhoff, S. B.\"}, {\"surname\": \"Amunts\", \"name\": \"K.\", \"pid\": [], \"rank\": 4, \"affiliation\": [], \"fullname\": \"Amunts, K.\"}, {\"surname\": \"Schleicher\", \"name\": \"A.\", \"pid\": [], \"rank\": 5, \"affiliation\": [], \"fullname\": \"Schleicher, A.\"}, {\"surname\": \"Hoemke\", \"name\": \"L.\", \"pid\": [], \"rank\": 6, \"affiliation\": [], \"fullname\": \"Hoemke, L.\"}, {\"surname\": \"Mohlberg\", \"name\": \"H.\", \"pid\": [], \"rank\": 7, \"affiliation\": [], \"fullname\": \"Mohlberg, H.\"}, {\"surname\": \"Zilles\", \"name\": \"K.\", \"pid\": [], \"rank\": 8, \"affiliation\": [], \"fullname\": \"Zilles, K.\"}], \"collectedfrom\": [{\"value\": \"Datacite\", \"key\": \"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254\"}, {\"value\": \"Sygma\", \"key\": \"10|openaire____::a8db6f6b2ce4fe72e8b2314a9a93e7d9\"}], \"instance\": [{\"refereed\": {\"classid\": \"0002\", \"classname\": \"nonPeerReviewed\", \"schemename\": \"dnet:review_levels\", \"schemeid\": \"dnet:review_levels\"}, \"hostedby\": {\"value\": \"EBRAINS\", \"key\": \"10|fairsharing_::2ef3e50fd7c1091dda165f25be7f64fd\"}, \"license\": {\"value\": \"CC BY NC SA\"}, \"url\": [\"https://dx.doi.org/10.25493/bq0h-znc\"], \"pid\": [{\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/bq0h-znc\"}], \"instanceTypeMapping\": [{\"originalType\": \"Dataset\", \"typeLabel\": \"dataset\", \"vocabularyName\": \"openaire::coar_resource_types_3_1\", \"typeCode\": \"http://purl.org/coar/resource_type/c_ddb1\"}], \"dateofacceptance\": {\"value\": \"2018-05-22\"}, \"collectedfrom\": {\"value\": \"Datacite\", \"key\": \"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254\"}, \"accessright\": {\"classid\": \"UNKNOWN\", \"classname\": \"not available\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"instancetype\": {\"classid\": \"0021\", \"classname\": \"Dataset\", \"schemename\": \"dnet:publication_resource\", \"schemeid\": \"dnet:publication_resource\"}}, {\"refereed\": {\"classid\": \"0002\", \"classname\": \"nonPeerReviewed\", \"schemename\": \"dnet:review_levels\", \"schemeid\": \"dnet:review_levels\"}, \"hostedby\": {\"value\": \"EBRAINS\", \"key\": \"10|fairsharing_::2ef3e50fd7c1091dda165f25be7f64fd\"}, \"license\": {\"value\": \"CC BY NC SA\"}, \"url\": [\"https://dx.doi.org/10.25493/a5v4-hfh\"], \"pid\": [{\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/a5v4-hfh\"}], \"instanceTypeMapping\": [{\"originalType\": \"Dataset\", \"typeLabel\": \"dataset\", \"vocabularyName\": \"openaire::coar_resource_types_3_1\", \"typeCode\": \"http://purl.org/coar/resource_type/c_ddb1\"}], \"dateofacceptance\": {\"value\": \"2018-05-22\"}, \"collectedfrom\": {\"value\": \"Datacite\", \"key\": \"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254\"}, \"accessright\": {\"classid\": \"UNKNOWN\", \"classname\": \"not available\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"instancetype\": {\"classid\": \"0021\", \"classname\": \"Dataset\", \"schemename\": \"dnet:publication_resource\", \"schemeid\": \"dnet:publication_resource\"}}, {\"refereed\": {\"classid\": \"0002\", \"classname\": \"nonPeerReviewed\", \"schemename\": \"dnet:review_levels\", \"schemeid\": \"dnet:review_levels\"}, \"hostedby\": {\"value\": \"EBRAINS\", \"key\": \"10|fairsharing_::2ef3e50fd7c1091dda165f25be7f64fd\"}, \"license\": {\"value\": \"CC BY NC SA\"}, \"url\": [\"https://dx.doi.org/10.25493/cg41-q6u\"], \"pid\": [{\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/cg41-q6u\"}], \"instanceTypeMapping\": [{\"originalType\": \"Dataset\", \"typeLabel\": \"dataset\", \"vocabularyName\": \"openaire::coar_resource_types_3_1\", \"typeCode\": \"http://purl.org/coar/resource_type/c_ddb1\"}], \"dateofacceptance\": {\"value\": \"2018-05-22\"}, \"collectedfrom\": {\"value\": \"Datacite\", \"key\": \"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254\"}, \"accessright\": {\"classid\": \"UNKNOWN\", \"classname\": \"not available\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"instancetype\": {\"classid\": \"0021\", \"classname\": \"Dataset\", \"schemename\": \"dnet:publication_resource\", \"schemeid\": \"dnet:publication_resource\"}}, {\"refereed\": {\"classid\": \"0002\", \"classname\": \"nonPeerReviewed\", \"schemename\": \"dnet:review_levels\", \"schemeid\": \"dnet:review_levels\"}, \"hostedby\": {\"value\": \"Unknown Repository\", \"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\"}, \"license\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"CC BY NC SA\"}, \"url\": [\"https://doi.org/10.25493/bq0h-znc\"], \"pid\": [], \"instanceTypeMapping\": [{\"originalType\": \"Dataset\", \"typeLabel\": \"dataset\", \"vocabularyName\": \"openaire::coar_resource_types_3_1\", \"typeCode\": \"http://purl.org/coar/resource_type/c_ddb1\"}], \"alternateIdentifier\": [{\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/bq0h-znc\"}], \"dateofacceptance\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"2018-05-22\"}, \"collectedfrom\": {\"value\": \"Sygma\", \"key\": \"10|openaire____::a8db6f6b2ce4fe72e8b2314a9a93e7d9\"}, \"accessright\": {\"classid\": \"OPEN\", \"classname\": \"Open Access\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"instancetype\": {\"classid\": \"0021\", \"classname\": \"Dataset\", \"schemename\": \"dnet:publication_resource\", \"schemeid\": \"dnet:publication_resource\"}}, {\"refereed\": {\"classid\": \"0002\", \"classname\": \"nonPeerReviewed\", \"schemename\": \"dnet:review_levels\", \"schemeid\": \"dnet:review_levels\"}, \"hostedby\": {\"value\": \"Unknown Repository\", \"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\"}, \"license\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"CC BY NC SA\"}, \"url\": [\"https://doi.org/10.25493/a5v4-hfh\"], \"pid\": [], \"instanceTypeMapping\": [{\"originalType\": \"Dataset\", \"typeLabel\": \"dataset\", \"vocabularyName\": \"openaire::coar_resource_types_3_1\", \"typeCode\": \"http://purl.org/coar/resource_type/c_ddb1\"}], \"alternateIdentifier\": [{\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/a5v4-hfh\"}], \"dateofacceptance\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"2018-05-22\"}, \"collectedfrom\": {\"value\": \"Sygma\", \"key\": \"10|openaire____::a8db6f6b2ce4fe72e8b2314a9a93e7d9\"}, \"accessright\": {\"classid\": \"OPEN\", \"classname\": \"Open Access\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"instancetype\": {\"classid\": \"0021\", \"classname\": \"Dataset\", \"schemename\": \"dnet:publication_resource\", \"schemeid\": \"dnet:publication_resource\"}}, {\"refereed\": {\"classid\": \"0002\", \"classname\": \"nonPeerReviewed\", \"schemename\": \"dnet:review_levels\", \"schemeid\": \"dnet:review_levels\"}, \"hostedby\": {\"value\": \"Unknown Repository\", \"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\"}, \"license\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"CC BY NC SA\"}, \"url\": [\"https://doi.org/10.25493/cg41-q6u\"], \"pid\": [], \"instanceTypeMapping\": [{\"originalType\": \"Dataset\", \"typeLabel\": \"dataset\", \"vocabularyName\": \"openaire::coar_resource_types_3_1\", \"typeCode\": \"http://purl.org/coar/resource_type/c_ddb1\"}], \"alternateIdentifier\": [{\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/cg41-q6u\"}], \"dateofacceptance\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"2018-05-22\"}, \"collectedfrom\": {\"value\": \"Sygma\", \"key\": \"10|openaire____::a8db6f6b2ce4fe72e8b2314a9a93e7d9\"}, \"accessright\": {\"classid\": \"OPEN\", \"classname\": \"Open Access\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"instancetype\": {\"classid\": \"0021\", \"classname\": \"Dataset\", \"schemename\": \"dnet:publication_resource\", \"schemeid\": \"dnet:publication_resource\"}}], \"version\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"null\"}, \"storagedate\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"sysimport:crosswalk:datasetarchive\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"0.9\"}, \"value\": \"2018-05-22\"}, \"dateofcollection\": \"2024-09-05T17:19:05.982\", \"metaResourceType\": {\"classid\": \"Research Data\", \"classname\": \"Research Data\", \"schemename\": \"openaire::meta_resource_types\", \"schemeid\": \"openaire::meta_resource_types\"}, \"dateofacceptance\": {\"value\": \"2018-05-22\"}, \"format\": [], \"subject\": [{\"qualifier\": {\"classid\": \"keyword\", \"classname\": \"keyword\", \"schemename\": \"dnet:subject_classification_typologies\", \"schemeid\": \"dnet:subject_classification_typologies\"}, \"value\": \"Neuroscience\"}], \"coverage\": [], \"externalReference\": [], \"publisher\": {\"value\": \"Human Brain Project Neuroinformatics Platform\"}, \"eoscifguidelines\": [], \"language\": {\"classid\": \"und\", \"classname\": \"Undetermined\", \"schemename\": \"dnet:languages\", \"schemeid\": \"dnet:languages\"}, \"resulttype\": {\"classid\": \"dataset\", \"classname\": \"dataset\", \"schemename\": \"dnet:result_typologies\", \"schemeid\": \"dnet:result_typologies\"}, \"country\": [], \"extraInfo\": [], \"originalId\": [\"50|datacite____::7f98d6c254b0d743c4634412ed94817b\", \"10.25493/bq0h-znc\", \"50|datacite____::4bc8e949f3238624d64980b21cbf8441\", \"10.25493/a5v4-hfh\", \"50|datacite____::098cabeb288616a6a4ae7e950f2a372e\", \"10.25493/cg41-q6u\", \"50|sygma_______::7f98d6c254b0d743c4634412ed94817b\", \"50|sygma_______::4bc8e949f3238624d64980b21cbf8441\", \"50|sygma_______::098cabeb288616a6a4ae7e950f2a372e\"], \"source\": [], \"context\": [{\"dataInfo\": [{\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"iis\", \"classname\": \"Inferred by OpenAIRE\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": true, \"inferenceprovenance\": \"iis::document_referencedProjects\", \"invisible\": false, \"trust\": \"0.9\"}, {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"iis\", \"classname\": \"Inferred by OpenAIRE\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": true, \"inferenceprovenance\": \"iis::document_referencedProjects\", \"invisible\": false, \"trust\": \"0.9\"}, {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"iis\", \"classname\": \"Inferred by OpenAIRE\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": true, \"inferenceprovenance\": \"iis::document_referencedProjects\", \"invisible\": false, \"trust\": \"0.9\"}], \"id\": \"fet-h2020::flag::85792436c02c::785907\"}, {\"dataInfo\": [{\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"iis\", \"classname\": \"Inferred by OpenAIRE\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": true, \"inferenceprovenance\": \"iis::document_referencedProjects\", \"invisible\": false, \"trust\": \"0.9\"}, {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"iis\", \"classname\": \"Inferred by OpenAIRE\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": true, \"inferenceprovenance\": \"iis::document_referencedProjects\", \"invisible\": false, \"trust\": \"0.9\"}, {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"iis\", \"classname\": \"Inferred by OpenAIRE\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": true, \"inferenceprovenance\": \"iis::document_referencedProjects\", \"invisible\": false, \"trust\": \"0.9\"}], \"id\": \"fet-h2020::flag::91670347c2a7::945539\"}], \"title\": [{\"qualifier\": {\"classid\": \"main title\", \"classname\": \"main title\", \"schemename\": \"dnet:dataCite_title\", \"schemeid\": \"dnet:dataCite_title\"}, \"value\": \"Probabilistic cytoarchitectonic map of Area 5Ci (SPL) (v8.2)\"}, {\"qualifier\": {\"classid\": \"main title\", \"classname\": \"main title\", \"schemename\": \"dnet:dataCite_title\", \"schemeid\": \"dnet:dataCite_title\"}, \"value\": \"Probabilistic cytoarchitectonic map of Area 5L (SPL) (v8.2)\"}, {\"qualifier\": {\"classid\": \"main title\", \"classname\": \"main title\", \"schemename\": \"dnet:dataCite_title\", \"schemeid\": \"dnet:dataCite_title\"}, \"value\": \"Probabilistic cytoarchitectonic map of Area 5M (SPL) (v8.2)\"}]}"; - String json2 = "{\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": true, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"resourcetype\": {\"classid\": \"UNKNOWN\", \"classname\": \"UNKNOWN\", \"schemename\": \"dnet:dataCite_resource\", \"schemeid\": \"dnet:dataCite_resource\"}, \"pid\": [{\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/bq0h-znc\"}], \"bestaccessright\": {\"classid\": \"UNKNOWN\", \"classname\": \"not available\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"relevantdate\": [{\"qualifier\": {\"classid\": \"issued\", \"classname\": \"issued\", \"schemename\": \"dnet:dataCite_date\", \"schemeid\": \"dnet:dataCite_date\"}, \"value\": \"2018-05-22\"}, {\"qualifier\": {\"classid\": \"available\", \"classname\": \"available\", \"schemename\": \"dnet:dataCite_date\", \"schemeid\": \"dnet:dataCite_date\"}, \"value\": \"2018-03-31\"}], \"collectedfrom\": [{\"value\": \"Datacite\", \"key\": \"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254\"}], \"id\": \"50|doi_________::7f98d6c254b0d743c4634412ed94817b\", \"subject\": [{\"qualifier\": {\"classid\": \"keyword\", \"classname\": \"keyword\", \"schemename\": \"dnet:subject_classification_typologies\", \"schemeid\": \"dnet:subject_classification_typologies\"}, \"value\": \"Neuroscience\"}], \"embargoenddate\": {\"value\": \"2018-03-31\"}, \"author\": [{\"surname\": \"Scheperjans\", \"name\": \"F.\", \"pid\": [], \"rank\": 1, \"affiliation\": [], \"fullname\": \"Scheperjans, F.\"}, {\"surname\": \"Hermann\", \"name\": \"K.\", \"pid\": [], \"rank\": 2, \"affiliation\": [], \"fullname\": \"Hermann, K.\"}, {\"surname\": \"Eickhoff\", \"name\": \"S. B.\", \"pid\": [], \"rank\": 3, \"affiliation\": [], \"fullname\": \"Eickhoff, S. B.\"}, {\"surname\": \"Amunts\", \"name\": \"K.\", \"pid\": [], \"rank\": 4, \"affiliation\": [], \"fullname\": \"Amunts, K.\"}, {\"surname\": \"Schleicher\", \"name\": \"A.\", \"pid\": [], \"rank\": 5, \"affiliation\": [], \"fullname\": \"Schleicher, A.\"}, {\"surname\": \"Hoemke\", \"name\": \"L.\", \"pid\": [], \"rank\": 6, \"affiliation\": [], \"fullname\": \"Hoemke, L.\"}, {\"surname\": \"Mohlberg\", \"name\": \"H.\", \"pid\": [], \"rank\": 7, \"affiliation\": [], \"fullname\": \"Mohlberg, H.\"}, {\"surname\": \"Zilles\", \"name\": \"K.\", \"pid\": [], \"rank\": 8, \"affiliation\": [], \"fullname\": \"Zilles, K.\"}], \"description\": [{\"value\": \"This dataset contains the distinct probabilistic cytoarchitectonic map of Area 5Ci (SPL) in the individual, single subject template of the MNI Colin 27 reference space. As part of the Julich-Brain cytoarchitectonic atlas, the area was identified using classical histological criteria and quantitative cytoarchitectonic analysis on cell-body-stained histological sections of 10 human postmortem brains obtained from the body donor program of the University of D\\\\u00fcsseldorf. The results of the cytoarchitectonic analysis were then mapped to the reference space, where each voxel was assigned the probability to belong to Area 5Ci (SPL). The probability map of Area 5Ci (SPL) is provided in NifTi format for each hemisphere in the reference space. The Julich-Brain atlas relies on a modular, flexible and adaptive framework containing workflows to create the probabilistic brain maps for these structures. Note that methodological improvements and updated probability estimates for new brain structures may in some cases lead to measurable but negligible deviations of existing probability maps, as compared to earlier released datasets. Other available data versions of Area 5Ci (SPL): Scheperjans et al. (2019) [Data set, v8.4] [DOI: 10.25493/SQVP-GK1](https://doi.org/10.25493%2FSQVP-GK1) The most probable delineation of Area 5Ci (SPL) derived from the calculation of a maximum probability map of all currently released Julich-Brain brain structures can be found here: Amunts et al. (2019) [Data set, v1.13] [DOI: 10.25493/Q3ZS-NV6](https://doi.org/10.25493%2FQ3ZS-NV6) Amunts et al. (2019) [Data set, v1.18] [DOI: 10.25493/8EGG-ZAR](https://doi.org/10.25493%2F8EGG-ZAR) Amunts et al. (2020) [Data set, v2.2] [DOI: 10.25493/TAKY-64D](https://doi.org/10.25493%2FTAKY-64D)\"}], \"instance\": [{\"refereed\": {\"classid\": \"0002\", \"classname\": \"nonPeerReviewed\", \"schemename\": \"dnet:review_levels\", \"schemeid\": \"dnet:review_levels\"}, \"hostedby\": {\"value\": \"EBRAINS\", \"key\": \"10|fairsharing_::2ef3e50fd7c1091dda165f25be7f64fd\"}, \"license\": {\"value\": \"CC BY NC SA\"}, \"url\": [\"https://dx.doi.org/10.25493/bq0h-znc\"], \"pid\": [{\"dataInfo\": {\"invisible\": false, \"trust\": \"0.9\", \"deletedbyinference\": false, \"inferred\": false, \"provenanceaction\": {\"classid\": \"sysimport:actionset\", \"classname\": \"Harvested\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}}, \"qualifier\": {\"classid\": \"doi\", \"classname\": \"Digital Object Identifier\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"10.25493/bq0h-znc\"}], \"instanceTypeMapping\": [{\"originalType\": \"Dataset\", \"typeLabel\": \"dataset\", \"vocabularyName\": \"openaire::coar_resource_types_3_1\", \"typeCode\": \"http://purl.org/coar/resource_type/c_ddb1\"}], \"dateofacceptance\": {\"value\": \"2018-05-22\"}, \"collectedfrom\": {\"value\": \"Datacite\", \"key\": \"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254\"}, \"accessright\": {\"classid\": \"UNKNOWN\", \"classname\": \"not available\", \"schemename\": \"dnet:access_modes\", \"schemeid\": \"dnet:access_modes\"}, \"instancetype\": {\"classid\": \"0021\", \"classname\": \"Dataset\", \"schemename\": \"dnet:publication_resource\", \"schemeid\": \"dnet:publication_resource\"}}], \"dateofcollection\": \"2021-07-20T01:01:03+0000\", \"metaResourceType\": {\"classid\": \"Research Data\", \"classname\": \"Research Data\", \"schemename\": \"openaire::meta_resource_types\", \"schemeid\": \"openaire::meta_resource_types\"}, \"dateoftransformation\": \"2021-07-20T01:01:03+0000\", \"dateofacceptance\": {\"value\": \"2018-05-22\"}, \"publisher\": {\"value\": \"Human Brain Project Neuroinformatics Platform\"}, \"language\": {\"classid\": \"und\", \"classname\": \"Undetermined\", \"schemename\": \"dnet:languages\", \"schemeid\": \"dnet:languages\"}, \"resulttype\": {\"classid\": \"dataset\", \"classname\": \"dataset\", \"schemename\": \"dnet:result_typologies\", \"schemeid\": \"dnet:result_typologies\"}, \"title\": [{\"qualifier\": {\"classid\": \"main title\", \"classname\": \"main title\", \"schemename\": \"dnet:dataCite_title\", \"schemeid\": \"dnet:dataCite_title\"}, \"value\": \"Probabilistic cytoarchitectonic map of Area 5Ci (SPL) (v8.2)\"}], \"originalId\": [\"50|datacite____::7f98d6c254b0d743c4634412ed94817b\", \"10.25493/bq0h-znc\"], \"context\": []}"; - - Row a = model.rowFromJson(json1); - Row b = model.rowFromJson(json2); - - List rows = Lists.newArrayList(a, b); - - Dataset rowsDS = spark.createDataset(rows, RowEncoder.apply(model.schema())) - .transform(deduper.filterAndCleanup()) - .transform(deduper.generateClustersWithCollect()); - - for (Row r: rowsDS.toJavaRDD().collect()) { - System.out.println("block key = " + r.get(0)); - System.out.println("block size = " + r.get(1)); - } - - } - - private void showGraph(Dataset entities, Dataset simRels) { - try { - prepareGraphParams( - entities, - simRels, - "/tmp/graph.html", - Paths.get(Objects.requireNonNull(SparkDedupLocalTest.class.getResource("/eu/dnetlib/dhp/dedup/visualization_tools/graph_template.html")).toURI()).toFile().getAbsolutePath()); - Desktop.getDesktop().browse(new File("/tmp/graph.html").toURI()); - } catch (Exception e) { - e.printStackTrace(); - } - } - -} \ No newline at end of file +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hive/GraphHiveTableImporterJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hive/GraphHiveTableImporterJob.java index 73243dbc5..d4fec3f52 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hive/GraphHiveTableImporterJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hive/GraphHiveTableImporterJob.java @@ -72,9 +72,9 @@ public class GraphHiveTableImporterJob { final Encoder clazzEncoder = Encoders.bean(clazz); Dataset dataset = spark - .read() - .schema(clazzEncoder.schema()) - .json(inputPath); + .read() + .schema(clazzEncoder.schema()) + .json(inputPath); if (numPartitions > 0) { log.info("repartitioning {} to {} partitions", clazz.getSimpleName(), numPartitions); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/enrich/orcid/ORCIDAuthorMatchersTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/enrich/orcid/ORCIDAuthorMatchersTest.scala index 4e5ad5365..eece56b74 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/enrich/orcid/ORCIDAuthorMatchersTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/enrich/orcid/ORCIDAuthorMatchersTest.scala @@ -31,6 +31,7 @@ class ORCIDAuthorMatchersTest { assertTrue(matchOrderedTokenAndAbbreviations("孙林 Sun Lin", "Sun Lin")) // assertTrue(AuthorsMatchRevised.compare("孙林 Sun Lin", "孙林")); // not yet implemented } + @Test def testDocumentationNames(): Unit = { assertTrue(matchOrderedTokenAndAbbreviations("James C. A. Miller-Jones", "James Antony Miller-Jones")) }