This commit is contained in:
Miriam Baglioni 2024-02-20 09:57:33 +01:00
parent c3be9a7b14
commit e2b9989199
31 changed files with 2174 additions and 674 deletions

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.skgif.model;
/**
@ -5,11 +6,7 @@ package eu.dnetlib.dhp.skgif.model;
* @Date 04/09/23
*/
public enum AccessRight {
OPEN("open"),
CLOSED("closed"),
EMBARGO("embargo"),
RESTRICTED("restricted"),
UNAVAILABLE("unavailable");
OPEN("open"), CLOSED("closed"), EMBARGO("embargo"), RESTRICTED("restricted"), UNAVAILABLE("unavailable");
public final String label;

View File

@ -1,9 +1,10 @@
package eu.dnetlib.dhp.skgif.model;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.io.Serializable;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
* @author miriam.baglioni
* @Date 05/09/23

View File

@ -1,9 +1,10 @@
package eu.dnetlib.dhp.skgif.model;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.io.Serializable;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
* @author miriam.baglioni
* @Date 01/09/23

View File

@ -1,10 +1,11 @@
package eu.dnetlib.dhp.skgif.model;
import com.fasterxml.jackson.annotation.JsonProperty;
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
* @author miriam.baglioni
* @Date 01/09/23

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
@ -25,4 +26,11 @@ public class Dates implements Serializable {
public void setType(String type) {
this.type = type;
}
public static Dates newInstance(String value, String type) {
Dates d = new Dates();
d.value = value;
d.type = type;
return d;
}
}
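A minimal usage sketch (not part of this commit) of the new factory, mirroring how DumpResult.getManifestationList builds the publishing date; the date value is illustrative:
// hypothetical usage: the factory sets both fields in one call
Dates publishing = Dates.newInstance("2023-09-01", "publishing");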

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
@ -26,4 +27,11 @@ public class Identifier implements Serializable {
this.value = value;
}
public static Identifier newInstance(String scheme, String value) {
Identifier i = new Identifier();
i.value = value;
i.scheme = scheme;
return i;
}
}
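A usage sketch (not part of this commit) matching how EmitFromResults.emitPerson records an author ORCID; note the parameter order is scheme first, then value. The ORCID shown is illustrative:
// hypothetical usage: scheme first, value second
Identifier orcidId = Identifier.newInstance("orcid", "0000-0002-1825-0097");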

View File

@ -1,11 +1,12 @@
package eu.dnetlib.dhp.skgif.model;
import com.fasterxml.jackson.annotation.JsonProperty;
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
import java.net.URL;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
* @author miriam.baglioni
* @Date 01/09/23
@ -20,7 +21,7 @@ public class Manifestation implements Serializable {
private String peerReview;
@JsonProperty("metadata_curation")
private String metadataCuration;
private URL url;
private String url;
private String pid;
@JsonProperty("access_right")
private String accessRight;
@ -72,11 +73,11 @@ public class Manifestation implements Serializable {
this.metadataCuration = metadataCuration;
}
public URL getUrl() {
public String getUrl() {
return url;
}
public void setUrl(URL url) {
public void setUrl(String url) {
this.url = url;
}

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.skgif.model;
/**
@ -5,9 +6,7 @@ package eu.dnetlib.dhp.skgif.model;
* @Date 04/09/23
*/
public enum MetadataCuration {
YES("yes"),
NO("no"),
UNAVAILABLE("unavailable");
YES("yes"), NO("no"), UNAVAILABLE("unavailable");
public final String label;

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.skgif.model;
/**
@ -5,12 +6,8 @@ package eu.dnetlib.dhp.skgif.model;
* @Date 04/09/23
*/
public enum PeerReview {
PEER_REVIEWED("peer-reviewed"),
NON_PEER_REVIEWED("not peer-reviewed"),
DOUBLE_BLIND("double-blind"),
SINGLE_BLIND("single-blind"),
UNAVAILABLE("unavailable"),
OPEN("open peer review");
PEER_REVIEWED("peer-reviewed"), NON_PEER_REVIEWED("not peer-reviewed"), DOUBLE_BLIND("double-blind"), SINGLE_BLIND(
"single-blind"), UNAVAILABLE("unavailable"), OPEN("open peer review");
public final String label;

View File

@ -1,10 +1,11 @@
package eu.dnetlib.dhp.skgif.model;
import org.codehaus.jackson.annotate.JsonProperty;
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
import java.util.List;
import org.codehaus.jackson.annotate.JsonProperty;
/**
* @author miriam.baglioni
* @Date 05/09/23
@ -19,7 +20,7 @@ public class Persons implements Serializable {
private String familyName;
private String agent;
@JsonProperty("declared_affiliations")
private List<Affiliation>declaredAffiliations;
private List<Affiliation> declaredAffiliations;
public String getLocalIdentifier() {
return localIdentifier;

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
@ -7,12 +8,8 @@ import java.io.Serializable;
* @Date 05/09/23
*/
public enum RelationType implements Serializable {
OUTCOME("outcome"),
AFFILIATION("hasAuthorInstitution"),
SUPPLEMENT("IsSupplementedBy"),
DOCUMENTS("IsDocumentedBy"),
PART("IsPartOf"),
VERSION("IsNewVersioneOf");
OUTCOME("outcome"), AFFILIATION("hasAuthorInstitution"), SUPPLEMENT("IsSupplementedBy"), DOCUMENTS(
"IsDocumentedBy"), PART("IsPartOf"), VERSION("IsNewVersioneOf");
public final String label;

View File

@ -1,10 +1,13 @@
package eu.dnetlib.dhp.skgif.model;
import com.fasterxml.jackson.annotation.JsonProperty;
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonProperty;
import eu.dnetlib.dhp.oa.model.graph.Relation;
/**
* @author miriam.baglioni
* @Date 01/09/23
@ -15,6 +18,13 @@ public class Relations implements Serializable {
@JsonProperty("product_list")
private List<String> productList;
public static Relations newInstance(String relClass, List<String> target) {
Relations r = new Relations();
r.relationType = relClass;
r.productList = target;
return r;
}
public String getRelationType() {
return relationType;
}
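A usage sketch (not part of this commit) matching how DumpResult.selectRelations groups targets by relation class; the target id is illustrative:
// hypothetical usage: one Relations entry per relation class, holding the hashed product ids
Relations supplements = Relations
.newInstance("IsSupplementedBy", Arrays.asList("product_____::" + DHPUtils.md5("targetId")));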

View File

@ -1,10 +1,11 @@
package eu.dnetlib.dhp.skgif.model;
import com.fasterxml.jackson.annotation.JsonProperty;
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
* @author miriam.baglioni
* @Date 01/09/23
@ -17,7 +18,7 @@ public class ResearchProduct implements Serializable {
private List<String> abstracts;
@JsonProperty("product_type")
private String productType;
private List<Topic> topics;
private List<ResultTopic> topics;
private List<Contribution> contributions;
private List<Manifestation> manifestations;
@JsonProperty("relevant_organizations")
@ -66,11 +67,11 @@ public class ResearchProduct implements Serializable {
this.productType = productType;
}
public List<Topic> getTopics() {
public List<ResultTopic> getTopics() {
return topics;
}
public void setTopics(List<Topic> topics) {
public void setTopics(List<ResultTopic> topics) {
this.topics = topics;
}

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.skgif.model;
/**
@ -5,10 +6,7 @@ package eu.dnetlib.dhp.skgif.model;
* @Date 01/09/23
*/
public enum ResearchTypes {
LITERATURE("literature"),
RESEARCH_DATA("research data"),
RESEARCH_SOFTWARE("research software"),
OTHER("other");
LITERATURE("literature"), RESEARCH_DATA("research data"), RESEARCH_SOFTWARE("research software"), OTHER("other");
public final String label;

View File

@ -0,0 +1,29 @@
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
/**
* @author miriam.baglioni
* @Date 16/02/24
*/
public class ResultTopic implements Serializable {
private String topic;
private Provenance provenance;
public String getTopic() {
return topic;
}
public void setTopic(String topic) {
this.topic = topic;
}
public Provenance getProvenance() {
return provenance;
}
public void setProvenance(Provenance provenance) {
this.provenance = provenance;
}
}

View File

@ -1,28 +1,39 @@
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
import java.util.List;
/**
* @author miriam.baglioni
* @Date 01/09/23
*/
public class Topic implements Serializable {
private String topic;
private Provenance provenance;
private String local_identifier;
private List<Identifier> identifiers;
private String name;
public String getTopic() {
return topic;
public String getLocal_identifier() {
return local_identifier;
}
public void setTopic(String topic) {
this.topic = topic;
public void setLocal_identifier(String local_identifier) {
this.local_identifier = local_identifier;
}
public Provenance getProvenance() {
return provenance;
public List<Identifier> getIdentifiers() {
return identifiers;
}
public void setProvenance(Provenance provenance) {
this.provenance = provenance;
public void setIdentifiers(List<Identifier> identifiers) {
this.identifiers = identifiers;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
}
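With this change the former embedded Topic becomes a standalone entity, while results point to it through the new ResultTopic; a sketch (not part of this commit) of how a single subject feeds both sides, following EmitFromResults.emitTopic and ResultMapper.mapTopic and assuming both paths hash the same subject scheme string. The subject and provenance values are illustrative:
// hypothetical subject with scheme "fos" and value "climate change"
String localId = "topic_______::" + DHPUtils.md5("fos" + "climate change");
Topic entity = new Topic(); // emitted once per distinct subject (emitTopic)
entity.setLocal_identifier(localId);
entity.setIdentifiers(Arrays.asList(Identifier.newInstance("fos", "climate change")));
entity.setName("climate change");
ResultTopic reference = new ResultTopic(); // embedded in each ResearchProduct (mapTopic)
reference.setTopic(localId);
Provenance provenance = new Provenance();
provenance.setTrust(0.9);
provenance.setType("sysimport:crosswalk");
reference.setProvenance(provenance);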

View File

@ -0,0 +1,304 @@
package eu.dnetlib.dhp.skgif;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.skgif.Utils.getOrcid;
import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.*;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.common.EntityType;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.skgif.beans.EmitPerManifestation;
import eu.dnetlib.dhp.skgif.beans.PartialResearchProduct;
import eu.dnetlib.dhp.skgif.beans.RelationPerProduct;
import eu.dnetlib.dhp.skgif.model.*;
import eu.dnetlib.dhp.skgif.model.AccessRight;
import eu.dnetlib.dhp.utils.DHPUtils;
import scala.Tuple2;
/**
* @author miriam.baglioni
* @Date 06/02/24
*/
public class DumpResult implements Serializable {
private static final Logger log = LoggerFactory.getLogger(DumpResult.class);
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
PrepareResultRelation.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/dump/dump_result_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
final String workingDir = parser.get("workingDir");
log.info("workingDir: {}", workingDir);
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
mapResult(spark, inputPath, outputPath, workingDir);
});
}
// for each result emit the id + the journal (if any) + the instance + the hostedBy of the instance
public static <R extends Result> void mapResult(SparkSession spark, String inputPath, String outputPath,
String workingDir) {
// selection of the relevant relations from the result type to other entities. Only the semantically relevant ones are
// considered
selectRelations(spark, inputPath, workingDir);
// merge of relations and manifestation for the same result
getRelationAndManifestation(spark, workingDir, inputPath);
// dump of the result and enrichment with relevant information for relations and manifestations
dumpResult(spark, inputPath, workingDir);
}
private static void getRelationAndManifestation(SparkSession spark, String workingDir, String inputPath) {
Dataset<RelationPerProduct> aggRelations = Utils
.readPath(spark, workingDir + "aggrelation", RelationPerProduct.class);
ModelSupport.entityTypes
.keySet()
.parallelStream()
.filter(ModelSupport::isResult)
.forEach(e -> {
Dataset<Datasource> datasource = Utils
.readPath(spark, inputPath + "/datasource", Datasource.class)
.filter(
(FilterFunction<Datasource>) d -> Optional.ofNullable(d.getEosctype()).isPresent() &&
d.getEosctype().getClassname().equalsIgnoreCase("Journal archive"));
Dataset<EmitPerManifestation> man = Utils
.readPath(spark, workingDir + e.name() + "/manifestation", EmitPerManifestation.class);
man
.joinWith(aggRelations, man.col("resultId").equalTo(aggRelations.col("resultId")), "left")
.groupByKey(
(MapFunction<Tuple2<EmitPerManifestation, RelationPerProduct>, String>) t2 -> t2
._1()
.getResultId(),
Encoders.STRING())
.mapGroups(
(MapGroupsFunction<String, Tuple2<EmitPerManifestation, RelationPerProduct>, PartialResearchProduct>) (
k, v) -> {
PartialResearchProduct prp = new PartialResearchProduct();
prp.setResultId(k);
List<EmitPerManifestation> epms = new ArrayList<>();
Tuple2<EmitPerManifestation, RelationPerProduct> first = v.next();
RelationPerProduct rpp = first._2();
epms.add(first._1());
v.forEachRemaining(t2 -> epms.add(t2._1()));
Dataset<EmitPerManifestation> emitformanifestation = spark
.createDataset(epms, Encoders.bean(EmitPerManifestation.class));
prp.setManifestations(getManifestationList(emitformanifestation, datasource));
prp.setRelatedProducts(rpp.getRelatedProduct());
prp.setRelevantOrganizations(rpp.getOrganizations());
prp.setFunding(rpp.getFunding());
return prp;
}, Encoders.bean(PartialResearchProduct.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir + e.name() + "/partialResearchproduct");
});
}
private static List<Manifestation> getManifestationList(Dataset<EmitPerManifestation> emitformanifestation,
Dataset<Datasource> datasource) {
return emitformanifestation
.joinWith(
datasource, emitformanifestation
.col("hostedBy")
.equalTo(datasource.col("id")),
"left")
.map((MapFunction<Tuple2<EmitPerManifestation, Datasource>, Manifestation>) t2 -> {
// if the datasource side of the join is present we have the biblio and the venue
// if it is not, we only have the other values
EmitPerManifestation epm = t2._1();
Manifestation manifestation = new Manifestation();
manifestation.setProductLocalType(epm.getInstance().getInstancetype().getClassname());
manifestation.setProductLocalTypeSchema(epm.getInstance().getInstancetype().getSchemename());
manifestation
.setDates(
Arrays
.asList(
Dates.newInstance(epm.getInstance().getDateofacceptance().getValue(), "publishing")));
if (Optional.ofNullable(epm.getInstance().getRefereed()).isPresent())
switch (epm.getInstance().getRefereed().getClassid()) {
case "0000":
manifestation.setPeerReview(PeerReview.UNAVAILABLE.label);
break;
case "0001":
manifestation.setPeerReview(PeerReview.PEER_REVIEWED.label);
break;
case "0002":
manifestation.setPeerReview(PeerReview.NON_PEER_REVIEWED.label);
break;
}
manifestation.setMetadataCuration("unavailable");
if (Optional.ofNullable(epm.getInstance().getAccessright()).isPresent())
switch (epm.getInstance().getAccessright().getClassid()) {
case "OPEN":
case "OPEN DATA":
case "OPEN SOURCE":
manifestation.setAccessRight(AccessRight.OPEN.label);
break;
case "CLOSED":
manifestation.setAccessRight(AccessRight.CLOSED.label);
break;
case "RESTRICTED":
manifestation.setAccessRight(AccessRight.RESTRICTED.label);
break;
case "EMBARGO":
case "12MONTHS":
case "6MONTHS":
manifestation.setAccessRight(AccessRight.EMBARGO.label);
break;
default:
manifestation.setAccessRight(AccessRight.UNAVAILABLE.label);
}
if (Optional.ofNullable(epm.getInstance().getLicense()).isPresent())
manifestation.setLicence(epm.getInstance().getLicense().getValue());
manifestation.setUrl(epm.getInstance().getUrl().get(0));
if (Optional.ofNullable(epm.getInstance().getPid()).isPresent()) {
manifestation.setPid(epm.getInstance().getPid().get(0).getValue());
}
if (Optional.ofNullable(t2._2()).isPresent())
manifestation.setBiblio(getBiblio(epm));
manifestation.setVenue("venue_______::" + DHPUtils.md5(epm.getInstance().getHostedby().getKey()));
manifestation
.setHostingDatasource("datasource__::" + DHPUtils.md5(epm.getInstance().getHostedby().getKey()));
return manifestation;
}, Encoders.bean(Manifestation.class))
.collectAsList();
}
private static Biblio getBiblio(EmitPerManifestation epm) {
Biblio biblio = new Biblio();
biblio.setEdition(epm.getJournal().getEdition());
biblio.setIssue(epm.getJournal().getIss());
biblio.setPublisher(epm.getPublisher());
biblio.setVolume(epm.getJournal().getVol());
biblio.setEndPage(epm.getJournal().getEp());
biblio.setStartPage(epm.getJournal().getSp());
return biblio;
}
private static <R extends Result> void dumpResult(SparkSession spark, String inputPath, String workingDir) {
ModelSupport.entityTypes
.keySet()
.parallelStream()
.filter(ModelSupport::isResult)
.forEach(e -> {
Class<R> resultClazz = ModelSupport.entityTypes.get(e);
Dataset<R> results = Utils.readPath(spark, inputPath + e.name(), resultClazz);
Dataset<PartialResearchProduct> prr = Utils
.readPath(spark, workingDir + e.name() + "/partialresearchproduct", PartialResearchProduct.class);
results
.joinWith(prr, results.col("id").equalTo(prr.col("resultId")), "left")
.map((MapFunction<Tuple2<R, PartialResearchProduct>, ResearchProduct>) t2 -> {
ResearchProduct rp = ResultMapper.map(t2._1());
if (Optional.ofNullable(t2._2()).isPresent()) {
rp.setRelatedProducts(t2._2().getRelatedProducts());
rp.setFunding(t2._2().getFunding());
rp.setRelevantOrganizations(t2._2().getRelevantOrganizations());
rp.setManifestations(t2._2().getManifestations());
}
return rp;
}, Encoders.bean(ResearchProduct.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir + e.name() + "/researchproduct");
});
}
private static void selectRelations(SparkSession spark, String inputPath, String workingDir) {
Dataset<Relation> relation = spark
.read()
.json(inputPath + "/relation")
.as(Encoders.bean(Relation.class))
.filter("dataInfo.deletedbyinference != true and dataInfo.invisible != true")
.filter(
"relClass == 'HasAuthorInstitution' or relClass == 'IsProducedBy' or " +
"relClass == 'IsSupplementedBy' or relClass == 'IsDocumentedBy' or relClass == 'IsPartOf' " +
"relClass == 'IsNewVersionOf' or relClass == 'Cites'");
relation
.groupByKey((MapFunction<Relation, String>) r -> r.getSource(), Encoders.STRING())
.mapGroups((MapGroupsFunction<String, Relation, RelationPerProduct>) (k, v) -> {
RelationPerProduct rpp = new RelationPerProduct();
rpp.setResultId(k);
Map<String, List<String>> remainingRelations = new HashMap<>();
while (v.hasNext()) {
Relation rel = v.next();
String target = rel.getTarget();
String relClass = rel.getRelClass();
switch (rel.getRelClass().toLowerCase()) {
case "hasauthorinstitution":
rpp.getOrganizations().add("organization::" + DHPUtils.md5(target));
break;
case "isproducedby":
rpp.getFunding().add("grant_______::" + DHPUtils.md5(target));
break;
default:
if (!remainingRelations.containsKey(relClass))
remainingRelations.put(relClass, new ArrayList<>());
remainingRelations.get(relClass).add("product_____::" + DHPUtils.md5(target));
}
}
for (String key : remainingRelations.keySet())
rpp.getRelatedProduct().add(Relations.newInstance(key, remainingRelations.get(key)));
return rpp;
}, Encoders.bean(RelationPerProduct.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir + "/aggrelation");
}
}
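All the local identifiers built in this class and in EmitFromResults follow the same convention: an underscore-padded prefix, a double colon, and the MD5 of the OpenAIRE key; a hypothetical helper (not in the commit) that captures the pattern:
// hypothetical helper, equivalent to e.g. "datasource__::" + DHPUtils.md5(hostedByKey)
// or "grant_______::" + DHPUtils.md5(projectId) used above
private static String skgifId(String paddedPrefix, String openaireKey) {
return paddedPrefix + "::" + DHPUtils.md5(openaireKey);
}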

View File

@ -0,0 +1,251 @@
package eu.dnetlib.dhp.skgif;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.skgif.Utils.getOrcid;
import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.common.EntityType;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.skgif.beans.EmitPerManifestation;
import eu.dnetlib.dhp.skgif.model.*;
import eu.dnetlib.dhp.skgif.model.AccessRight;
import eu.dnetlib.dhp.utils.DHPUtils;
import scala.Array;
import scala.Tuple2;
/**
* @author miriam.baglioni
* @Date 06/02/24
*/
public class EmitFromResults implements Serializable {
private static final Logger log = LoggerFactory.getLogger(EmitFromResults.class);
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
PrepareResultRelation.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/dump/emit_biblio_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
final String workingDir = parser.get("workingDir");
log.info("workingDir: {}", workingDir);
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
emitFromResult(spark, inputPath, outputPath, workingDir);
});
}
// for each result emit the id + the journal (if any) + the instance + the hostedBy of the instance
public static <R extends Result> void emitFromResult(SparkSession spark, String inputPath, String outputPath,
String workingDir) {
emitManifestation(spark, inputPath, workingDir);
emitPerson(spark, inputPath, outputPath, workingDir);
emitTopic(spark, inputPath, outputPath, workingDir);
}
private static <R extends Result> void emitTopic(SparkSession spark, String inputPath, String outputPath,
String workingDir) {
ModelSupport.entityTypes.keySet().forEach(e -> {
if (ModelSupport.isResult(e)) {
Class<R> resultClazz = ModelSupport.entityTypes.get(e);
Utils
.readPath(spark, inputPath + e.name(), resultClazz)
.filter((FilterFunction<R>) r -> Optional.ofNullable(r.getSubject()).isPresent())
.flatMap(
(FlatMapFunction<R, Topic>) r -> r
.getSubject()
.stream()
.filter(s -> !s.getQualifier().getClassid().equalsIgnoreCase("keyword"))
.map(s -> {
Topic t = new Topic();
t
.setLocal_identifier(
"topic_______::" + DHPUtils.md5(s.getQualifier().getSchemeid() + s.getValue()));
t
.setIdentifiers(
Arrays
.asList(
Identifier.newInstance(s.getQualifier().getSchemeid(), s.getValue())));
t.setName(s.getValue());
return t;
})
.collect(Collectors.toList())
.iterator(),
Encoders.bean(Topic.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir + e.name() + "/topic");
}
});
Dataset<Topic> topics = spark.emptyDataset(Encoders.bean(Topic.class));
for (EntityType entityType : ModelSupport.entityTypes.keySet()) {
if (ModelSupport.isResult(entityType))
topics = topics.union(Utils.readPath(spark, workingDir + entityType.name() + "/topic", Topic.class));
}
topics
.groupByKey((MapFunction<Topic, String>) p -> p.getLocal_identifier(), Encoders.STRING())
.mapGroups((MapGroupsFunction<String, Topic, Topic>) (k, v) -> v.next(), Encoders.bean(Topic.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath + "/Topic");
}
private static <R extends Result> void emitPerson(SparkSession spark, String inputPath, String outputPath,
String workingDir) {
ModelSupport.entityTypes.keySet().forEach(e -> {
if (ModelSupport.isResult(e)) {
Class<R> resultClazz = ModelSupport.entityTypes.get(e);
Utils
.readPath(spark, inputPath + e.name(), resultClazz)
.flatMap((FlatMapFunction<R, Persons>) r -> {
List<Persons> authors = new ArrayList<>();
if (Optional.ofNullable(r.getAuthor()).isPresent()) {
int count = 0;
for (Author a : r.getAuthor()) {
count += 1;
Persons p = new Persons();
p.setFamilyName(a.getSurname());
p.setGivenName(a.getName());
String identifier = new String();
if (Optional.ofNullable(a.getPid()).isPresent()) {
Tuple2<String, Boolean> orcid = getOrcid(a.getPid());
if (orcid != null) {
identifier = "person______::" + DHPUtils.md5(orcid._1() + orcid._2());
if (orcid._2())
p
.setIdentifiers(
Arrays.asList(Identifier.newInstance("orcid", orcid._1())));
else
p
.setIdentifiers(
Arrays.asList(Identifier.newInstance("orcid_pending", orcid._1())));
} else {
if (Optional.ofNullable(a.getRank()).isPresent()) {
identifier = "tmp_person__::" + DHPUtils.md5(r.getId() + a.getRank());
} else {
identifier = "tmp_person__::" + DHPUtils.md5(r.getId() + count);
}
}
}
p.setLocalIdentifier(identifier);
authors.add(p);
}
}
return authors.iterator();
}, Encoders.bean(Persons.class))
.filter(Objects::nonNull)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir + e.name() + "/person");
}
});
Dataset<Persons> persons = spark.emptyDataset(Encoders.bean(Persons.class));
for (EntityType entityType : ModelSupport.entityTypes.keySet()) {
if (ModelSupport.isResult(entityType))
persons = persons
.union(Utils.readPath(spark, workingDir + entityType.name() + "/person", Persons.class));
}
persons
.groupByKey((MapFunction<Persons, String>) p -> p.getLocalIdentifier(), Encoders.STRING())
.mapGroups((MapGroupsFunction<String, Persons, Persons>) (k, v) -> v.next(), Encoders.bean(Persons.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath + "/Persons");
}
private static <R extends Result> void emitManifestation(SparkSession spark, String inputPath, String workingDir) {
Dataset<Datasource> datasource = Utils
.readPath(spark, inputPath + "datasource", Datasource.class)
.filter(
(FilterFunction<Datasource>) d -> Optional.ofNullable(d.getEosctype()).isPresent() &&
d.getEosctype().getClassname().equalsIgnoreCase("Journal archive"));
ModelSupport.entityTypes.keySet().forEach(e -> {
if (ModelSupport.isResult(e)) {
Class<R> resultClazz = ModelSupport.entityTypes.get(e);
// Dataset<EmitPerManifestation> emitformanifestation =
Utils
.readPath(spark, inputPath + e.name(), resultClazz)
.flatMap((FlatMapFunction<R, EmitPerManifestation>) p -> p.getInstance().stream().map(i -> {
EmitPerManifestation epb = new EmitPerManifestation();
epb.setResultId(p.getId());
epb.setInstance(i);
epb.setHostedBy(i.getHostedby().getKey());
epb
.setPublisher(
Optional
.ofNullable(p.getPublisher())
.map(v -> v.getValue())
.orElse(new String()));
if (p.getClass() == Publication.class) {
epb.setJournal(((Publication) p).getJournal());
}
return epb;
}).collect(Collectors.toList()).iterator(), Encoders.bean(EmitPerManifestation.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir + e.name() + "/manifestation");
}
});
}
}

View File

@ -1,77 +0,0 @@
package eu.dnetlib.dhp.skgif;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.skgif.model.ResearchProduct;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Serializable;
import java.util.Optional;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
/**
* @author miriam.baglioni
* @Date 06/02/24
*/
public class JournalsFromDatasources implements Serializable {
private static final Logger log = LoggerFactory.getLogger(JournalsFromDatasources.class);
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
PrepareResultRelation.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/dump/journals_from_datasource_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
final String datasourcePath = parser.get("datasourcePath");
log.info("datasourcePath: {}", datasourcePath);
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
extendResult(spark, inputPath, outputPath, datasourcePath);
});
}
//find the results having a container in the metadata
//map all the hostedby.key of the instances associated with the result
//find a correspondence to a datasource which is a journal
//write the biblio for the result
public static void extendResult(SparkSession spark, String inputPath, String outputPath, String datasourcePath ){
Dataset<Datasource> datasource = Utils.readPath(spark, datasourcePath, Datasource.class)
.filter((FilterFunction<Datasource>) d -> Optional.ofNullable(d.getEosctype()).isPresent() &&
d.getEosctype().getClassname().equalsIgnoreCase("Journal archive"));
Dataset<ResearchProduct> results = Utils.readPath(spark, inputPath, ResearchProduct.class);
}
}

View File

@ -9,6 +9,7 @@ import java.util.*;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.*;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -58,17 +59,23 @@ public class PrepareResultRelation implements Serializable {
}
private static void prepareResultRelationList(SparkSession spark, String inputPath, String outputPath) {
final StructType structureSchema = new StructType()
.fromDDL("`id` STRING, `dataInfo` STRUCT<`deletedbyinference`:BOOLEAN,`invisible`:BOOLEAN>");
.add(
"dataInfo", new StructType()
.add("deletedbyinference", DataTypes.BooleanType)
.add("invisible", DataTypes.BooleanType))
.add("id", DataTypes.StringType);
Dataset<Relation> relation = spark
.read()
.json(inputPath)
.as(Encoders.bean(Relation.class))
.filter("dataInfo.deletedbyinference != true and dataInfo.invisible != true")
.filter(
"relClass == 'hasAuthorInstitution' or relClass == 'outcome' or " +
"subRelType == 'affiliation' or subRelType == 'outcome' or " +
"relClass == 'IsSupplementedBy' or relClass == 'IsDocumentedBy' or relClass == 'IsPartOf' " +
"relClass == IsNewVersionOf");
"relClass == 'IsNewVersionOf' or relClass == 'Cites'");
org.apache.spark.sql.Dataset<Row> df = spark.createDataFrame(new ArrayList<Row>(), structureSchema);
List<String> entities = Arrays

View File

@ -41,7 +41,7 @@ public class ResultMapper implements Serializable {
Optional<Qualifier> ort = Optional.ofNullable(input.getResulttype());
if (ort.isPresent()) {
try {
out.setLocalIdentifier(input.getId());
out.setLocalIdentifier("product___::" + DHPUtils.md5(input.getId()));
mapPid(out, input);
mapTitle(out, input);
mapAbstract(out, input);
@ -49,17 +49,7 @@ public class ResultMapper implements Serializable {
mapTopic(out, input);
mapContribution(out, input);
if (!Optional.ofNullable(out.getTitles()).isPresent() ||
!Optional.ofNullable(out.getContributions()).isPresent())
return null;
// TODO map the manifestation directly from the instances
// it is not completed
mapManifestation(out, input);
// TODO extend the mapping to consider relations between these entities and the results
// private List<String> relevantOrganizations;
// private List<String> funding;
// private List<Relations> relatedProducts;
//The manifestation will be included extending the result as well as the relations to funder, organization and other results
} catch (ClassCastException cce) {
return null;
@ -70,101 +60,6 @@ public class ResultMapper implements Serializable {
}
private static <E extends Result> void mapManifestation(ResearchProduct out, E input) {
out
.setManifestations(
input
.getInstance()
.stream()
.parallel()
.map(i -> {
try {
return getManifestation(i);
} catch (MalformedURLException e) {
throw new RuntimeException(e);
}
})
.collect(Collectors.toList()));
}
private static Manifestation getManifestation(Instance i) throws MalformedURLException {
Manifestation manifestation = new Manifestation();
manifestation.setProductLocalType(i.getInstancetype().getClassname());
manifestation.setProductLocalTypeSchema(i.getInstancetype().getSchemename());
Dates dates = new Dates();
dates.setType("publishing");
dates.setValue(i.getDateofacceptance().getValue());
manifestation.setDates(Arrays.asList(dates));
switch (i.getRefereed().getClassid()) {
case "0000":
manifestation.setPeerReview(PeerReview.UNAVAILABLE.label);
break;
case "0001":
manifestation.setPeerReview(PeerReview.PEER_REVIEWED.label);
break;
case "0002":
manifestation.setPeerReview(PeerReview.NON_PEER_REVIEWED.label);
break;
}
manifestation.setMetadataCuration(MetadataCuration.UNAVAILABLE.label);
// TODO filter out the URL that refer to pids. If nothing remains, decide what to do
manifestation.setUrl(new URL(i.getUrl().get(0)));
if (Optional.ofNullable(i.getPid()).isPresent()) {
manifestation.setPid(i.getPid().get(0).getValue());
}
switch (i.getAccessright().getClassid()) {
case "OPEN":
case "OPEN DATA":
case "OPEN SOURCE":
manifestation.setAccessRight(AccessRight.OPEN.label);
break;
case "CLOSED":
manifestation.setAccessRight(AccessRight.CLOSED.label);
break;
case "RESTRICTED":
manifestation.setAccessRight(AccessRight.RESTRICTED.label);
break;
case "EMBARGO":
case "12MONTHS":
case "6MONTHS":
manifestation.setAccessRight(AccessRight.EMBARGO.label);
break;
default:
manifestation.setAccessRight(AccessRight.UNAVAILABLE.label);
}
if (Optional.ofNullable(i.getLicense()).isPresent())
manifestation.setLicence(i.getLicense().getValue());
// TODO to fill the biblio in case it is a journal, we need to join with the datasource and verify the type
Biblio biblio = null;
manifestation.setHostingDatasource(i.getHostedby().getKey());
// TODO verify if the result is published in journals or conferences. In that case the venue is the identifier
// of the journal/conference. In case it is not, the venue is the datasource
if (biblio == null) {
manifestation.setVenue(i.getHostedby().getKey());
} else {
manifestation.setVenue("insert the id of the venue");
}
return manifestation;
}
private static Tuple2<String, Boolean> getOrcid(List<StructuredProperty> pid) {
if (!Optional.ofNullable(pid).isPresent())
return null;
if (pid.size() == 0)
return null;
for (StructuredProperty p : pid) {
if (p.getQualifier().getClassid().equals(ModelConstants.ORCID)) {
return new Tuple2<>(p.getValue(), Boolean.TRUE);
}
}
for (StructuredProperty p : pid) {
if (p.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING)) {
return new Tuple2<>(p.getValue(), Boolean.FALSE);
}
}
return null;
}
private static <E extends Result> void mapContribution(ResearchProduct out, E input) {
if (Optional.ofNullable(input.getAuthor()).isPresent()) {
int count = 0;
@ -172,14 +67,14 @@ public class ResultMapper implements Serializable {
count += 1;
Contribution contribution = new Contribution();
if (Optional.ofNullable(a.getPid()).isPresent()) {
Tuple2<String, Boolean> orcid = getOrcid(a.getPid());
Tuple2<String, Boolean> orcid = Utils.getOrcid(a.getPid());
if (orcid != null) {
contribution.setPerson("person______::"+DHPUtils.md5(orcid._1() + orcid._2()));
contribution.setPerson("person______::" + DHPUtils.md5(orcid._1() + orcid._2()));
} else {
if (Optional.ofNullable(a.getRank()).isPresent()) {
contribution.setPerson("person______::"+DHPUtils.md5(input.getId() + a.getRank()));
contribution.setPerson("person______::" + DHPUtils.md5(input.getId() + a.getRank()));
} else {
contribution.setPerson("tmp_person__::"+DHPUtils.md5(input.getId() + count));
contribution.setPerson("tmp_person__::" + DHPUtils.md5(input.getId() + count));
}
}
@ -187,32 +82,31 @@ public class ResultMapper implements Serializable {
}
}
// "contributions": [
// {
// "person": "person_123",
// "declared_affiliations": ["org_1", "org_3"],
// "rank": 1,
// "roles": ["writing-original-draft", "conceptualization"]
// }
// ]
}
private static <E extends Result> void mapTopic(ResearchProduct out, E input) {
if (Optional.ofNullable(input.getSubject()).isPresent()) {
out.setTopics(input.getSubject().stream().parallel().map(s -> {
Topic topic = new Topic();
out
.setTopics(
input
.getSubject()
.stream()
.filter(s -> !s.getQualifier().getClassid().equalsIgnoreCase("keyword"))
.map(s -> {
ResultTopic topic = new ResultTopic();
topic.setTopic(getIdentifier(s));
Provenance provenance = new Provenance();
provenance.setTrust(Double.valueOf(s.getDataInfo().getTrust()));
provenance.setType(s.getDataInfo().getInferenceprovenance());
topic.setProvenance(provenance);
return topic;
}).collect(Collectors.toList()));
})
.collect(Collectors.toList()));
}
}
private static String getIdentifier(StructuredProperty s) {
return DHPUtils.md5(s.getQualifier().getClassid() + s.getValue());
return "topic_______::" + DHPUtils.md5(s.getQualifier().getClassid() + s.getValue());
}
private static <E extends Result> void mapType(ResearchProduct out, E input) throws NoAllowedTypeException {

View File

@ -0,0 +1,36 @@
package eu.dnetlib.dhp.skgif;
import java.io.Serializable;
import java.util.List;
import java.util.Optional;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import scala.Tuple2;
/**
* @author miriam.baglioni
* @Date 16/02/24
*/
public class Utils implements Serializable {
public static Tuple2<String, Boolean> getOrcid(List<StructuredProperty> pid) {
if (!Optional.ofNullable(pid).isPresent())
return null;
if (pid.size() == 0)
return null;
for (StructuredProperty p : pid) {
if (p.getQualifier().getClassid().equals(ModelConstants.ORCID)) {
return new Tuple2<>(p.getValue(), Boolean.TRUE);
}
}
for (StructuredProperty p : pid) {
if (p.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING)) {
return new Tuple2<>(p.getValue(), Boolean.FALSE);
}
}
return null;
}
}
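A sketch (not part of this commit) of how callers such as EmitFromResults.emitPerson read the returned tuple: the first element is the ORCID value, the second distinguishes confirmed (true) from pending (false), and null means no ORCID-like pid was found; the author variable is hypothetical:
// hypothetical caller, mirroring emitPerson
Tuple2<String, Boolean> orcid = Utils.getOrcid(author.getPid());
if (orcid == null) {
// no ORCID: fall back to a tmp_person__ identifier built from the result id
} else if (orcid._2()) {
// confirmed ORCID -> Identifier.newInstance("orcid", orcid._1())
} else {
// pending ORCID -> Identifier.newInstance("orcid_pending", orcid._1())
}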

View File

@ -0,0 +1,60 @@
package eu.dnetlib.dhp.skgif.beans;
import java.io.Serializable;
import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.Journal;
import eu.dnetlib.dhp.skgif.model.Biblio;
/**
* @author miriam.baglioni
* @Date 15/02/24
*/
public class EmitPerManifestation implements Serializable {
private String resultId;
private String hostedBy;
private Journal journal;
private Instance instance;
private String publisher;
public String getPublisher() {
return publisher;
}
public void setPublisher(String publisher) {
this.publisher = publisher;
}
public String getResultId() {
return resultId;
}
public void setResultId(String resultId) {
this.resultId = resultId;
}
public String getHostedBy() {
return hostedBy;
}
public void setHostedBy(String hostedBy) {
this.hostedBy = hostedBy;
}
public Journal getJournal() {
return journal;
}
public void setJournal(Journal journal) {
this.journal = journal;
}
public Instance getInstance() {
return instance;
}
public void setInstance(Instance instance) {
this.instance = instance;
}
}

View File

@ -0,0 +1,20 @@
package eu.dnetlib.dhp.skgif.beans;
import eu.dnetlib.dhp.skgif.model.ResearchProduct;
/**
* @author miriam.baglioni
* @Date 16/02/24
*/
public class PartialResearchProduct extends ResearchProduct {
private String resultId;
public String getResultId() {
return resultId;
}
public void setResultId(String resultId) {
this.resultId = resultId;
}
}

View File

@ -0,0 +1,58 @@
package eu.dnetlib.dhp.skgif.beans;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import eu.dnetlib.dhp.skgif.model.Relations;
/**
* @author miriam.baglioni
* @Date 16/02/24
*/
public class RelationPerProduct implements Serializable {
private String resultId;
private List<String> organizations;
private List<String> funding;
private List<Relations> relatedProduct;
public RelationPerProduct() {
organizations = new ArrayList<>();
funding = new ArrayList<>();
relatedProduct = new ArrayList<>();
}
public String getResultId() {
return resultId;
}
public void setResultId(String resultId) {
this.resultId = resultId;
}
public List<String> getOrganizations() {
return organizations;
}
public void setOrganizations(List<String> organizations) {
this.organizations = organizations;
}
public List<String> getFunding() {
return funding;
}
public void setFunding(List<String> funding) {
this.funding = funding;
}
public List<Relations> getRelatedProduct() {
return relatedProduct;
}
public void setRelatedProduct(List<Relations> relatedProduct) {
this.relatedProduct = relatedProduct;
}
}

View File

@ -0,0 +1,783 @@
package eu.dnetlib.dhp.oa.graph.dump.skgif;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.Optional;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
import eu.dnetlib.dhp.oa.graph.dump.Constants;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts;
import eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob;
import eu.dnetlib.dhp.oa.model.Instance;
import eu.dnetlib.dhp.oa.model.OpenAccessRoute;
import eu.dnetlib.dhp.oa.model.Score;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.skgif.EmitFromResults;
import eu.dnetlib.dhp.skgif.beans.EmitPerManifestation;
import eu.dnetlib.dhp.skgif.model.Manifestation;
import eu.dnetlib.dhp.skgif.model.Persons;
import eu.dnetlib.dhp.skgif.model.ResultTopic;
//@Disabled
public class EmitFromResultJobTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static SparkSession spark;
private static Path workingDir;
private static final Logger log = LoggerFactory.getLogger(EmitFromResultJobTest.class);
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files.createTempDirectory(EmitFromResultJobTest.class.getSimpleName());
log.info("using work dir {}", workingDir);
SparkConf conf = new SparkConf();
conf.setAppName(EmitFromResultJobTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.toString());
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
spark = SparkSession
.builder()
.appName(EmitFromResultJobTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}
@Test
public void testEmitFromResult() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance")
.getPath();
EmitFromResults
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-outputPath", workingDir.toString() + "/result/",
"-workingDir", workingDir.toString() + "/"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Persons> persons = sc
.textFile(workingDir.toString() + "/result/Persons")
.map(item -> OBJECT_MAPPER.readValue(item, Persons.class));
JavaRDD<ResultTopic> topics = sc
.textFile(workingDir.toString() + "/result/Topic")
.map(item -> OBJECT_MAPPER.readValue(item, ResultTopic.class));
JavaRDD<EmitPerManifestation> manifestation = sc
.textFile(workingDir.toString() + "/result/Persons")
.map(item -> OBJECT_MAPPER.readValue(item, EmitPerManifestation.class));
org.apache.spark.sql.Dataset<Persons> personsDataset = spark
.createDataset(persons.rdd(), Encoders.bean(Persons.class));
personsDataset.show(false);
}
@Test
public void testDatasetDump() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_extendedinstance")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<GraphResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class));
org.apache.spark.sql.Dataset<GraphResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(GraphResult.class));
Assertions.assertEquals(1, verificationDataset.count());
Assertions.assertEquals(1, verificationDataset.filter("type = 'dataset'").count());
// the common fields in the result have already been checked. Now checking only
// community-specific fields
GraphResult gr = verificationDataset.first();
Assertions.assertEquals(2, gr.getGeolocation().size());
Assertions.assertEquals(2, gr.getGeolocation().stream().filter(gl -> gl.getBox().equals("")).count());
Assertions.assertEquals(1, gr.getGeolocation().stream().filter(gl -> gl.getPlace().equals("")).count());
Assertions.assertEquals(1, gr.getGeolocation().stream().filter(gl -> gl.getPoint().equals("")).count());
Assertions
.assertEquals(
1,
gr
.getGeolocation()
.stream()
.filter(gl -> gl.getPlace().equals("18 York St, Ottawa, ON K1N 5S6; Ottawa; Ontario; Canada"))
.count());
Assertions
.assertEquals(
1, gr.getGeolocation().stream().filter(gl -> gl.getPoint().equals("45.427242 -75.693904")).count());
Assertions
.assertEquals(
1,
gr
.getGeolocation()
.stream()
.filter(gl -> gl.getPoint().equals("") && !gl.getPlace().equals(""))
.count());
Assertions
.assertEquals(
1,
gr
.getGeolocation()
.stream()
.filter(gl -> !gl.getPoint().equals("") && gl.getPlace().equals(""))
.count());
Assertions.assertEquals("1024Gb", gr.getSize());
Assertions.assertEquals("1.01", gr.getVersion());
Assertions.assertEquals(null, gr.getContainer());
Assertions.assertEquals(null, gr.getCodeRepositoryUrl());
Assertions.assertEquals(null, gr.getProgrammingLanguage());
Assertions.assertEquals(null, gr.getDocumentationUrl());
Assertions.assertEquals(null, gr.getContactperson());
Assertions.assertEquals(null, gr.getContactgroup());
Assertions.assertEquals(null, gr.getTool());
}
@Test
public void testSoftwareDump() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/software_extendedinstance")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<GraphResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class));
org.apache.spark.sql.Dataset<GraphResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(GraphResult.class));
Assertions.assertEquals(1, verificationDataset.count());
Assertions.assertEquals(1, verificationDataset.filter("type = 'software'").count());
GraphResult gr = verificationDataset.first();
Assertions.assertEquals(2, gr.getDocumentationUrl().size());
Assertions.assertTrue(gr.getDocumentationUrl().contains("doc_url_1"));
Assertions.assertTrue(gr.getDocumentationUrl().contains("doc_url_2"));
Assertions.assertEquals("code_repo", gr.getCodeRepositoryUrl());
Assertions.assertEquals("perl", gr.getProgrammingLanguage());
Assertions.assertEquals(null, gr.getContainer());
Assertions.assertEquals(null, gr.getContactperson());
Assertions.assertEquals(null, gr.getContactgroup());
Assertions.assertEquals(null, gr.getTool());
Assertions.assertEquals(null, gr.getGeolocation());
Assertions.assertEquals(null, gr.getSize());
Assertions.assertEquals(null, gr.getVersion());
}
@Test
public void testOrpDump() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/orp_extendedinstance")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<GraphResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class));
org.apache.spark.sql.Dataset<GraphResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(GraphResult.class));
Assertions.assertEquals(1, verificationDataset.count());
Assertions.assertEquals(1, verificationDataset.filter("type = 'other'").count());
GraphResult gr = verificationDataset.first();
Assertions.assertEquals(2, gr.getContactperson().size());
Assertions.assertTrue(gr.getContactperson().contains(("contact_person1")));
Assertions.assertTrue(gr.getContactperson().contains(("contact_person2")));
Assertions.assertEquals(1, gr.getContactgroup().size());
Assertions.assertTrue(gr.getContactgroup().contains(("contact_group")));
Assertions.assertEquals(2, gr.getTool().size());
Assertions.assertTrue(gr.getTool().contains("tool1"));
Assertions.assertTrue(gr.getTool().contains("tool2"));
Assertions.assertEquals(null, gr.getContainer());
Assertions.assertEquals(null, gr.getDocumentationUrl());
Assertions.assertEquals(null, gr.getCodeRepositoryUrl());
Assertions.assertEquals(null, gr.getProgrammingLanguage());
Assertions.assertEquals(null, gr.getGeolocation());
Assertions.assertEquals(null, gr.getSize());
Assertions.assertEquals(null, gr.getVersion());
}
@Test
public void testPublicationDumpCommunity() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
SparkDumpCommunityProducts
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<CommunityResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
Assertions.assertEquals(1, verificationDataset.count());
Assertions.assertEquals(1, verificationDataset.filter("type = 'publication'").count());
// the common fields in the result have already been checked. Now checking only
// community-specific fields
CommunityResult cr = verificationDataset.first();
Assertions.assertEquals(1, cr.getContext().size());
Assertions.assertEquals("dh-ch", cr.getContext().get(0).getCode());
Assertions.assertEquals("Digital Humanities and Cultural Heritage", cr.getContext().get(0).getLabel());
Assertions.assertEquals(1, cr.getContext().get(0).getProvenance().size());
Assertions.assertEquals("Inferred by OpenAIRE", cr.getContext().get(0).getProvenance().get(0).getProvenance());
Assertions.assertEquals("0.9", cr.getContext().get(0).getProvenance().get(0).getTrust());
Assertions.assertEquals(1, cr.getCollectedfrom().size());
Assertions
.assertEquals("openaire____::fdc7e0400d8c1634cdaf8051dbae23db", cr.getCollectedfrom().get(0).getKey());
Assertions.assertEquals("Pensoft", cr.getCollectedfrom().get(0).getValue());
Assertions.assertEquals(1, cr.getInstance().size());
Assertions
.assertEquals(
"openaire____::fdc7e0400d8c1634cdaf8051dbae23db",
cr.getInstance().get(0).getCollectedfrom().getKey());
Assertions.assertEquals("Pensoft", cr.getInstance().get(0).getCollectedfrom().getValue());
Assertions
.assertEquals(
"openaire____::e707e544b9a5bd23fc27fbfa65eb60dd", cr.getInstance().get(0).getHostedby().getKey());
Assertions.assertEquals("One Ecosystem", cr.getInstance().get(0).getHostedby().getValue());
}
@Test
public void testDataset() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset.json")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
SparkDumpCommunityProducts
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<CommunityResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
Assertions.assertEquals(90, verificationDataset.count());
Assertions
.assertTrue(
verificationDataset.filter("bestAccessright.code = 'c_abf2'").count() == verificationDataset
.filter("bestAccessright.code = 'c_abf2' and bestAccessright.label = 'OPEN'")
.count());
Assertions
.assertTrue(
verificationDataset.filter("bestAccessright.code = 'c_16ec'").count() == verificationDataset
.filter("bestAccessright.code = 'c_16ec' and bestAccessright.label = 'RESTRICTED'")
.count());
Assertions
.assertTrue(
verificationDataset.filter("bestAccessright.code = 'c_14cb'").count() == verificationDataset
.filter("bestAccessright.code = 'c_14cb' and bestAccessright.label = 'CLOSED'")
.count());
Assertions
.assertTrue(
verificationDataset.filter("bestAccessright.code = 'c_f1cf'").count() == verificationDataset
.filter("bestAccessright.code = 'c_f1cf' and bestAccessright.label = 'EMBARGO'")
.count());
Assertions.assertTrue(verificationDataset.filter("size(context) > 0").count() == 90);
Assertions.assertTrue(verificationDataset.filter("type = 'dataset'").count() == 90);
}
@Test
public void testDataset2All() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_cleaned")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<GraphResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class));
org.apache.spark.sql.Dataset<GraphResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(GraphResult.class));
Assertions.assertEquals(5, verificationDataset.count());
}
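// The same cleaned Dataset records dumped per community: none of them belongs to a community
// in the map, so the resulting dump is expected to be empty.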
@Test
public void testDataset2Communities() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_cleaned")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
SparkDumpCommunityProducts
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<CommunityResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
Assertions.assertEquals(0, verificationDataset.count());
}
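// Dumps the Publication test records as CommunityResult and checks the record count and type.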
@Test
public void testPublication() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication.json")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
SparkDumpCommunityProducts
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<CommunityResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
Assertions.assertEquals(74, verificationDataset.count());
verificationDataset.show(false);
Assertions.assertEquals(74, verificationDataset.filter("type = 'publication'").count());
}
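// Dumps the Software test records as CommunityResult and checks the record count and type.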
@Test
public void testSoftware() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/software.json")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
SparkDumpCommunityProducts
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<CommunityResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
Assertions.assertEquals(6, verificationDataset.count());
Assertions.assertEquals(6, verificationDataset.filter("type = 'software'").count());
}
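// Dumps the OtherResearchProduct test records; in the dump their type is reported as 'other'.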
@Test
public void testORP() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/orp.json")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
SparkDumpCommunityProducts
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<CommunityResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
Assertions.assertEquals(3, verificationDataset.count());
Assertions.assertEquals(3, verificationDataset.filter("type = 'other'").count());
}
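// Dumps a single-record publication input and checks that two publication entries appear in the
// community dump.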
@Test
public void testRecord() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/singelRecord_pub.json")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
SparkDumpCommunityProducts
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<CommunityResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
Assertions.assertEquals(2, verificationDataset.count());
verificationDataset.show(false);
Assertions.assertEquals(2, verificationDataset.filter("type = 'publication'").count());
}
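// Dumps publications as GraphResult and verifies that the article processing charge (APC)
// declared at instance level is preserved with its amount and currency.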
@Test
public void testArticlePCA() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_pca")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<GraphResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class));
org.apache.spark.sql.Dataset<GraphResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(GraphResult.class));
Assertions.assertEquals(23, verificationDataset.count());
Assertions.assertEquals(23, verificationDataset.filter("type = 'publication'").count());
verificationDataset.createOrReplaceTempView("check");
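// Explode the instance array and keep only the instances that declare an article processing charge.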
org.apache.spark.sql.Dataset<Row> temp = spark
.sql(
"select id " +
"from check " +
"lateral view explode (instance) i as inst " +
"where inst.articleprocessingcharge is not null");
Assertions.assertEquals(2, temp.count());
Assertions.assertEquals(1, temp.filter("id = 'datacite____::05c611fdfc93d7a2a703d1324e28104a'").count());
Assertions.assertEquals(1, temp.filter("id = 'dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'").count());
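// Re-run the query projecting the APC amount and currency to check the dumped values.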
temp = spark
.sql(
"select id, inst.articleprocessingcharge.amount, inst.articleprocessingcharge.currency " +
"from check " +
"lateral view explode (instance) i as inst " +
"where inst.articleprocessingcharge is not null");
Assertions
.assertEquals(
"3131.64",
temp
.filter("id = 'datacite____::05c611fdfc93d7a2a703d1324e28104a'")
.collectAsList()
.get(0)
.getString(1));
Assertions
.assertEquals(
"EUR",
temp
.filter("id = 'datacite____::05c611fdfc93d7a2a703d1324e28104a'")
.collectAsList()
.get(0)
.getString(2));
Assertions
.assertEquals(
"2578.35",
temp
.filter("id = 'dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'")
.collectAsList()
.get(0)
.getString(1));
Assertions
.assertEquals(
"EUR",
temp
.filter("id = 'dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'")
.collectAsList()
.get(0)
.getString(2));
}
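// Records in resultNotDumped.json must not appear in the dump: the job is expected to produce no output.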
@Test
public void testResultNotDumped() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/resultNotDumped.json")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<GraphResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class));
Assertions.assertEquals(0, tmp.count());
}
}