[Dump Subset] first try fro dump subset and refactoring

This commit is contained in:
Miriam Baglioni 2022-11-17 16:13:10 +01:00
parent 447af1a851
commit 8878b96204
67 changed files with 3546 additions and 335 deletions

View File

@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.model.graph;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
@ -12,6 +13,8 @@ import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
public class ResearchCommunity extends ResearchInitiative {
@JsonSchema(
description = "Only for research communities: the list of the subjects associated to the research community")
@JsonInclude(JsonInclude.Include.NON_NULL)
private List<String> subject;
public List<String> getSubject() {

View File

@ -53,7 +53,11 @@
<artifactId>dump-schema</artifactId>
<version>1.2.5-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>io.github.classgraph</groupId>
<artifactId>classgraph</artifactId>
<version>4.8.71</version>
</dependency>
</dependencies>

View File

@ -1,114 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.CardinalityTooHighException;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
import eu.dnetlib.dhp.oa.model.Result;
import eu.dnetlib.dhp.schema.oaf.Context;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
/**
* It fires the execution of the actual dump for result entities. If the dump is for RC/RI products its checks for each
* result its belongingess to at least one RC/RI before "asking" for its mapping.
*/
public class DumpProducts implements Serializable {
public void run(Boolean isSparkSessionManaged, String inputPath, String outputPath, String communityMapPath,
Class<? extends OafEntity> inputClazz,
Class<? extends Result> outputClazz,
String dumpType) {
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
execDump(
spark, inputPath, outputPath, communityMapPath, inputClazz, outputClazz, dumpType);
});
}
public static <I extends OafEntity, O extends Result> void execDump(
SparkSession spark,
String inputPath,
String outputPath,
String communityMapPath,
Class<I> inputClazz,
Class<O> outputClazz,
String dumpType) {
CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath);
Utils
.readPath(spark, inputPath, inputClazz)
.map((MapFunction<I, O>) value -> execMap(value, communityMap, dumpType), Encoders.bean(outputClazz))
.filter((FilterFunction<O>) value -> value != null)
.map((MapFunction<O, String>) r -> new ObjectMapper().writeValueAsString(r), Encoders.STRING())
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.text(outputPath);
}
private static <I extends OafEntity, O extends Result> O execMap(I value,
CommunityMap communityMap,
String dumpType) throws NoAvailableEntityTypeException, CardinalityTooHighException {
Optional<DataInfo> odInfo = Optional.ofNullable(value.getDataInfo());
if (odInfo.isPresent()) {
if (odInfo.get().getDeletedbyinference() || odInfo.get().getInvisible()) {
return null;
}
} else {
return null;
}
if (Constants.DUMPTYPE.COMMUNITY.getType().equals(dumpType)) {
Set<String> communities = communityMap.keySet();
Optional<List<Context>> inputContext = Optional
.ofNullable(((eu.dnetlib.dhp.schema.oaf.Result) value).getContext());
if (!inputContext.isPresent()) {
return null;
}
List<String> toDumpFor = inputContext.get().stream().map(c -> {
if (communities.contains(c.getId())) {
return c.getId();
}
if (c.getId().contains("::") && communities.contains(c.getId().substring(0, c.getId().indexOf("::")))) {
return c.getId().substring(0, c.getId().indexOf("::"));
}
return null;
}).filter(Objects::nonNull).collect(Collectors.toList());
if (toDumpFor.isEmpty()) {
return null;
}
}
return (O) ResultMapper.map(value, communityMap, dumpType);
}
}

View File

@ -19,7 +19,6 @@ import eu.dnetlib.dhp.oa.model.Author;
import eu.dnetlib.dhp.oa.model.GeoLocation;
import eu.dnetlib.dhp.oa.model.Indicator;
import eu.dnetlib.dhp.oa.model.Instance;
import eu.dnetlib.dhp.oa.model.Measure;
import eu.dnetlib.dhp.oa.model.OpenAccessRoute;
import eu.dnetlib.dhp.oa.model.Result;
import eu.dnetlib.dhp.oa.model.community.CfHbKeyValue;

View File

@ -1,16 +1,35 @@
package eu.dnetlib.dhp.oa.graph.dump.community;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.Optional;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.DumpProducts;
import eu.dnetlib.dhp.oa.graph.dump.Constants;
import eu.dnetlib.dhp.oa.graph.dump.ResultMapper;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.CardinalityTooHighException;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Context;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Result;
/**
@ -48,20 +67,79 @@ public class SparkDumpCommunityProducts implements Serializable {
String communityMapPath = parser.get("communityMapPath");
final String dumpType = Optional
.ofNullable(parser.get("dumpType"))
.map(String::valueOf)
.orElse("community");
Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);
DumpProducts dump = new DumpProducts();
SparkConf conf = new SparkConf();
dump
.run(
isSparkSessionManaged, inputPath, outputPath, communityMapPath, inputClazz, CommunityResult.class,
dumpType);
String finalCommunityMapPath = communityMapPath;
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
resultDump(
spark, inputPath, outputPath, finalCommunityMapPath, inputClazz);
});
}
public static <I extends OafEntity> void resultDump(
SparkSession spark,
String inputPath,
String outputPath,
String communityMapPath,
Class<I> inputClazz) {
CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath);
Utils
.readPath(spark, inputPath, inputClazz)
.map(
(MapFunction<I, CommunityResult>) value -> execMap(value, communityMap),
Encoders.bean(CommunityResult.class))
.filter((FilterFunction<CommunityResult>) value -> value != null)
.map(
(MapFunction<CommunityResult, String>) r -> new ObjectMapper().writeValueAsString(r), Encoders.STRING())
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.text(outputPath);
}
private static <I extends OafEntity, O extends eu.dnetlib.dhp.oa.model.Result> O execMap(I value,
CommunityMap communityMap) throws NoAvailableEntityTypeException, CardinalityTooHighException {
Optional<DataInfo> odInfo = Optional.ofNullable(value.getDataInfo());
if (Boolean.FALSE.equals(odInfo.isPresent())) {
return null;
}
if (Boolean.TRUE.equals(odInfo.get().getDeletedbyinference())
|| Boolean.TRUE.equals(odInfo.get().getInvisible())) {
return null;
}
Set<String> communities = communityMap.keySet();
Optional<List<Context>> inputContext = Optional
.ofNullable(((eu.dnetlib.dhp.schema.oaf.Result) value).getContext());
if (!inputContext.isPresent()) {
return null;
}
List<String> toDumpFor = inputContext.get().stream().map(c -> {
if (communities.contains(c.getId())) {
return c.getId();
}
if (c.getId().contains("::") && communities.contains(c.getId().substring(0, c.getId().indexOf("::")))) {
return c.getId().substring(0, c.getId().indexOf("::"));
}
return null;
}).filter(Objects::nonNull).collect(Collectors.toList());
if (toDumpFor.isEmpty()) {
return null;
}
return (O) ResultMapper.map(value, communityMap, Constants.DUMPTYPE.COMMUNITY.getType());
}
}

View File

@ -44,7 +44,6 @@ public class Extractor implements Serializable {
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
extractRelationResult(
spark, inputPath, outputPath, inputClazz, Utils.getCommunityMap(spark, communityMapPath));
});
@ -116,7 +115,7 @@ public class Extractor implements Serializable {
}, Encoders.bean(Relation.class))
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
.mode(SaveMode.Append)
.json(outputPath);
}

View File

@ -62,6 +62,28 @@ public class QueryInformationSystem {
}
public List<ContextInfo> getContextInformation() throws ISLookUpException {
List<ContextInfo> ret = new ArrayList<>();
isLookUp
.quickSearchProfile(XQUERY_ENTITY)
.forEach(c -> {
ContextInfo cinfo = new ContextInfo();
String[] cSplit = c.split("@@");
cinfo.setId(cSplit[0]);
cinfo.setName(cSplit[1]);
cinfo.setDescription(cSplit[2]);
if (!cSplit[3].trim().equals("")) {
cinfo.setSubject(Arrays.asList(cSplit[3].split(",")));
}
cinfo.setZenodocommunity(cSplit[4]);
cinfo.setType(cSplit[5]);
ret.add(cinfo);
});
return ret;
}
public List<String> getContextRelationResult() {
return contextRelationResult;
}

View File

@ -78,7 +78,8 @@ public class SparkCollectAndSave implements Serializable {
.union(Utils.readPath(spark, inputPath + "/result/dataset", GraphResult.class))
.union(Utils.readPath(spark, inputPath + "/result/otherresearchproduct", GraphResult.class))
.union(Utils.readPath(spark, inputPath + "/result/software", GraphResult.class))
.map((MapFunction<GraphResult, String>) r -> new ObjectMapper().writeValueAsString(r), Encoders.STRING() )
.map(
(MapFunction<GraphResult, String>) r -> new ObjectMapper().writeValueAsString(r), Encoders.STRING())
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
@ -126,4 +127,5 @@ public class SparkCollectAndSave implements Serializable {
.mode(SaveMode.Overwrite)
.text(outputPath);
}
}

View File

@ -1,21 +1,59 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.Optional;
import java.io.StringReader;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
import com.jayway.jsonpath.DocumentContext;
import com.jayway.jsonpath.JsonPath;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.oa.graph.dump.Constants;
import eu.dnetlib.dhp.oa.graph.dump.ResultMapper;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.CardinalityTooHighException;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
import eu.dnetlib.dhp.oa.model.Container;
import eu.dnetlib.dhp.oa.model.Result;
import eu.dnetlib.dhp.oa.model.graph.*;
import eu.dnetlib.dhp.oa.model.graph.Datasource;
import eu.dnetlib.dhp.oa.model.graph.Organization;
import eu.dnetlib.dhp.oa.model.graph.Project;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
/**
* Spark Job that fires the dump for the entites
*/
public class SparkDumpEntitiesJob implements Serializable {
private static final Logger log = LoggerFactory.getLogger(SparkDumpEntitiesJob.class);
public static final String COMPRESSION = "compression";
public static final String GZIP = "gzip";
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
@ -42,13 +80,543 @@ public class SparkDumpEntitiesJob implements Serializable {
final String resultClassName = parser.get("resultTableName");
log.info("resultTableName: {}", resultClassName);
final String communityMapPath = parser.get("communityMapPath");
Optional<String> communityMap = Optional.ofNullable(parser.get("communityMapPath"));
String communityMapPath = null;
if (communityMap.isPresent())
communityMapPath = communityMap.get();
Class<? extends OafEntity> inputClazz = (Class<? extends OafEntity>) Class.forName(resultClassName);
DumpGraphEntities dg = new DumpGraphEntities();
dg.run(isSparkSessionManaged, inputPath, outputPath, inputClazz, communityMapPath);
run(isSparkSessionManaged, inputPath, outputPath, communityMapPath, inputClazz);
}
private static void run(Boolean isSparkSessionManaged, String inputPath, String outputPath, String communityMapPath,
Class<? extends OafEntity> inputClazz) {
SparkConf conf = new SparkConf();
switch (ModelSupport.idPrefixMap.get(inputClazz)) {
case "50":
String finalCommunityMapPath = communityMapPath;
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
resultDump(
spark, inputPath, outputPath, finalCommunityMapPath, inputClazz);
});
break;
case "40":
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
projectMap(spark, inputPath, outputPath, inputClazz);
});
break;
case "20":
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
organizationMap(spark, inputPath, outputPath, inputClazz);
});
break;
case "10":
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
datasourceMap(spark, inputPath, outputPath, inputClazz);
});
break;
}
}
public static <I extends OafEntity> void resultDump(
SparkSession spark,
String inputPath,
String outputPath,
String communityMapPath,
Class<I> inputClazz) {
CommunityMap communityMap = null;
if (!StringUtils.isEmpty(communityMapPath))
communityMap = Utils.getCommunityMap(spark, communityMapPath);
CommunityMap finalCommunityMap = communityMap;
Utils
.readPath(spark, inputPath, inputClazz)
.map(
(MapFunction<I, GraphResult>) value -> execMap(value, finalCommunityMap),
Encoders.bean(GraphResult.class))
.filter((FilterFunction<GraphResult>) value -> value != null)
.map((MapFunction<GraphResult, String>) r -> new ObjectMapper().writeValueAsString(r), Encoders.STRING())
.write()
.mode(SaveMode.Overwrite)
.option(COMPRESSION, GZIP)
.text(outputPath);
}
private static <I extends OafEntity, O extends Result> O execMap(I value,
CommunityMap communityMap) throws NoAvailableEntityTypeException, CardinalityTooHighException {
Optional<DataInfo> odInfo = Optional.ofNullable(value.getDataInfo());
if (Boolean.FALSE.equals(odInfo.isPresent())) {
return null;
}
if (Boolean.TRUE.equals(odInfo.get().getDeletedbyinference())
|| Boolean.TRUE.equals(odInfo.get().getInvisible())) {
return null;
}
return (O) ResultMapper.map(value, communityMap, Constants.DUMPTYPE.COMPLETE.getType());
}
private static <E extends OafEntity> void datasourceMap(SparkSession spark, String inputPath, String outputPath,
Class<E> inputClazz) {
Utils
.readPath(spark, inputPath, inputClazz)
.map(
(MapFunction<E, Datasource>) d -> mapDatasource((eu.dnetlib.dhp.schema.oaf.Datasource) d),
Encoders.bean(Datasource.class))
.filter(Objects::nonNull)
.write()
.mode(SaveMode.Overwrite)
.option(COMPRESSION, GZIP)
.json(outputPath);
}
private static <E extends OafEntity> void projectMap(SparkSession spark, String inputPath, String outputPath,
Class<E> inputClazz) {
Utils
.readPath(spark, inputPath, inputClazz)
.map(
(MapFunction<E, Project>) p -> mapProject((eu.dnetlib.dhp.schema.oaf.Project) p),
Encoders.bean(Project.class))
.write()
.mode(SaveMode.Overwrite)
.option(COMPRESSION, GZIP)
.json(outputPath);
}
private static Datasource mapDatasource(eu.dnetlib.dhp.schema.oaf.Datasource d) {
Datasource datasource = new Datasource();
datasource.setId(d.getId());
Optional
.ofNullable(d.getOriginalId())
.ifPresent(
oId -> datasource.setOriginalId(oId.stream().filter(Objects::nonNull).collect(Collectors.toList())));
Optional
.ofNullable(d.getPid())
.ifPresent(
pids -> datasource
.setPid(
pids
.stream()
.map(p -> DatasourcePid.newInstance(p.getQualifier().getClassid(), p.getValue()))
.collect(Collectors.toList())));
Optional
.ofNullable(d.getDatasourcetype())
.ifPresent(
dsType -> datasource
.setDatasourcetype(DatasourceSchemeValue.newInstance(dsType.getClassid(), dsType.getClassname())));
Optional
.ofNullable(d.getOpenairecompatibility())
.ifPresent(v -> datasource.setOpenairecompatibility(v.getClassname()));
Optional
.ofNullable(d.getOfficialname())
.ifPresent(oname -> datasource.setOfficialname(oname.getValue()));
Optional
.ofNullable(d.getEnglishname())
.ifPresent(ename -> datasource.setEnglishname(ename.getValue()));
Optional
.ofNullable(d.getWebsiteurl())
.ifPresent(wsite -> datasource.setWebsiteurl(wsite.getValue()));
Optional
.ofNullable(d.getLogourl())
.ifPresent(lurl -> datasource.setLogourl(lurl.getValue()));
Optional
.ofNullable(d.getDateofvalidation())
.ifPresent(dval -> datasource.setDateofvalidation(dval.getValue()));
Optional
.ofNullable(d.getDescription())
.ifPresent(dex -> datasource.setDescription(dex.getValue()));
Optional
.ofNullable(d.getSubjects())
.ifPresent(
sbjs -> datasource.setSubjects(sbjs.stream().map(sbj -> sbj.getValue()).collect(Collectors.toList())));
Optional
.ofNullable(d.getOdpolicies())
.ifPresent(odp -> datasource.setPolicies(Arrays.asList(odp.getValue())));
Optional
.ofNullable(d.getOdlanguages())
.ifPresent(
langs -> datasource
.setLanguages(langs.stream().map(lang -> lang.getValue()).collect(Collectors.toList())));
Optional
.ofNullable(d.getOdcontenttypes())
.ifPresent(
ctypes -> datasource
.setContenttypes(ctypes.stream().map(ctype -> ctype.getValue()).collect(Collectors.toList())));
Optional
.ofNullable(d.getReleasestartdate())
.ifPresent(rd -> datasource.setReleasestartdate(rd.getValue()));
Optional
.ofNullable(d.getReleaseenddate())
.ifPresent(ed -> datasource.setReleaseenddate(ed.getValue()));
Optional
.ofNullable(d.getMissionstatementurl())
.ifPresent(ms -> datasource.setMissionstatementurl(ms.getValue()));
Optional
.ofNullable(d.getDatabaseaccesstype())
.ifPresent(ar -> datasource.setAccessrights(ar.getValue()));
Optional
.ofNullable(d.getDatauploadtype())
.ifPresent(dut -> datasource.setUploadrights(dut.getValue()));
Optional
.ofNullable(d.getDatabaseaccessrestriction())
.ifPresent(dar -> datasource.setDatabaseaccessrestriction(dar.getValue()));
Optional
.ofNullable(d.getDatauploadrestriction())
.ifPresent(dur -> datasource.setDatauploadrestriction(dur.getValue()));
Optional
.ofNullable(d.getVersioning())
.ifPresent(v -> datasource.setVersioning(v.getValue()));
Optional
.ofNullable(d.getCitationguidelineurl())
.ifPresent(cu -> datasource.setCitationguidelineurl(cu.getValue()));
Optional
.ofNullable(d.getPidsystems())
.ifPresent(ps -> datasource.setPidsystems(ps.getValue()));
Optional
.ofNullable(d.getCertificates())
.ifPresent(c -> datasource.setCertificates(c.getValue()));
Optional
.ofNullable(d.getPolicies())
.ifPresent(ps -> datasource.setPolicies(ps.stream().map(p -> p.getValue()).collect(Collectors.toList())));
Optional
.ofNullable(d.getJournal())
.ifPresent(j -> datasource.setJournal(getContainer(j)));
return datasource;
}
private static Container getContainer(Journal j) {
Container c = new Container();
Optional
.ofNullable(j.getName())
.ifPresent(n -> c.setName(n));
Optional
.ofNullable(j.getIssnPrinted())
.ifPresent(issnp -> c.setIssnPrinted(issnp));
Optional
.ofNullable(j.getIssnOnline())
.ifPresent(issno -> c.setIssnOnline(issno));
Optional
.ofNullable(j.getIssnLinking())
.ifPresent(isnl -> c.setIssnLinking(isnl));
Optional
.ofNullable(j.getEp())
.ifPresent(ep -> c.setEp(ep));
Optional
.ofNullable(j.getIss())
.ifPresent(iss -> c.setIss(iss));
Optional
.ofNullable(j.getSp())
.ifPresent(sp -> c.setSp(sp));
Optional
.ofNullable(j.getVol())
.ifPresent(vol -> c.setVol(vol));
Optional
.ofNullable(j.getEdition())
.ifPresent(edition -> c.setEdition(edition));
Optional
.ofNullable(j.getConferencedate())
.ifPresent(cdate -> c.setConferencedate(cdate));
Optional
.ofNullable(j.getConferenceplace())
.ifPresent(cplace -> c.setConferenceplace(cplace));
return c;
}
private static Project mapProject(eu.dnetlib.dhp.schema.oaf.Project p) throws DocumentException {
Project project = new Project();
Optional
.ofNullable(p.getId())
.ifPresent(id -> project.setId(id));
Optional
.ofNullable(p.getWebsiteurl())
.ifPresent(w -> project.setWebsiteurl(w.getValue()));
Optional
.ofNullable(p.getCode())
.ifPresent(code -> project.setCode(code.getValue()));
Optional
.ofNullable(p.getAcronym())
.ifPresent(acronynim -> project.setAcronym(acronynim.getValue()));
Optional
.ofNullable(p.getTitle())
.ifPresent(title -> project.setTitle(title.getValue()));
Optional
.ofNullable(p.getStartdate())
.ifPresent(sdate -> project.setStartdate(sdate.getValue()));
Optional
.ofNullable(p.getEnddate())
.ifPresent(edate -> project.setEnddate(edate.getValue()));
Optional
.ofNullable(p.getCallidentifier())
.ifPresent(cide -> project.setCallidentifier(cide.getValue()));
Optional
.ofNullable(p.getKeywords())
.ifPresent(key -> project.setKeywords(key.getValue()));
Optional<Field<String>> omandate = Optional.ofNullable(p.getOamandatepublications());
Optional<Field<String>> oecsc39 = Optional.ofNullable(p.getEcsc39());
boolean mandate = false;
if (omandate.isPresent()) {
if (omandate.get().getValue().equals("true")) {
mandate = true;
}
}
if (oecsc39.isPresent()) {
if (oecsc39.get().getValue().equals("true")) {
mandate = true;
}
}
project.setOpenaccessmandateforpublications(mandate);
project.setOpenaccessmandatefordataset(false);
Optional
.ofNullable(p.getEcarticle29_3())
.ifPresent(oamandate -> project.setOpenaccessmandatefordataset(oamandate.getValue().equals("true")));
project
.setSubject(
Optional
.ofNullable(p.getSubjects())
.map(subjs -> subjs.stream().map(s -> s.getValue()).collect(Collectors.toList()))
.orElse(new ArrayList<>()));
Optional
.ofNullable(p.getSummary())
.ifPresent(summary -> project.setSummary(summary.getValue()));
Optional<Float> ofundedamount = Optional.ofNullable(p.getFundedamount());
Optional<Field<String>> ocurrency = Optional.ofNullable(p.getCurrency());
Optional<Float> ototalcost = Optional.ofNullable(p.getTotalcost());
if (ocurrency.isPresent()) {
if (ofundedamount.isPresent()) {
if (ototalcost.isPresent()) {
project
.setGranted(
Granted.newInstance(ocurrency.get().getValue(), ototalcost.get(), ofundedamount.get()));
} else {
project.setGranted(Granted.newInstance(ocurrency.get().getValue(), ofundedamount.get()));
}
}
}
project
.setH2020programme(
Optional
.ofNullable(p.getH2020classification())
.map(
classification -> classification
.stream()
.map(
c -> Programme
.newInstance(
c.getH2020Programme().getCode(), c.getH2020Programme().getDescription()))
.collect(Collectors.toList()))
.orElse(new ArrayList<>()));
Optional<List<Field<String>>> ofundTree = Optional
.ofNullable(p.getFundingtree());
List<Funder> funList = new ArrayList<>();
if (ofundTree.isPresent()) {
for (Field<String> fundingtree : ofundTree.get()) {
funList.add(getFunder(fundingtree.getValue()));
}
}
project.setFunding(funList);
return project;
}
public static Funder getFunder(String fundingtree) throws DocumentException {
Funder f = new Funder();
final Document doc;
doc = new SAXReader().read(new StringReader(fundingtree));
f.setShortName(((org.dom4j.Node) (doc.selectNodes("//funder/shortname").get(0))).getText());
f.setName(((org.dom4j.Node) (doc.selectNodes("//funder/name").get(0))).getText());
f.setJurisdiction(((org.dom4j.Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText());
String id = "";
StringBuilder bld = new StringBuilder();
int level = 0;
List<org.dom4j.Node> nodes = doc.selectNodes("//funding_level_" + level);
while (!nodes.isEmpty()) {
for (org.dom4j.Node n : nodes) {
List node = n.selectNodes("./id");
id = ((org.dom4j.Node) node.get(0)).getText();
id = id.substring(id.indexOf("::") + 2);
node = n.selectNodes("./description");
bld.append(((Node) node.get(0)).getText() + " - ");
}
level += 1;
nodes = doc.selectNodes("//funding_level_" + level);
}
String description = bld.toString();
if (!id.equals("")) {
Fundings fundings = new Fundings();
fundings.setId(id);
fundings.setDescription(description.substring(0, description.length() - 3).trim());
f.setFunding_stream(fundings);
}
return f;
}
private static <E extends OafEntity> void organizationMap(SparkSession spark, String inputPath, String outputPath,
Class<E> inputClazz) {
Dataset<Organization> tmp = Utils
.readPath(spark, inputPath, inputClazz)
.map(
(MapFunction<E, Organization>) o -> mapOrganization((eu.dnetlib.dhp.schema.oaf.Organization) o),
Encoders.bean(Organization.class));
tmp.foreach((ForeachFunction<Organization>) o -> System.out.println(new ObjectMapper().writeValueAsString(o)));
tmp.filter(Objects::nonNull)
.write()
.mode(SaveMode.Overwrite)
.option(COMPRESSION, GZIP)
.json(outputPath);
}
private static eu.dnetlib.dhp.oa.model.graph.Organization mapOrganization(
eu.dnetlib.dhp.schema.oaf.Organization org) {
if (Boolean.TRUE.equals(org.getDataInfo().getDeletedbyinference()))
return null;
Organization organization = new Organization();
Optional
.ofNullable(org.getLegalshortname())
.ifPresent(value -> organization.setLegalshortname(value.getValue()));
Optional
.ofNullable(org.getLegalname())
.ifPresent(value -> organization.setLegalname(value.getValue()));
Optional
.ofNullable(org.getWebsiteurl())
.ifPresent(value -> organization.setWebsiteurl(value.getValue()));
Optional
.ofNullable(org.getAlternativeNames())
.ifPresent(
value -> organization
.setAlternativenames(
value
.stream()
.map(v -> v.getValue())
.collect(Collectors.toList())));
Optional
.ofNullable(org.getCountry())
.ifPresent(
value -> {
if (!value.getClassid().equals(eu.dnetlib.dhp.oa.graph.dump.complete.Constants.UNKNOWN)) {
organization
.setCountry(
eu.dnetlib.dhp.oa.model.Country.newInstance(value.getClassid(), value.getClassname()));
}
});
Optional
.ofNullable(org.getId())
.ifPresent(value -> organization.setId(value));
Optional
.ofNullable(org.getPid())
.ifPresent(
value -> organization
.setPid(
value
.stream()
.map(p -> OrganizationPid.newInstance(p.getQualifier().getClassid(), p.getValue()))
.collect(Collectors.toList())));
return organization;
}
}

View File

@ -127,7 +127,7 @@ public class SparkDumpRelationJob implements Serializable {
}, Encoders.bean(eu.dnetlib.dhp.oa.model.graph.Relation.class))
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
.mode(SaveMode.Append)
.json(outputPath);
}

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
package eu.dnetlib.dhp.oa.graph.dump.subset;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
@ -8,82 +8,192 @@ import java.io.StringReader;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.catalyst.expressions.Encode;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.oa.graph.dump.DumpProducts;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
import com.jayway.jsonpath.DocumentContext;
import com.jayway.jsonpath.JsonPath;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Constants;
import eu.dnetlib.dhp.oa.graph.dump.ResultMapper;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.*;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.CardinalityTooHighException;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
import eu.dnetlib.dhp.oa.graph.dump.subset.criteria.VerbResolver;
import eu.dnetlib.dhp.oa.graph.dump.subset.criteria.VerbResolverFactory;
import eu.dnetlib.dhp.oa.graph.dump.subset.selectionconstraints.Param;
import eu.dnetlib.dhp.oa.graph.dump.subset.selectionconstraints.SelectionConstraints;
import eu.dnetlib.dhp.oa.model.Container;
import eu.dnetlib.dhp.oa.model.Result;
import eu.dnetlib.dhp.oa.model.graph.*;
import eu.dnetlib.dhp.oa.model.graph.Funder;
import eu.dnetlib.dhp.oa.model.graph.Project;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.Journal;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
/**
* Dumps of entities in the model defined in eu.dnetlib.dhp.schema.dump.oaf.graph. Results are dumped using the same
* Mapper as for eu.dnetlib.dhp.schema.dump.oaf.community, while for the other entities the mapping is defined below
* Spark Job that fires the dump for the entites
*/
public class DumpGraphEntities implements Serializable {
public class SparkDumpResult implements Serializable {
private static final Logger log = LoggerFactory.getLogger(SparkDumpResult.class);
private static final VerbResolver resolver = VerbResolverFactory.newInstance();
public static final String COMPRESSION = "compression";
public static final String GZIP = "gzip";
public void run(Boolean isSparkSessionManaged,
String inputPath,
String outputPath,
Class<? extends OafEntity> inputClazz,
String communityMapPath) {
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
SparkDumpResult.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json"));
SparkConf conf = new SparkConf();
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
switch (ModelSupport.idPrefixMap.get(inputClazz)) {
case "50":
DumpProducts d = new DumpProducts();
d
.run(
isSparkSessionManaged, inputPath, outputPath, communityMapPath, inputClazz, GraphResult.class,
eu.dnetlib.dhp.oa.graph.dump.Constants.DUMPTYPE.COMPLETE.getType());
break;
case "40":
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
projectMap(spark, inputPath, outputPath, inputClazz);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
});
break;
case "20":
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
organizationMap(spark, inputPath, outputPath, inputClazz);
final String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
});
break;
case "10":
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
datasourceMap(spark, inputPath, outputPath, inputClazz);
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
});
break;
final String resultType = parser.get("resultType");
log.info("resultType: {}", resultType);
final String resultClassName = parser.get("resultTableName");
log.info("resultTableName: {}", resultClassName);
Optional<String> pathString = Optional.ofNullable(parser.get("pathMap"));
HashMap<String, String> pathMap = null;
if (pathString.isPresent()) {
pathMap = new Gson().fromJson(parser.get("pathMap"), HashMap.class);
log.info("pathMap: {}", new Gson().toJson(pathMap));
}
final Optional<String> parameter = Optional.ofNullable(parser.get("selectionCriteria"));
SelectionConstraints selectionConstraints = null;
if (parameter.isPresent()) {
selectionConstraints = new ObjectMapper().readValue(parameter.get(), SelectionConstraints.class);
selectionConstraints.addResolver(resolver);
}
Class<? extends OafEntity> inputClazz = (Class<? extends OafEntity>) Class.forName(resultClassName);
run(
isSparkSessionManaged, inputPath, outputPath, pathMap, selectionConstraints, inputClazz,
resultType);
}
private static void run(Boolean isSparkSessionManaged, String inputPath, String outputPath,
HashMap<String, String> pathMap, SelectionConstraints selectionConstraints,
Class<? extends OafEntity> inputClazz, String resultType) {
SparkConf conf = new SparkConf();
HashMap<String, String> finalPathMap = pathMap;
SelectionConstraints finalSelectionConstraints = selectionConstraints;
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath + "/original/" + resultType);
Utils.removeOutputDir(spark, outputPath + "/dump/" + resultType);
resultDump(
spark, inputPath, outputPath, inputClazz, finalPathMap,
finalSelectionConstraints, resultType);
});
}
public static <I extends OafEntity> void resultDump(
SparkSession spark,
String inputPath,
String outputPath,
Class<I> inputClazz,
Map<String, String> pathMap,
SelectionConstraints selectionConstraints,
String resultType) {
Utils
.readPath(spark, inputPath, inputClazz)
.map(
(MapFunction<I, I>) value -> filterResult(value, pathMap, selectionConstraints, inputClazz),
Encoders.bean(inputClazz))
.filter(Objects::nonNull)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath + "/original/" + resultType);
Utils
.readPath(spark, outputPath + "/original/" + resultType, inputClazz)
.map(
(MapFunction<I, GraphResult>) value -> (GraphResult) ResultMapper
.map(
value, null,
Constants.DUMPTYPE.COMPLETE.getType()),
Encoders.bean(GraphResult.class))
.map((MapFunction<GraphResult, String>) r -> new ObjectMapper().writeValueAsString(r), Encoders.STRING())
.write()
.mode(SaveMode.Overwrite)
.option(COMPRESSION, GZIP)
.text(outputPath + "/dump/" + resultType);
}
private static <I extends OafEntity> I filterResult(I value, Map<String, String> pathMap,
SelectionConstraints selectionConstraints, Class<I> inputClazz) {
Optional<DataInfo> odInfo = Optional.ofNullable(value.getDataInfo());
log.info("Filtering result");
if (Boolean.FALSE.equals(odInfo.isPresent())) {
return null;
}
if (Boolean.TRUE.equals(odInfo.get().getDeletedbyinference())
|| Boolean.TRUE.equals(odInfo.get().getInvisible())) {
return null;
}
if (selectionConstraints != null) {
Param param = new Param();
String json = new Gson().toJson(value, inputClazz);
DocumentContext jsonContext = JsonPath.parse(json);
for (String key : pathMap.keySet()) {
try {
param.insert(key, jsonContext.read(pathMap.get(key)));
} catch (com.jayway.jsonpath.PathNotFoundException e) {
param.insert(key, new ArrayList<>());
}
}
if (!selectionConstraints.verifyCriteria(param)) {
return null;
}
}
return value;
}
private static <E extends OafEntity> void datasourceMap(SparkSession spark, String inputPath, String outputPath,
@ -96,7 +206,7 @@ public class DumpGraphEntities implements Serializable {
.filter(Objects::nonNull)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.option(COMPRESSION, GZIP)
.json(outputPath);
}
@ -109,7 +219,7 @@ public class DumpGraphEntities implements Serializable {
Encoders.bean(Project.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.option(COMPRESSION, GZIP)
.json(outputPath);
}
@ -413,31 +523,31 @@ public class DumpGraphEntities implements Serializable {
final Document doc;
doc = new SAXReader().read(new StringReader(fundingtree));
f.setShortName(((org.dom4j.Node) (doc.selectNodes("//funder/shortname").get(0))).getText());
f.setName(((org.dom4j.Node) (doc.selectNodes("//funder/name").get(0))).getText());
f.setJurisdiction(((org.dom4j.Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText());
// f.setId(((org.dom4j.Node) (doc.selectNodes("//funder/id").get(0))).getText());
f.setShortName(((Node) (doc.selectNodes("//funder/shortname").get(0))).getText());
f.setName(((Node) (doc.selectNodes("//funder/name").get(0))).getText());
f.setJurisdiction(((Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText());
String id = "";
String description = "";
// List<Levels> fundings = new ArrayList<>();
StringBuilder bld = new StringBuilder();
int level = 0;
List<org.dom4j.Node> nodes = doc.selectNodes("//funding_level_" + level);
while (nodes.size() > 0) {
for (org.dom4j.Node n : nodes) {
List<Node> nodes = doc.selectNodes("//funding_level_" + level);
while (!nodes.isEmpty()) {
for (Node n : nodes) {
List node = n.selectNodes("./id");
id = ((org.dom4j.Node) node.get(0)).getText();
id = ((Node) node.get(0)).getText();
id = id.substring(id.indexOf("::") + 2);
node = n.selectNodes("./description");
description += ((Node) node.get(0)).getText() + " - ";
bld.append(((Node) node.get(0)).getText() + " - ");
}
level += 1;
nodes = doc.selectNodes("//funding_level_" + level);
}
String description = bld.toString();
if (!id.equals("")) {
Fundings fundings = new Fundings();
fundings.setId(id);
@ -456,16 +566,16 @@ public class DumpGraphEntities implements Serializable {
.map(
(MapFunction<E, Organization>) o -> mapOrganization((eu.dnetlib.dhp.schema.oaf.Organization) o),
Encoders.bean(Organization.class))
.filter((FilterFunction<Organization>) o -> o != null)
.filter(Objects::nonNull)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.option(COMPRESSION, GZIP)
.json(outputPath);
}
private static eu.dnetlib.dhp.oa.model.graph.Organization mapOrganization(
private static Organization mapOrganization(
eu.dnetlib.dhp.schema.oaf.Organization org) {
if (org.getDataInfo().getDeletedbyinference())
if (Boolean.TRUE.equals(org.getDataInfo().getDeletedbyinference()))
return null;
Organization organization = new Organization();
@ -495,8 +605,10 @@ public class DumpGraphEntities implements Serializable {
.ofNullable(org.getCountry())
.ifPresent(
value -> {
if (!value.getClassid().equals(Constants.UNKNOWN)) {
organization.setCountry(Country.newInstance(value.getClassid(), value.getClassname()));
if (!value.getClassid().equals(eu.dnetlib.dhp.oa.graph.dump.complete.Constants.UNKNOWN)) {
organization
.setCountry(
eu.dnetlib.dhp.oa.model.Country.newInstance(value.getClassid(), value.getClassname()));
}
});
@ -517,4 +629,5 @@ public class DumpGraphEntities implements Serializable {
return organization;
}
}

View File

@ -0,0 +1,277 @@
package eu.dnetlib.dhp.oa.graph.dump.subset;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.*;
import javax.print.attribute.standard.MediaSize;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.*;
import org.apache.spark.sql.*;
import org.apache.spark.sql.Dataset;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.utils.DHPUtils;
import scala.Function1;
import scala.Tuple2;
/**
* @author miriam.baglioni
* @Date 11/11/22
*/
public class SparkSelectSubset implements Serializable {
private static final Logger log = LoggerFactory.getLogger(SparkSelectSubset.class);
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
SparkSelectSubset.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
Optional<String> rs = Optional.ofNullable(parser.get("removeSet"));
final Set<String> removeSet = new HashSet<>();
if (rs.isPresent()) {
Collections.addAll(removeSet, rs.get().split(";"));
}
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
selectSubset(spark, inputPath, outputPath, removeSet);
});
}
private static void selectSubset(SparkSession spark, String inputPath, String outputPath, Set<String> removeSet) {
Dataset<Relation> relation = Utils
.readPath(spark, inputPath + "/relation", Relation.class)
.filter(
(FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference()
&& !removeSet.contains(r.getRelClass()));
Dataset<String> resultIds = Utils
.readPath(spark, outputPath + "/dump/publication", GraphResult.class)
.map((MapFunction<GraphResult, String>) p -> p.getId(), Encoders.STRING())
.union(
Utils
.readPath(spark, outputPath + "/dump/dataset", GraphResult.class)
.map((MapFunction<GraphResult, String>) d -> d.getId(), Encoders.STRING()))
.union(
Utils
.readPath(spark, outputPath + "/dump/software", GraphResult.class)
.map((MapFunction<GraphResult, String>) s -> s.getId(), Encoders.STRING()))
.union(
Utils
.readPath(spark, outputPath + "/dump/otherresearchproduct", GraphResult.class)
.map((MapFunction<GraphResult, String>) o -> o.getId(), Encoders.STRING()));
// select result -> result relations
Dataset<Relation> relJoinSource = relation
.joinWith(resultIds, relation.col("source").equalTo(resultIds.col("value")))
.map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class));
relJoinSource
.joinWith(resultIds, relJoinSource.col("target").equalTo(resultIds.col("value")))
.map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class))
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
.json(outputPath + "/original/relation");
// save the relations among other entities and the results
Dataset<String> otherIds = Utils
.readPath(spark, inputPath + "/organization", Organization.class)
.filter((FilterFunction<Organization>) e -> !e.getDataInfo().getDeletedbyinference())
.map((MapFunction<Organization, String>) o -> o.getId(), Encoders.STRING())
.union(
Utils
.readPath(spark, inputPath + "/project", Project.class)
.filter((FilterFunction<Project>) e -> !e.getDataInfo().getDeletedbyinference())
.map((MapFunction<Project, String>) p -> p.getId(), Encoders.STRING()))
.union(
Utils
.readPath(spark, inputPath + "/datasource", Datasource.class)
.filter((FilterFunction<Datasource>) e -> !e.getDataInfo().getDeletedbyinference())
.map((MapFunction<Datasource, String>) d -> d.getId(), Encoders.STRING()));
relJoinSource = relation
.joinWith(resultIds, relation.col("source").equalTo(resultIds.col("value")))
.map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class));
relJoinSource
.joinWith(otherIds, relJoinSource.col("target").equalTo(otherIds.col("value")))
.map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class))
.write()
.mode(SaveMode.Append)
.option("compression", "gzip")
.json(outputPath + "/original/relation");
relJoinSource = relation
.joinWith(resultIds, relation.col("target").equalTo(resultIds.col("value")))
.map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class));
relJoinSource
.foreach((ForeachFunction<Relation>) r -> System.out.println(new ObjectMapper().writeValueAsString(r)));
relJoinSource
.joinWith(otherIds, relJoinSource.col("source").equalTo(otherIds.col("value")))
.map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class))
.write()
.mode(SaveMode.Append)
.option("compression", "gzip")
.json(outputPath + "/original/relation");
relJoinSource = Utils.readPath(spark, outputPath + "/original/relation", Relation.class);
// Save the entities in relations with at least one result
// relations are bidirectional, so it is enough to have the match to one side
Dataset<Organization> organization = Utils
.readPath(spark, inputPath + "/organization", Organization.class)
.filter((FilterFunction<Organization>) o -> !o.getDataInfo().getDeletedbyinference());
organization
.joinWith(relJoinSource, organization.col("id").equalTo(relJoinSource.col("source")))
.map(
(MapFunction<Tuple2<Organization, Relation>, Organization>) t2 -> t2._1(),
Encoders.bean(Organization.class))
.groupByKey((MapFunction<Organization, String>) v -> v.getId(), Encoders.STRING())
.mapGroups(
(MapGroupsFunction<String, Organization, Organization>) (k, it) -> it.next(),
Encoders.bean(Organization.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath + "/original/organization");
Dataset<Datasource> datasource = Utils
.readPath(spark, inputPath + "/datasource", Datasource.class)
.filter((FilterFunction<Datasource>) d -> !d.getDataInfo().getDeletedbyinference());
datasource
.joinWith(relJoinSource, datasource.col("id").equalTo(relJoinSource.col("source")))
.map((MapFunction<Tuple2<Datasource, Relation>, Datasource>) t2 -> t2._1(), Encoders.bean(Datasource.class))
.groupByKey((MapFunction<Datasource, String>) v -> v.getId(), Encoders.STRING())
.mapGroups(
(MapGroupsFunction<String, Datasource, Datasource>) (k, it) -> it.next(),
Encoders.bean(Datasource.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath + "/original/datasource");
// plus we need to dump all the datasource in collectedfrom hostedby
Dataset<String> cfhb = Utils
.readPath(spark, outputPath + "/original/publication", Publication.class)
.flatMap(
(FlatMapFunction<Publication, Instance>) p -> p.getInstance().iterator(), Encoders.bean(Instance.class))
.union(
Utils
.readPath(spark, outputPath + "/original/dataset", eu.dnetlib.dhp.schema.oaf.Dataset.class)
.flatMap(
(FlatMapFunction<eu.dnetlib.dhp.schema.oaf.Dataset, Instance>) p -> p.getInstance().iterator(),
Encoders.bean(Instance.class)))
.union(
Utils
.readPath(spark, outputPath + "/original/software", Software.class)
.flatMap(
(FlatMapFunction<Software, Instance>) p -> p.getInstance().iterator(),
Encoders.bean(Instance.class)))
.union(
Utils
.readPath(spark, outputPath + "/original/otherresearchproduct", OtherResearchProduct.class)
.flatMap(
(FlatMapFunction<OtherResearchProduct, Instance>) p -> p.getInstance().iterator(),
Encoders.bean(Instance.class)))
.flatMap(
(FlatMapFunction<Instance, String>) i -> Arrays
.asList(i.getCollectedfrom().getKey(), i.getHostedby().getKey())
.iterator(),
Encoders.STRING())
.filter((FilterFunction<String>) s -> !s.equals(ModelConstants.UNKNOWN))
.distinct();
datasource
.joinWith(cfhb, datasource.col("id").equalTo(cfhb.col("value")))
.map((MapFunction<Tuple2<Datasource, String>, Datasource>) t2 -> t2._1(), Encoders.bean(Datasource.class))
.write()
.mode(SaveMode.Append)
.option("compression", "gzip")
.json(outputPath + "/original/datasource");
Dataset<Project> project = Utils
.readPath(spark, inputPath + "/project", Project.class)
.filter((FilterFunction<Project>) d -> !d.getDataInfo().getDeletedbyinference());
project
.joinWith(relJoinSource, project.col("id").equalTo(relJoinSource.col("source")))
.map((MapFunction<Tuple2<Project, Relation>, Project>) t2 -> t2._1(), Encoders.bean(Project.class))
.groupByKey((MapFunction<Project, String>) v -> v.getId(), Encoders.STRING())
.mapGroups((MapGroupsFunction<String, Project, Project>) (k, it) -> it.next(), Encoders.bean(Project.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath + "/original/project");
// save the relations among entities different from the result
Dataset<String> selectedIDs = Utils
.readPath(spark, outputPath + "/original/project", Project.class)
.map((MapFunction<Project, String>) p -> p.getId(), Encoders.STRING())
.union(
Utils
.readPath(spark, outputPath + "/original/organization", Organization.class)
.map((MapFunction<Organization, String>) o -> o.getId(), Encoders.STRING()))
.union(
Utils
.readPath(spark, outputPath + "/original/datasource", Datasource.class)
.map((MapFunction<Datasource, String>) d -> d.getId(), Encoders.STRING()));
relJoinSource = relation
.joinWith(selectedIDs, relation.col("source").equalTo(selectedIDs.col("value")))
.map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class));
relJoinSource
.joinWith(selectedIDs, relJoinSource.col("target").equalTo(selectedIDs.col("value")))
.map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class))
.write()
.mode(SaveMode.Append)
.option("compression", "gzip")
.json(outputPath + "/original/relation");
}
}

View File

@ -0,0 +1,135 @@
package eu.dnetlib.dhp.oa.graph.dump.subset;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.graph.ResearchCommunity;
import eu.dnetlib.dhp.schema.oaf.*;
import scala.Tuple2;
/**
* @author miriam.baglioni
* @Date 15/11/22
*/
public class SparkSelectValidContext implements Serializable {
private static final Logger log = LoggerFactory.getLogger(SparkSelectValidContext.class);
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
SparkSelectValidContext.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/dump/input_select_context.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
final String contextPath = parser.get("contextPath");
log.info("contextPath: {}", contextPath);
final String communityMapPath = parser.get("communityMapPath");
log.info("communityMapPath: {}", communityMapPath);
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
selectValidContext(spark, inputPath, contextPath, communityMapPath, outputPath);
});
}
private static void selectValidContext(SparkSession spark, String inputPath, String contextPath,
String communityMapPath, String outputPath) {
List<String> keys = Arrays
.asList(Utils.getCommunityMap(spark, communityMapPath).keySet().stream().toArray(String[]::new));
Dataset<String> context = getFilter(spark, inputPath + "/publication", keys, Publication.class)
.union(getFilter(spark, inputPath + "/dataset", keys, eu.dnetlib.dhp.schema.oaf.Dataset.class))
.union(getFilter(spark, inputPath + "/software", keys, Software.class))
.union(getFilter(spark, inputPath + "/otherresearchproduct", keys, OtherResearchProduct.class));
Dataset<ResearchCommunity> researchCommunity = Utils.readPath(spark, contextPath, ResearchCommunity.class);
researchCommunity
.joinWith(context, researchCommunity.col("acronym").equalTo(context.col("value")))
.map(
(MapFunction<Tuple2<ResearchCommunity, String>, ResearchCommunity>) t2 -> t2._1(),
Encoders.bean(ResearchCommunity.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath);
}
private static <I extends Result> Dataset<String> getFilter(SparkSession spark, String inputPath,
List<String> keys, Class<I> inputClazz) {
return Utils
.readPath(spark, inputPath, inputClazz)
.filter((FilterFunction<I>) r -> isPresentContext(r))
.flatMap(
(FlatMapFunction<I, String>) r -> r
.getContext()
.stream()
.map(c -> c.getId())
.collect(Collectors.toList())
.iterator(),
Encoders.STRING())
.filter((FilterFunction<String>) c -> extracted(c, keys)).distinct();
}
private static <I extends Result> boolean isPresentContext(I r) {
return Optional.ofNullable(r.getContext()).isPresent();
}
private static boolean extracted(String c, List<String> keySet) {
if (keySet.contains(c))
return true;
if (c.contains("::") && keySet.contains(c.substring(0, c.indexOf("::"))))
return true;
return false;
}
}

View File

@ -0,0 +1,148 @@
package eu.dnetlib.dhp.oa.graph.dump.subset;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
import eu.dnetlib.dhp.oa.model.graph.ResearchCommunity;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import scala.Tuple2;
/**
* @author miriam.baglioni
* @Date 15/11/22
*/
public class SparkSelectValidRelationContext implements Serializable {
private static final Logger log = LoggerFactory.getLogger(SparkSelectValidRelationContext.class);
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
SparkSelectValidRelationContext.class
.getResourceAsStream(
""));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
// results dumped
final String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
final String contextPath = parser.get("contextPath");
log.info("contextPath: {}", contextPath);
final String contextRelationPath = parser.get("contextRelationPath");
log.info("contextRelationPath: {}", contextRelationPath);
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
selectValidRelation(spark, inputPath, contextRelationPath);
});
}
private static void selectValidRelation(SparkSession spark, String inputPath,
String contextRelationPath) {
// read the results
Dataset<String> dumpedIds = Utils
.readPath(spark, inputPath + "/publication", GraphResult.class)
.map((MapFunction<GraphResult, String>) r -> r.getId(), Encoders.STRING())
.union(
Utils
.readPath(spark, inputPath + "/dataset", GraphResult.class)
.map((MapFunction<GraphResult, String>) r -> r.getId(), Encoders.STRING()))
.union(
Utils
.readPath(spark, inputPath + "/software", GraphResult.class)
.map((MapFunction<GraphResult, String>) r -> r.getId(), Encoders.STRING()))
.union(
Utils
.readPath(spark, inputPath + "/otherresearchproduct", GraphResult.class)
.map((MapFunction<GraphResult, String>) r -> r.getId(), Encoders.STRING()))
.union(
Utils
.readPath(spark, inputPath + "/organization", eu.dnetlib.dhp.oa.model.graph.Organization.class)
.map(
(MapFunction<eu.dnetlib.dhp.oa.model.graph.Organization, String>) o -> o.getId(),
Encoders.STRING()))
.union(
Utils
.readPath(spark, inputPath + "/project", eu.dnetlib.dhp.oa.model.graph.Project.class)
.map(
(MapFunction<eu.dnetlib.dhp.oa.model.graph.Project, String>) o -> o.getId(), Encoders.STRING()))
.union(
Utils
.readPath(spark, inputPath + "/datasource", eu.dnetlib.dhp.oa.model.graph.Datasource.class)
.map(
(MapFunction<eu.dnetlib.dhp.oa.model.graph.Datasource, String>) o -> o.getId(),
Encoders.STRING()));
Dataset<Relation> relation = Utils
.readPath(spark, contextRelationPath + "/context", Relation.class)
.union(Utils.readPath(spark, contextRelationPath + "/contextOrg", Relation.class));
Dataset<CommunityResult> allowedContext = Utils.readPath(spark, inputPath + "/community_infrastructure", CommunityResult.class);
Dataset<Relation> relJoinSource = relation
.joinWith(dumpedIds, relation.col("source").equalTo(dumpedIds.col("value")))
.map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class));
relJoinSource
.joinWith(allowedContext, relJoinSource.col("target").equalTo(allowedContext.col("id")))
.map(
(MapFunction<Tuple2<Relation, CommunityResult>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class))
.write()
.mode(SaveMode.Append)
.option("compression", "gzip")
.json(inputPath + "/relation");
relJoinSource = relation
.joinWith(dumpedIds, relation.col("target").equalTo(dumpedIds.col("value")))
.map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class));
relJoinSource
.joinWith(allowedContext, relJoinSource.col("source").equalTo(allowedContext.col("id")))
.map(
(MapFunction<Tuple2<Relation, CommunityResult>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class))
.write()
.mode(SaveMode.Append)
.option("compression", "gzip")
.json(inputPath + "/relation");
}
}

View File

@ -0,0 +1,30 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;
import java.io.Serializable;
@VerbClass("contains")
public class ContainsVerb implements Selection, Serializable {
private String param;
public ContainsVerb() {
}
public ContainsVerb(final String param) {
this.param = param;
}
@Override
public boolean apply(String value) {
return value.contains(param);
}
public String getParam() {
return param;
}
public void setParam(String param) {
this.param = param;
}
}

View File

@ -0,0 +1,30 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;
import java.io.Serializable;
@VerbClass("contains_ignorecase")
public class ContainsVerbIgnoreCase implements Selection, Serializable {
private String param;
public ContainsVerbIgnoreCase() {
}
public ContainsVerbIgnoreCase(final String param) {
this.param = param;
}
@Override
public boolean apply(String value) {
return value.toLowerCase().contains(param.toLowerCase());
}
public String getParam() {
return param;
}
public void setParam(String param) {
this.param = param;
}
}

View File

@ -0,0 +1,30 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;
import java.io.Serializable;
@VerbClass("equals")
public class EqualVerb implements Selection, Serializable {
private String param;
public EqualVerb() {
}
public EqualVerb(final String param) {
this.param = param;
}
@Override
public boolean apply(String value) {
return value.equals(param);
}
public String getParam() {
return param;
}
public void setParam(String param) {
this.param = param;
}
}

View File

@ -0,0 +1,30 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;
import java.io.Serializable;
@VerbClass("equals_ignorecase")
public class EqualVerbIgnoreCase implements Selection, Serializable {
private String param;
public EqualVerbIgnoreCase() {
}
public EqualVerbIgnoreCase(final String param) {
this.param = param;
}
@Override
public boolean apply(String value) {
return value.equalsIgnoreCase(param);
}
public String getParam() {
return param;
}
public void setParam(String param) {
this.param = param;
}
}

View File

@ -0,0 +1,34 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;
import java.io.Serializable;
/**
* @author miriam.baglioni
* @Date 11/11/22
*/
@VerbClass("greater_than")
public class GreatThanVerb implements Selection, Serializable {
private String param;
public GreatThanVerb() {
}
public GreatThanVerb(final String param) {
this.param = param;
}
@Override
public boolean apply(String value) {
return value.compareTo(param) > 0;
}
public String getParam() {
return param;
}
public void setParam(String param) {
this.param = param;
}
}

View File

@ -0,0 +1,43 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;
import java.lang.reflect.Type;
import com.google.gson.*;
public class InterfaceAdapter implements JsonSerializer, JsonDeserializer {
private static final String CLASSNAME = "CLASSNAME";
private static final String DATA = "DATA";
public Object deserialize(
JsonElement jsonElement,
Type type,
JsonDeserializationContext jsonDeserializationContext)
throws JsonParseException {
JsonObject jsonObject = jsonElement.getAsJsonObject();
JsonPrimitive prim = (JsonPrimitive) jsonObject.get(CLASSNAME);
String className = prim.getAsString();
Class klass = getObjectClass(className);
return jsonDeserializationContext.deserialize(jsonObject.get(DATA), klass);
}
public JsonElement serialize(
Object jsonElement, Type type, JsonSerializationContext jsonSerializationContext) {
JsonObject jsonObject = new JsonObject();
jsonObject.addProperty(CLASSNAME, jsonElement.getClass().getName());
jsonObject.add(DATA, jsonSerializationContext.serialize(jsonElement));
return jsonObject;
}
/** **** Helper method to get the className of the object to be deserialized **** */
public Class getObjectClass(String className) {
try {
return Class.forName(className);
} catch (ClassNotFoundException e) {
// e.printStackTrace();
throw new JsonParseException(e.getMessage());
}
}
}

View File

@ -0,0 +1,34 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;
import java.io.Serializable;
/**
* @author miriam.baglioni
* @Date 11/11/22
*/
@VerbClass("lesser_than")
public class LessThanVerb implements Selection, Serializable {
private String param;
public LessThanVerb() {
}
public LessThanVerb(final String param) {
this.param = param;
}
@Override
public boolean apply(String value) {
return value.compareTo(param) < 0;
}
public String getParam() {
return param;
}
public void setParam(String param) {
this.param = param;
}
}

View File

@ -0,0 +1,30 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;
import java.io.Serializable;
@VerbClass("not_contains")
public class NotContainsVerb implements Selection, Serializable {
private String param;
public NotContainsVerb() {
}
public NotContainsVerb(final String param) {
this.param = param;
}
@Override
public boolean apply(String value) {
return !value.contains(param);
}
public String getParam() {
return param;
}
public void setParam(String param) {
this.param = param;
}
}

View File

@ -0,0 +1,30 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;
import java.io.Serializable;
@VerbClass("not_contains_ignorecase")
public class NotContainsVerbIgnoreCase implements Selection, Serializable {
private String param;
public NotContainsVerbIgnoreCase() {
}
public NotContainsVerbIgnoreCase(final String param) {
this.param = param;
}
@Override
public boolean apply(String value) {
return !(value.toLowerCase().contains(param.toLowerCase()));
}
public String getParam() {
return param;
}
public void setParam(String param) {
this.param = param;
}
}

View File

@ -0,0 +1,30 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;
import java.io.Serializable;
@VerbClass("not_equals")
public class NotEqualVerb implements Selection, Serializable {
private String param;
public NotEqualVerb(final String param) {
this.param = param;
}
public NotEqualVerb() {
}
public String getParam() {
return param;
}
public void setParam(String param) {
this.param = param;
}
@Override
public boolean apply(String value) {
return !value.equals(param);
}
}

View File

@ -0,0 +1,30 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;
import java.io.Serializable;
@VerbClass("not_equals_ignorecase")
public class NotEqualVerbIgnoreCase implements Selection, Serializable {
private String param;
public NotEqualVerbIgnoreCase(final String param) {
this.param = param;
}
public NotEqualVerbIgnoreCase() {
}
public String getParam() {
return param;
}
public void setParam(String param) {
this.param = param;
}
@Override
public boolean apply(String value) {
return !value.equalsIgnoreCase(param);
}
}

View File

@ -0,0 +1,9 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;
import java.io.Serializable;
public interface Selection extends Serializable {
boolean apply(String value);
}

View File

@ -0,0 +1,14 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.TYPE)
@interface VerbClass {
String value();
}

View File

@ -0,0 +1,54 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;
import java.io.Serializable;
import java.lang.reflect.InvocationTargetException;
import java.util.Map;
import java.util.stream.Collectors;
import io.github.classgraph.ClassGraph;
import io.github.classgraph.ClassInfoList;
import io.github.classgraph.ScanResult;
public class VerbResolver implements Serializable {
private Map<String, Class<Selection>> map = null;
public VerbResolver() {
final ClassGraph classgraph = new ClassGraph();
try (ScanResult scanResult = // Assign scanResult in try-with-resources
classgraph // Create a new ClassGraph instance
.verbose() // If you want to enable logging to stderr
.enableAllInfo() // Scan classes, methods, fields, annotations
.whitelistPackages(
"eu.dnetlib.dhp.oa.graph.dump.subset.criteria") // Scan com.xyz and subpackages
.scan()) { // Perform the scan and return a ScanResult
ClassInfoList routeClassInfoList = scanResult
.getClassesWithAnnotation(
"eu.dnetlib.dhp.oa.graph.dump.subset.criteria.VerbClass");
this.map = routeClassInfoList
.stream()
.collect(
Collectors
.toMap(
value -> (String) value
.getAnnotationInfo()
.get(0)
.getParameterValues()
.get(0)
.getValue(),
value -> (Class<Selection>) value.loadClass()));
}
}
public Selection getSelectionCriteria(String name, String param)
throws NoSuchMethodException, IllegalAccessException, InvocationTargetException,
InstantiationException {
// return Class.forName(tmp_map.get(name)).
return map.get(name).getDeclaredConstructor((String.class)).newInstance(param);
}
}

View File

@ -0,0 +1,13 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;
public class VerbResolverFactory {
private VerbResolverFactory() {
}
public static VerbResolver newInstance() {
return new VerbResolver();
}
}

View File

@ -0,0 +1,53 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.selectionconstraints;
import java.io.Serializable;
import java.lang.reflect.InvocationTargetException;
import eu.dnetlib.dhp.oa.graph.dump.subset.criteria.Selection;
import eu.dnetlib.dhp.oa.graph.dump.subset.criteria.VerbResolver;
public class Constraint implements Serializable {
private String verb;
private String field;
private String value;
private Selection selection;
public String getVerb() {
return verb;
}
public void setVerb(String verb) {
this.verb = verb;
}
public String getField() {
return field;
}
public void setField(String field) {
this.field = field;
}
public String getValue() {
return value;
}
public void setValue(String value) {
this.value = value;
}
// public void setSelection(Selection sel) {
// selection = sel;
// }
public void setSelection(VerbResolver resolver)
throws InvocationTargetException, NoSuchMethodException, InstantiationException,
IllegalAccessException {
selection = resolver.getSelectionCriteria(verb, value);
}
public boolean verifyCriteria(String metadata) {
return selection.apply(metadata);
}
}

View File

@ -0,0 +1,65 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.selectionconstraints;
import java.io.Serializable;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Type;
import java.util.Collection;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;
import eu.dnetlib.dhp.oa.graph.dump.subset.criteria.VerbResolver;
/** Created by miriam on 02/08/2018. */
public class Constraints implements Serializable {
private static final Log log = LogFactory.getLog(Constraints.class);
private List<Constraint> constraint;
public List<Constraint> getConstraint() {
return constraint;
}
public void setConstraint(List<Constraint> constraint) {
this.constraint = constraint;
}
public void setSc(String json) {
Type collectionType = new TypeToken<Collection<Constraint>>() {
}.getType();
constraint = new Gson().fromJson(json, collectionType);
}
void setSelection(VerbResolver resolver) {
for (Constraint st : constraint) {
try {
st.setSelection(resolver);
} catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException
| InstantiationException e) {
log.error(e.getMessage());
}
}
}
// Constraint in and
public boolean verifyCriteria(final Param param) {
for (Constraint sc : constraint) {
boolean verified = false;
for (String value : param.get(sc.getField())) {
if (sc.verifyCriteria(value.trim())) {
verified = true;
}
}
if (!verified)
return verified;
}
return true;
}
}

View File

@ -0,0 +1,32 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.selectionconstraints;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.function.BiFunction;
import scala.xml.PrettyPrinter;
/**
* @author miriam.baglioni
* @Date 11/11/22
*/
public class Param extends HashMap<String, ArrayList<String>> implements Serializable {
public Param() {
super();
}
public void insert(String key, Object value) {
if (value instanceof ArrayList)
super.put(key, (ArrayList<String>) value);
if (value instanceof String) {
ArrayList<String> al = new ArrayList<>();
al.add((String) value);
super.put(key, al);
}
}
}

View File

@ -0,0 +1,15 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.selectionconstraints;
import java.io.Serializable;
import java.util.HashMap;
/**
* @author miriam.baglioni
* @Date 11/11/22
*/
public class ProtoMap extends HashMap<String, String> implements Serializable {
public ProtoMap() {
super();
}
}

View File

@ -0,0 +1,47 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.selectionconstraints;
import java.io.Serializable;
import java.lang.reflect.Type;
import java.util.Collection;
import java.util.List;
import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;
import eu.dnetlib.dhp.oa.graph.dump.subset.criteria.VerbResolver;
public class SelectionConstraints implements Serializable {
private List<Constraints> criteria;
public List<Constraints> getCriteria() {
return criteria;
}
public void setCriteria(List<Constraints> criteria) {
this.criteria = criteria;
}
public void setSc(String json) {
Type collectionType = new TypeToken<Collection<Constraints>>() {
}.getType();
criteria = new Gson().fromJson(json, collectionType);
}
// Constraints in or
public boolean verifyCriteria(final Param param) {
for (Constraints selc : criteria) {
if (selc.verifyCriteria(param)) {
return true;
}
}
return false;
}
public void addResolver(VerbResolver resolver) {
for (Constraints cs : criteria) {
cs.setSelection(resolver);
}
}
}

View File

@ -4,7 +4,7 @@
"paramName":"cmp",
"paramLongName":"communityMapPath",
"paramDescription": "the path to the serialization of the community map",
"paramRequired": true
"paramRequired": false
},
{
"paramName":"s",
@ -35,12 +35,32 @@
"paramLongName":"dumpType",
"paramDescription": "the type of the dump (complete for the whole graph, community for the products related to communities, funder for the results with at least a link to project",
"paramRequired": false
}, {
},
{
"paramName":"cid",
"paramLongName":"communityId",
"paramDescription": "the id of the community to be dumped",
"paramRequired": false
}
},
{
"paramName":"sc",
"paramLongName":"selectionCriteria",
"paramDescription": "the selection criteria to choose the results",
"paramRequired": false
},
{
"paramName":"pm",
"paramLongName":"pathMap",
"paramDescription": "the map to find fields in the json",
"paramRequired": false
},
{
"paramName":"rt",
"paramLongName":"resultType",
"paramDescription": "the map to find fields in the json",
"paramRequired": false
}
]

View File

@ -25,6 +25,12 @@
"paramLongName": "removeSet",
"paramDescription": "the list of classname relations, split by ';', not to be dumped",
"paramRequired": false
},
{
"paramName": "wd",
"paramLongName": "workingDir",
"paramDescription": "the list of classname relations, split by ';', not to be dumped",
"paramRequired": false
}
]

View File

@ -0,0 +1,37 @@
[
{
"paramName":"cmp",
"paramLongName":"communityMapPath",
"paramDescription": "the path to the serialization of the community map",
"paramRequired": false
},
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName":"cp",
"paramLongName":"contextPath",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
}
]

View File

@ -1,4 +1,5 @@
## This is a classpath-based import file (this header is required)
dump_complete classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app
dump_funder classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app
dump_community classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app
dump_community classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app
dump_subset classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/subset/oozie_app

View File

@ -173,10 +173,52 @@
<switch>
<case to="dump_funder">${wf:conf('dumpType') eq "funder"}</case>
<case to="dump_community">${wf:conf('dumpType') eq "community"}</case>
<case to="dump_subset">${wf:conf('dumpType') eq "subset"}</case>
<default to="dump_complete"/>
</switch>
</decision>
<!-- Sub-workflow which runs the dump subset for the complete graph -->
<action name="dump_subset">
<sub-workflow>
<app-path>${wf:appPath()}/dump_subset
</app-path>
<propagate-configuration/>
<configuration>
<property>
<name>communityMapPath</name>
<value>${workingDir}/communityMap</value>
</property>
<property>
<name>outputPath</name>
<value>${outputPath}</value>
</property>
<property>
<name>sourcePath</name>
<value>${sourcePath}</value>
</property>
<property>
<name>organizationCommunityMap</name>
<value>${organizationCommunityMap}</value>
</property>
<property>
<name>isLookUpUrl</name>
<value>${isLookUpUrl}</value>
</property>
<property>
<name>pathMap</name>
<value>${pathMap}</value>
</property>
<property>
<name>selectionCriteria</name>
<value>${selectionCriteria}</value>
</property>
</configuration>
</sub-workflow>
<ok to="End" />
<error to="Kill" />
</action>
<!-- Sub-workflow which runs the dump for the complete graph -->
<action name="dump_complete">
<sub-workflow>

View File

@ -120,7 +120,6 @@
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${workingDir}/result/publication</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
@ -146,7 +145,6 @@
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${workingDir}/result/dataset</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
@ -172,7 +170,6 @@
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${workingDir}/result/otherresearchproduct</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
@ -198,7 +195,6 @@
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${workingDir}/result/software</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
@ -224,7 +220,6 @@
<arg>--sourcePath</arg><arg>${sourcePath}/organization</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg>
<arg>--outputPath</arg><arg>${outputPath}/organization</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
@ -250,7 +245,6 @@
<arg>--sourcePath</arg><arg>${sourcePath}/project</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
<arg>--outputPath</arg><arg>${outputPath}/project</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
@ -276,7 +270,6 @@
<arg>--sourcePath</arg><arg>${sourcePath}/datasource</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg>
<arg>--outputPath</arg><arg>${outputPath}/datasource</arg>
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>

View File

@ -0,0 +1,30 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>hiveMetastoreUris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>hiveJdbcUrl</name>
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
</property>
<property>
<name>hiveDbName</name>
<value>openaire</value>
</property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>

View File

@ -0,0 +1,569 @@
<workflow-app name="sub-dump_subset" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>sourcePath</name>
<description>the source path</description>
</property>
<property>
<name>outputPath</name>
<description>the output path</description>
</property>
<property>
<name>organizationCommunityMap</name>
<description>the organization community map</description>
</property>
<property>
<name>hiveDbName</name>
<description>the target hive database name</description>
</property>
<property>
<name>hiveJdbcUrl</name>
<description>hive server jdbc url</description>
</property>
<property>
<name>hiveMetastoreUris</name>
<description>hive server metastore URIs</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
<property>
<name>oozieActionShareLibForSpark2</name>
<description>oozie action sharelib for spark 2.*</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
<description>spark 2.* sql query execution listeners classname</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>
<property>
<name>spark2EventLogDir</name>
<description>spark 2.* event log dir location</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>
<start to="fork_select_and_dump" />
<fork name="fork_select_and_dump">
<path start="select_and_dump_publication"/>
<path start="select_and_dump_dataset"/>
<path start="select_and_dump_orp"/>
<path start="select_and_dump_software"/>
</fork>
<action name="select_and_dump_publication">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table publication </name>
<class>eu.dnetlib.dhp.oa.graph.dump.subset.SparkDumpResult</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
<arg>--pathMap</arg><arg>${pathMap}</arg>
<arg>--selectionCriteria</arg><arg>${selectionCriteria}</arg>
<arg>--resultType</arg><arg>publication</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="select_and_dump_dataset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table dataset </name>
<class>eu.dnetlib.dhp.oa.graph.dump.subset.SparkDumpResult</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--pathMap</arg><arg>${pathMap}</arg>
<arg>--selectionCriteria</arg><arg>${selectionCriteria}</arg>
<arg>--resultType</arg><arg>dataset</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="select_and_dump_orp">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table ORP </name>
<class>eu.dnetlib.dhp.oa.graph.dump.subset.SparkDumpResult</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--pathMap</arg><arg>${pathMap}</arg>
<arg>--selectionCriteria</arg><arg>${selectionCriteria}</arg>
<arg>--resultType</arg><arg>otherresearchproduct</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="select_and_dump_software">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table software </name>
<class>eu.dnetlib.dhp.oa.graph.dump.subset.SparkDumpResult</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--pathMap</arg><arg>${pathMap}</arg>
<arg>--selectionCriteria</arg><arg>${selectionCriteria}</arg>
<arg>--resultType</arg><arg>software</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<join name="join_dump" to="select_subset"/>
<action name="select_subset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Select valid table relation </name>
<class>eu.dnetlib.dhp.oa.graph.dump.subset.SparkSelectSubset</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--conf spark.sql.shuffle.partitions=3840
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--removeSet</arg><arg>${removeSet}</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<fork name="fork_dump_otherentities">
<path start="dump_organization"/>
<path start="dump_project"/>
<path start="dump_datasource"/>
</fork>
<action name="dump_organization">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table organization </name>
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}/original/organization</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg>
<arg>--outputPath</arg><arg>${outputPath}/dump/organization</arg>
</spark>
<ok to="join_dump_otherentities"/>
<error to="Kill"/>
</action>
<action name="dump_project">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table project </name>
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}/original/project</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
<arg>--outputPath</arg><arg>${outputPath}/dump/project</arg>
</spark>
<ok to="join_dump_otherentities"/>
<error to="Kill"/>
</action>
<action name="dump_datasource">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table datasource </name>
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}/original/datasource</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg>
<arg>--outputPath</arg><arg>${outputPath}/dump/datasource</arg>
</spark>
<ok to="join_dump_otherentities"/>
<error to="Kill"/>
</action>
<join name="join_dump_otherentities" to="fork_context"/>
<fork name="fork_context">
<path start="create_entities_fromcontext"/>
<path start="create_relation_fromcontext"/>
<path start="create_relation_fromorgs"/>
</fork>
<action name="create_entities_fromcontext">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextEntities</main-class>
<arg>--hdfsPath</arg><arg>${workingDir}/context/community_infrastructure.json.gz</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
</java>
<ok to="select_valid_context"/>
<error to="Kill"/>
</action>
<action name="select_valid_context">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table software </name>
<class>eu.dnetlib.dhp.oa.graph.dump.subset.SparkSelectValidContext</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}/original</arg>
<arg>--contextPath</arg><arg>${workingDir}/context/community_infrastructure.json.gz</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
<arg>--outputPath</arg><arg>${outputPath}/dump/communities_infrastructures</arg>
</spark>
<ok to="join_context"/>
<error to="Kill"/>
</action>
<action name="create_relation_fromcontext">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextRelation</main-class>
<arg>--hdfsPath</arg><arg>${workingDir}/relation/context</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
</java>
<ok to="join_context"/>
<error to="Kill"/>
</action>
<action name="create_relation_fromorgs">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table relation </name>
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkOrganizationRelation</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
<arg>--outputPath</arg><arg>${workingDir}/relation/contextOrg</arg>
<arg>--organizationCommunityMap</arg><arg>${organizationCommunityMap}</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_context"/>
<error to="Kill"/>
</action>
<join name="join_context" to="filter_relation_context"/>
<action name="filter_relation_context">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table software </name>
<class>eu.dnetlib.dhp.oa.graph.dump.subset.SparkSelectValidRelationContext</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}/dump</arg>
<arg>--relationPath</arg><arg>${workingDir}/relation</arg> <!-- new relations from context -->
<arg>--contextPath</arg><arg>${outputPath}/dump/communities_infrastructures</arg>
</spark>
<ok to="dump_relation"/>
<error to="Kill"/>
</action>
<action name="dump_relation">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table relation </name>
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpRelationJob</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}/original/relation</arg>
<arg>--outputPath</arg><arg>${outputPath}/dump/relation</arg>
<arg>--removeSet</arg><arg>${removeSet}</arg>
</spark>
<ok to="rels_from_pubs"/>
<error to="Kill"/>
</action>
<action name="rels_from_pubs">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Extract Relations from publication </name>
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}/original/publication</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${outputPath}/dump/relation</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="rels_from_dats"/>
<error to="Kill"/>
</action>
<action name="rels_from_dats">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table dataset </name>
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/result/dataset</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${outputPath}/dump/relation</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="rels_from_orp"/>
<error to="Kill"/>
</action>
<action name="rels_from_orp">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table ORP </name>
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/result/otherresearchproduct</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${outputPath}/dump/relation</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="rels_from_sw"/>
<error to="Kill"/>
</action>
<action name="rels_from_sw">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table software </name>
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/result/software</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${outputPath}/dump/relation</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<kill name="Kill">
<message>Sub-workflow dump complete failed with error message ${wf:errorMessage()}
</message>
</kill>
<end name="End" />
</workflow-app>

View File

@ -4,8 +4,6 @@ package eu.dnetlib.dhp.oa.graph.dump;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import org.apache.commons.io.FileUtils;
@ -27,6 +25,13 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts;
import eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob;
import eu.dnetlib.dhp.oa.graph.dump.subset.SparkDumpResult;
import eu.dnetlib.dhp.oa.graph.dump.subset.criteria.VerbResolver;
import eu.dnetlib.dhp.oa.graph.dump.subset.criteria.VerbResolverFactory;
import eu.dnetlib.dhp.oa.graph.dump.subset.selectionconstraints.ProtoMap;
import eu.dnetlib.dhp.oa.graph.dump.subset.selectionconstraints.SelectionConstraints;
import eu.dnetlib.dhp.oa.model.*;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
@ -110,7 +115,7 @@ public class DumpJobTest {
}
@Test
public void testDumpIndicators() {
public void testDumpIndicators() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publicationWithMeasures")
.getPath();
@ -119,12 +124,16 @@ public class DumpJobTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
// false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType());
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -145,7 +154,7 @@ public class DumpJobTest {
}
@Test
public void testPublicationDump() throws JsonProcessingException {
public void testPublicationDump() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance")
.getPath();
@ -154,12 +163,16 @@ public class DumpJobTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
// false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType());
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -432,7 +445,7 @@ public class DumpJobTest {
}
@Test
public void testDatasetDump() {
public void testDatasetDump() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_extendedinstance")
.getPath();
@ -441,12 +454,16 @@ public class DumpJobTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
false, sourcePath, workingDir.toString() + "/result",
communityMapPath, Dataset.class,
GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType());
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -513,7 +530,7 @@ public class DumpJobTest {
}
@Test
public void testSoftwareDump() {
public void testSoftwareDump() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/software_extendedinstance")
.getPath();
@ -522,12 +539,16 @@ public class DumpJobTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
false, sourcePath, workingDir.toString() + "/result",
communityMapPath, Software.class,
GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType());
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -563,7 +584,7 @@ public class DumpJobTest {
}
@Test
public void testOrpDump() {
public void testOrpDump() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/orp_extendedinstance")
.getPath();
@ -572,12 +593,16 @@ public class DumpJobTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
false, sourcePath, workingDir.toString() + "/result",
communityMapPath, OtherResearchProduct.class,
GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType());
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -616,7 +641,7 @@ public class DumpJobTest {
}
@Test
public void testPublicationDumpCommunity() throws JsonProcessingException {
public void testPublicationDumpCommunity() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance")
@ -626,11 +651,16 @@ public class DumpJobTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType());
SparkDumpCommunityProducts
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -676,7 +706,7 @@ public class DumpJobTest {
}
@Test
public void testDataset() {
public void testDataset() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset.json")
@ -686,11 +716,16 @@ public class DumpJobTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class,
CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType());
SparkDumpCommunityProducts
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -734,7 +769,7 @@ public class DumpJobTest {
}
@Test
public void testDataset2All() {
public void testDataset2All() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_cleaned")
@ -744,12 +779,16 @@ public class DumpJobTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
// false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class,
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class,
GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType());
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -765,7 +804,7 @@ public class DumpJobTest {
}
@Test
public void testDataset2Communities() {
public void testDataset2Communities() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_cleaned")
@ -775,11 +814,16 @@ public class DumpJobTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class,
CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType());
SparkDumpCommunityProducts
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -795,7 +839,7 @@ public class DumpJobTest {
}
@Test
public void testPublication() {
public void testPublication() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication.json")
@ -805,12 +849,16 @@ public class DumpJobTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
// false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType());
SparkDumpCommunityProducts
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -829,7 +877,7 @@ public class DumpJobTest {
}
@Test
public void testSoftware() {
public void testSoftware() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/software.json")
@ -839,12 +887,16 @@ public class DumpJobTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
// false, sourcePath, workingDir.toString() + "/result", communityMapPath, Software.class,
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Software.class,
CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType());
SparkDumpCommunityProducts
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -862,7 +914,7 @@ public class DumpJobTest {
}
@Test
public void testORP() {
public void testORP() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/orp.json")
@ -872,12 +924,16 @@ public class DumpJobTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
// false, sourcePath, workingDir.toString() + "/result", communityMapPath, OtherResearchProduct.class,
false, sourcePath, workingDir.toString() + "/result", communityMapPath, OtherResearchProduct.class,
CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType());
SparkDumpCommunityProducts
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -895,7 +951,7 @@ public class DumpJobTest {
}
@Test
public void testRecord() {
public void testRecord() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/singelRecord_pub.json")
.getPath();
@ -904,12 +960,16 @@ public class DumpJobTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType());
SparkDumpCommunityProducts
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<CommunityResult> tmp = sc
@ -927,7 +987,7 @@ public class DumpJobTest {
}
@Test
public void testArticlePCA() {
public void testArticlePCA() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_pca")
.getPath();
@ -936,11 +996,16 @@ public class DumpJobTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType());
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -1012,15 +1077,4 @@ public class DumpJobTest {
.getString(2));
}
@Test
void provaSerializzazione() throws IOException {
ImpactMeasures im = new ImpactMeasures();
Score s = new Score();
s.setClazz("pippo");
s.setScore("pluto");
im.setInfluence(s);
String st = new ObjectMapper().writeValueAsString(im);
System.out.println(st);
}
}

View File

@ -5,6 +5,8 @@ import java.util.*;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
@ -546,6 +548,14 @@ class CreateRelationTest {
cInfoList.forEach(cInfo -> Process.getRelation(cInfo).forEach(rList::add));
rList.forEach(r -> {
try {
System.out.println(new ObjectMapper().writeValueAsString(r));
} catch (JsonProcessingException e) {
e.printStackTrace();
}
});
Assertions.assertEquals(34, rList.size());
Assertions
@ -625,6 +635,8 @@ class CreateRelationTest {
tmp.contains("10|opendoar____::97275a23ca44226c9964043c8462be96") &&
tmp.contains("10|doajarticles::2899208a99aa7d142646e0a80bfeef05"));
}
@Test

View File

@ -76,9 +76,15 @@ public class DumpOrganizationProjectDatasourceTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/complete/organization")
.getPath();
DumpGraphEntities dg = new DumpGraphEntities();
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Organization",
"-outputPath", workingDir.toString() + "/dump"
dg.run(false, sourcePath, workingDir.toString() + "/dump", Organization.class, null);
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -99,15 +105,21 @@ public class DumpOrganizationProjectDatasourceTest {
}
@Test
public void dumpProjectTest() throws NoAvailableEntityTypeException {
public void dumpProjectTest() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/complete/project")
.getPath();
DumpGraphEntities dg = new DumpGraphEntities();
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Project",
"-outputPath", workingDir.toString() + "/dump"
dg.run(false, sourcePath, workingDir.toString() + "/dump", Project.class, null);
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -128,14 +140,20 @@ public class DumpOrganizationProjectDatasourceTest {
}
@Test
public void dumpDatasourceTest() throws NoAvailableEntityTypeException {
public void dumpDatasourceTest() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/complete/datasource")
.getPath();
DumpGraphEntities dg = new DumpGraphEntities();
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Datasource",
"-outputPath", workingDir.toString() + "/dump"
dg.run(false, sourcePath, workingDir.toString() + "/dump", Datasource.class, null);
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

View File

@ -23,7 +23,7 @@ class FunderParsingTest {
"Computer &amp; Information Science &amp; Engineering</description><name>Directorate for Computer &amp; " +
"Information Science &amp; Engineering</name><parent/><class>nsf:fundingStream</class></funding_level_0></parent></funding_level_1></fundingtree>";
Funder f = DumpGraphEntities.getFunder(funding_Stream);
Funder f = SparkDumpEntitiesJob.getFunder(funding_Stream);
Assertions.assertEquals("NSF", f.getShortName());
Assertions.assertEquals("National Science Foundation", f.getName());
@ -54,7 +54,7 @@ class FunderParsingTest {
"<description>Horizon 2020 Framework Programme</description><parent/>" +
"<class>ec:h2020fundings</class></funding_level_0></parent></funding_level_1></parent></funding_level_2></fundingtree>";
Funder f = DumpGraphEntities.getFunder(funding_stream);
Funder f = SparkDumpEntitiesJob.getFunder(funding_stream);
Assertions.assertEquals("EC", f.getShortName());
Assertions.assertEquals("European Commission", f.getName());

View File

@ -0,0 +1,397 @@
package eu.dnetlib.dhp.oa.graph.dump.subset;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob;
import eu.dnetlib.dhp.oa.model.graph.ResearchCommunity;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.graph.dump.DumpJobTest;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
import eu.dnetlib.dhp.schema.oaf.*;
/**
* @author miriam.baglioni
* @Date 16/11/22
*/
public class DumpSubsetTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static SparkSession spark;
private static Path workingDir;
private static final Logger log = LoggerFactory.getLogger(DumpSubsetTest.class);
private static final CommunityMap map = new CommunityMap();
static {
map.put("egi", "EGI Federation");
map.put("fet-fp7", "FET FP7");
map.put("fet-h2020", "FET H2020");
map.put("clarin", "CLARIN");
map.put("fam", "Fisheries and Aquaculture Management");
map.put("ni", "Neuroinformatics");
map.put("mes", "European Marine Scinece");
map.put("instruct", "Instruct-Eric");
map.put("rda", "Research Data Alliance");
map.put("elixir-gr", "ELIXIR GR");
map.put("aginfra", "Agricultural and Food Sciences");
map.put("dariah", "DARIAH EU");
map.put("risis", "RISI");
map.put("ee", "SDSN - Greece");
map.put("oa-pg", "EC Post-Grant Open Access Pilot");
map.put("beopen", "Transport Research");
map.put("euromarine", "Euromarine");
map.put("ifremer", "Ifremer");
map.put("dh-ch", "Digital Humanities and Cultural Heritage");
map.put("science-innovation-policy", "Science and Innovation Policy Studies");
map.put("covid-19", "COVID-19");
map.put("enrmaps", "Energy Research");
map.put("epos", "EPOS");
}
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files.createTempDirectory(DumpSubsetTest.class.getSimpleName());
log.info("using work dir {}", workingDir);
SparkConf conf = new SparkConf();
conf.setAppName(DumpSubsetTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.toString());
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
spark = SparkSession
.builder()
.appName(DumpSubsetTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}
@Test // Step 1
void testSelectionConstraints() throws Exception {
final String pathMap = "{\"author\" : \"$['author'][*]['fullname']\", " +
"\"title\" : \"$['title'][*]['value']\", \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\", "
+
"\"contributor\" : \"$['contributor'][*]['value']\", \"description\" : \"$['description'][*]['value']\", "
+
"\"dateofacceptance\" : \"$['dateofacceptance']['value']\"}";
final String constraint = "{\"criteria\":[{\"constraint\":[{\"verb\":\"lesser_than\",\"field\":\"dateofacceptance\",\"value\":\"2023-01-01\"},{\"verb\":\"greater_than\",\"field\":\"dateofacceptance\",\"value\":\"2006-12-31\"}]}]}";
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_pca")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
SparkDumpResult
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-outputPath", workingDir.toString(),
"-communityMapPath", communityMapPath,
"-pathMap", pathMap,
"-selectionCriteria", constraint,
"-resultType", "publication"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<GraphResult> tmp = sc
.textFile(workingDir.toString() + "/dump/publication")
.map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class));
Assertions.assertEquals(15, tmp.count());
JavaRDD<Publication> tmp_pubs = sc
.textFile(workingDir.toString() + "/original/publication")
.map(item -> OBJECT_MAPPER.readValue(item, Publication.class));
Assertions.assertEquals(15, tmp_pubs.count());
}
@Test // Step2
void testSelectSubset() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/input/")
.getPath();
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
sc
.textFile(
getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/original/publication")
.getPath())
.saveAsTextFile(workingDir.toString() + "/original/publication");
sc
.textFile(
getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/original/software")
.getPath())
.saveAsTextFile(workingDir.toString() + "/original/software");
sc
.textFile(
getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/original/dataset")
.getPath())
.saveAsTextFile(workingDir.toString() + "/original/dataset");
sc
.textFile(
getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/original/otherresearchproduct")
.getPath())
.saveAsTextFile(workingDir.toString() + "/original/otherresearchproduct");
sc
.textFile(
getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/dump/publication")
.getPath())
.saveAsTextFile(workingDir.toString() + "/dump/publication");
sc
.textFile(
getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/dump/software")
.getPath())
.saveAsTextFile(workingDir.toString() + "/dump/software");
sc
.textFile(
getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/dump/dataset")
.getPath())
.saveAsTextFile(workingDir.toString() + "/dump/dataset");
sc
.textFile(
getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/dump/otherresearchproduct")
.getPath())
.saveAsTextFile(workingDir.toString() + "/dump/otherresearchproduct");
SparkSelectSubset
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-outputPath", workingDir.toString()
});
JavaRDD<Relation> tmp = sc
.textFile(workingDir.toString() + "/original/relation")
.map(item -> OBJECT_MAPPER.readValue(item, Relation.class));
Assertions.assertEquals(20, tmp.count());
Assertions
.assertEquals(
6, tmp.filter(r -> r.getSource().startsWith("50|") && r.getTarget().startsWith("50|")).count());
Assertions
.assertEquals(
3, tmp.filter(r -> r.getSource().startsWith("50|") && r.getTarget().startsWith("20|")).count());
Assertions
.assertEquals(
3, tmp.filter(r -> r.getSource().startsWith("20|") && r.getTarget().startsWith("50|")).count());
Assertions
.assertEquals(
4, tmp.filter(r -> r.getSource().startsWith("40|") && r.getTarget().startsWith("50|")).count());
Assertions
.assertEquals(
1, tmp.filter(r -> r.getSource().startsWith("10|") && r.getTarget().startsWith("20|")).count());
Assertions
.assertEquals(
1, tmp.filter(r -> r.getSource().startsWith("20|") && r.getTarget().startsWith("10|")).count());
Assertions
.assertEquals(
1, tmp.filter(r -> r.getSource().startsWith("20|") && r.getTarget().startsWith("40|")).count());
Assertions
.assertEquals(
1, tmp.filter(r -> r.getSource().startsWith("40|") && r.getTarget().startsWith("20|")).count());
JavaRDD<eu.dnetlib.dhp.schema.oaf.Datasource> tmp_datasource = sc
.textFile(workingDir.toString() + "/original/datasource")
.map(item -> OBJECT_MAPPER.readValue(item, Datasource.class));
Assertions.assertEquals(4, tmp_datasource.count());
Assertions
.assertEquals(
0,
tmp_datasource
.filter(d -> d.getId().equals("10|issn___print::0a79337eaf5145faa478785423273355"))
.count());
JavaRDD<Organization> tmp_organization = sc
.textFile(workingDir.toString() + "/original/organization")
.map(item -> OBJECT_MAPPER.readValue(item, Organization.class));
Assertions.assertEquals(3, tmp_organization.count());
JavaRDD<Project> tmp_project = sc
.textFile(workingDir.toString() + "/original/project")
.map(item -> OBJECT_MAPPER.readValue(item, Project.class));
Assertions.assertEquals(3, tmp_project.count());
}
@Test
public void dumpEntitiesTest() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/original/organization")
.getPath();
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Organization",
"-outputPath", workingDir.toString() + "/dump"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<eu.dnetlib.dhp.oa.model.graph.Organization> tmp = sc
.textFile(workingDir.toString() + "/dump")
.map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.oa.model.graph.Organization.class));
Assertions.assertEquals(3, tmp.count());
tmp
.foreach(
o -> System.out
.println(OBJECT_MAPPER.writeValueAsString(o)));
}
@Test
public void selectValidContextTest() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/original/")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/communityMap")
.getPath();
final String contextPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/context/community_infrastructure")
.getPath();
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
SparkSelectValidContext
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-outputPath", workingDir.toString() + "/dump/community_infrastructure",
"-communityMapPath", communityMapPath,
"-contextPath", contextPath
});
JavaRDD<ResearchCommunity> tmp = sc
.textFile(workingDir.toString() + "/dump/community_infrastructure")
.map(item -> OBJECT_MAPPER.readValue(item, ResearchCommunity.class));
Assertions.assertEquals(5, tmp.count());
Assertions.assertEquals(1, tmp.filter(cr -> cr.getAcronym().equals("enermaps")).count());
Assertions.assertEquals(1, tmp.filter(cr -> cr.getAcronym().equals("eutopia")).count());
Assertions.assertEquals(1, tmp.filter(cr -> cr.getAcronym().equals("dh-ch")).count());
Assertions.assertEquals(1, tmp.filter(cr -> cr.getAcronym().equals("beopen")).count());
Assertions.assertEquals(1, tmp.filter(cr -> cr.getAcronym().equals("neanias-underwater")).count());
}
@Test
public void selectValidRelationContextTest() throws Exception {
// final String contextPath = parser.get("contextPath");
// log.info("contextPath: {}", contextPath);
//
// final String contextRelationPath = parser.get("contextRelationPath");
// log.info("contextRelationPath: {}", contextRelationPath);
// final String sourcePath = getClass()
// .getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/dump/")
// .getPath();
//
// final String communityMapPath = getClass()
// .getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/communityMap")
// .getPath();
//
// final String contextPath = getClass()
// .getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/context/community_infrastructure")
// .getPath();
//
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
// SparkSelectValidRelationContext
// .main(
// new String[] {
// "-isSparkSessionManaged", Boolean.FALSE.toString(),
// "-sourcePath", sourcePath,
// "-outputPath", workingDir.toString() + "/dump/community_infrastructure",
// "-communityMapPath", communityMapPath,
// "-contextPath", contextPath
//
// });
//
JavaRDD<ResearchCommunity> tmp = sc
.textFile(workingDir.toString() + "/dump/community_infrastructure")
.map(item -> OBJECT_MAPPER.readValue(item, ResearchCommunity.class));
Assertions.assertEquals(5, tmp.count());
Assertions.assertEquals(1, tmp.filter(cr -> cr.getAcronym().equals("enermaps")).count());
Assertions.assertEquals(1, tmp.filter(cr -> cr.getAcronym().equals("eutopia")).count());
Assertions.assertEquals(1, tmp.filter(cr -> cr.getAcronym().equals("dh-ch")).count());
Assertions.assertEquals(1, tmp.filter(cr -> cr.getAcronym().equals("beopen")).count());
Assertions.assertEquals(1, tmp.filter(cr -> cr.getAcronym().equals("neanias-underwater")).count());
}
}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
{"eut":"European University of Technology","covid-19":"COVID-19","dariah":"DARIAH EU","aurora":"Aurora Universities Network","neanias-space":"NEANIAS Space Research Community","eutopia":"EUTOPIA Alliance","neanias-underwater":"NEANIAS Underwater Research Community","neanias-atmospheric":"NEANIAS Atmospheric Research Community","beopen":"Transport Research","sdsn-gr":"SDSN - Greece","elixir-gr":"ELIXIR GR","rural-digital-europe":"Rural Digital Europe","north-america-studies":"North America Studies","mes":"European Marine Science","enermaps":"Energy Research","ni":"Neuroinformatics","dh-ch":"Digital Humanities and Cultural Heritage"}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,5 @@
{"id":"00|context_____::e15922110564cf669aaed346e871bc01","acronym":"eutopia","name":"EUTOPIA Open Research Portal","type":"Research Community","description":"<p style=text-align:justify>EUTOPIA is an ambitious alliance of 10 like-minded universities ready to reinvent themselves: the Babeș-Bolyai University in Cluj-Napoca (Romania), the Vrije Universiteit Brussels (Belgium), the Ca&#39;Foscari University of Europe (Italy), CY Cergy Paris Universit&eacute; (France), the Technische Universit&auml;t Dresden (Germany), the University of Gothenburg (Sweden), the University of Ljubljana (Slovenia), the NOVA University Lisbon (Portugal), the University of Pompeu Fabra (Spain) and the University of Warwick (United Kingdom). Together, these 10 pioneers join forces to build the university of the future.</p>","zenodo_community":null}
{"id":"00|context_____::aa0e56dd2e9d2a0be749f5debdd2b3d8","acronym":"enermaps","name":"Welcome to EnerMaps Gateway! Find the latest scientific data.","type":"Research Community","description":"","zenodo_community":null,"subject":[]}
{"id":"00|context_____::6f567d9abd1c6603b0c0205a832bc757","acronym":"neanias-underwater","name":"NEANIAS Underwater Research Community","type":"Research Community","description":"","zenodo_community":null,"subject":["Ocean mapping","Multibeam Backscatter","Bathymetry","Seabed classification","Submarine Geomorphology","Underwater Photogrammetry"]}
{"id":"00|context_____::04a00617ca659adc944977ac700ea14b","acronym":"dh-ch","name":"Digital Humanities and Cultural Heritage","type":"Research Community","description":"This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.","zenodo_community":"https://zenodo.org/communities/oac_dh-ch","subject":["modern art","monuments","europeana data model","field walking","frescoes","LIDO metadata schema","art history","excavation","Arts and Humanities General","coins","temples","numismatics","lithics","environmental archaeology","digital cultural heritage","archaeological reports","history","CRMba","churches","cultural heritage","archaeological stratigraphy","religious art","digital humanities","archaeological sites","linguistic studies","bioarchaeology","architectural orders","palaeoanthropology","fine arts","europeana","CIDOC CRM","decorations","classic art","stratigraphy","digital archaeology","intangible cultural heritage","walls","chapels","CRMtex","Language and Literature","paintings","archaeology","mosaics","burials","medieval art","castles","CARARE metadata schema","statues","natural language processing","inscriptions","CRMsci","vaults","contemporary art","Arts and Humanities","CRMarchaeo","pottery"]}
{"id":"00|context_____::5fde864866ea5ded4cc873b3170b63c3","acronym":"beopen","name":"Transport Research","type":"Research Community","description":"Welcome to the Open Research Gateway for Transport Research. This gateway is part of the TOPOS Observatory (https://www.topos-observatory.eu). The TOPOS aims to showcase the status and progress of open science uptake in transport research. It focuses on promoting territorial and cross border cooperation and contributing in the optimization of open science in transport research.\nThe TOPOS Observatory is supported by the EC H2020 BEOPEN project (824323)","zenodo_community":"https://zenodo.org/communities/be-open-transport","subject":["Green Transport","City mobility systems","Vulnerable road users","Traffic engineering","Transport electrification","Intermodal freight transport","Clean vehicle fleets","Intelligent mobility","Inflight refueling","District mobility systems","Navigation and control systems for optimised planning and routing","European Space Technology Platform","European Transport networks","Green cars","Inter-modality infrastructures","Advanced Take Off and Landing Ideas","Sustainable urban systems","port-area railway networks","Innovative forms of urban transport","Alliance for Logistics Innovation through Collaboration in Europe","Advisory Council for Aeronautics Research in Europe","Mobility services for people and goods","Guidance and traffic management","Passenger mobility","Smart mobility and services","transport innovation","high-speed railway","Vehicle design","Inland shipping","public transportation","aviations climate impact","Road transport","On-demand public transport","Personal Air Transport","Pipeline transport","European Association of Aviation Training and Education Organisations","Defrosting of railway infrastructure","Inclusive and affordable transport","River Information Services","jel:L92","Increased use of public transport","Seamless mobility","STRIA","trolleybus transport","Intelligent Transport System","Low-emission alternative energy for transport","Shared mobility for people and goods","Business model for urban mobility","Interoperability of transport systems","Cross-border train slot booking","Air transport","Transport pricing","Sustainable transport","European Rail Transport Research Advisory Council","Alternative aircraft configurations","Railways applications","urban transport","Environmental impact of transport","urban freight delivery systems","Automated Road Transport","Alternative fuels in public transport","Active LIDAR-sensor for GHG-measurements","Autonomous logistics operations","Rational use of motorised transport","Network and traffic management systems","electrification of railway wagons","Single European Sky","Electrified road systems","Railway dynamics","Motorway of the Sea","smart railway communications","Maritime transport","Environmental- friendly transport","Combined transport","Connected automated driving technology","Innovative freight logistics services","automated and shared vehicles","Alternative Aircraft Systems","Land-use and transport interaction","Public transport system","Business plan for shared mobility","Shared mobility","Growing of mobility demand","European Road Transport Research Advisory Council","WATERBORNE ETP","Effective transport management system","Short Sea Shipping","air traffic management","Sea hubs and the motorways of the sea","Urban mobility solutions","Smart city planning","Maritime spatial planning","EUropean rail Research Network of Excellence","ENERGY CONSUMPTION BY THE TRANSPORT SECTOR","Integrated urban plan","inland waterway services","European Conference of Transport Research Institutes","air vehicles","E-freight","Automated Driving","Automated ships","pricing for cross-border passenger transport","Vehicle efficiency","Railway transport","Electric vehicles","Road traffic monitoring","Deep sea shipping","Circular economy in transport","Traffic congestion","air transport system","Urban logistics","Rail transport","OpenStreetMap","high speed rail","Transportation engineering","Intermodal travel information","Flight Data Recorders","Advanced driver assistance systems","long distance freight transport","Inland waterway transport","Smart mobility","Mobility integration","Personal Rapid Transit system","Safety measures & requirements for roads","Green rail transport","Vehicle manufacturing","Future Airport Layout","Rail technologies","European Intermodal Research Advisory Council","inland navigation","Automated urban vehicles","ECSS-standards","Traveller services","Polluting transport","Air Traffic Control","Cooperative and connected and automated transport","Innovative powertrains","Quality of transport system and services","door-to- door logistics chain","Inter-modal aspects of urban mobility","Innovative freight delivery systems","urban freight delivery infrastructures"]}

View File

@ -0,0 +1,3 @@
{"legalshortname":"SHoF","legalname":"Swedish House of Finance","websiteurl":"http://houseoffinance.se/","alternativenames":["SHoF"],"country":{"code":"SE","label":"Sweden"},"id":"20|grid________::87698402476531ba39e61f1df38f2a91","pid":[{"scheme":"grid","value":"grid.451954.8"}]}
{"legalshortname":"Korean Elementary Moral Education Society","legalname":"Korean Elementary Moral Education Society","websiteurl":"http://www.ethics.or.kr/","alternativenames":["한국초등도덕교육학회"],"country":{"code":"KR","label":"Korea (Republic of)"},"id":"20|grid________::bd5cbea5dc434b8fd811a880cb9d4a05","pid":[{"scheme":"grid","value":"grid.496778.3"}]}
{"legalshortname":"NHC","legalname":"National Health Council","websiteurl":"http://www.nationalhealthcouncil.org/","alternativenames":["NHC"],"country":{"code":"US","label":"United States"},"id":"20|grid________::94948cc036605bf4a00ec77ce5ca92d3","pid":[{"scheme":"grid","value":"grid.487707.b"}]}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,5 @@
{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2018-06-05","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Journal of Applied Mathematics and Stochastic Analysis"},"extraInfo":[],"id":"10|doajarticles::1fa6859d71faa77b32d82f278c6ed1df","lastupdatetimestamp":1592688952862,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"doaj10489533"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Journal of Applied Mathematics and Stochastic Analysis"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1048-9533",null],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"https://www.hindawi.com/journals/jamsa"}}
{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-05-25","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Pelitutkimuksen vuosikirja"},"extraInfo":[],"id":"10|doajarticles::9c4b678901e5276d9e3addee566816af","lastupdatetimestamp":1592688952862,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"doaj1798355X"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Pelitutkimuksen vuosikirja"},"openairecompatibility":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1798-355X",null],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geography. Anthropology. Recreation: Recreation. Leisure | Science: Mathematics: Instruments and machines: Electronic computers. Computer science: Computer software"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://www.pelitutkimus.fi"}}
{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2018-06-05","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Statistika: Statistics and Economy Journal"},"extraInfo":[],"id":"10|doajarticles::a5314b60f79b869cb5d3a2709167bc3a","lastupdatetimestamp":1592688952862,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"doaj0322788X"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Statistika: Statistics and Economy Journal"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::0322-788X",null],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Social Sciences: Statistics"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://www.czso.cz/statistika_journal"}}
{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2018-06-05","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Review of Development Finance"},"extraInfo":[],"id":"10|doajarticles::acb7c79bb85d3b3a7b75389f5d9570f5","lastupdatetimestamp":1592688952862,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"doaj18799337"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Review of Development Finance"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1879-9337",null],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Social Sciences: Industries. Land use. Labor: Economic growth, development, planning | Social Sciences: Finance"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://www.journals.elsevier.com/review-of-development-finance/"}}
{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-05-28","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"The Journal of Advanced Navigation Technology"},"extraInfo":[],"id":"10|issn___print::0a79337eaf5145faa478785423273355","lastupdatetimestamp":1592688952862,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"jrnl12269026"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"The Journal of Advanced Navigation Technology"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::1226-9026",null],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":null,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"re3data","classname":"re3data","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"r3d100012161"}],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false}}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,30 @@
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"50|DansKnawCris::4669a378a73661417182c208e6fdab53","subRelType":"provision","target":"50|DansKnawCris::b90247718304c409331edb82fd0e8d56"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"50|DansKnawCris::4669a378a73661417182c208e6fdab53","subRelType":"provision","target":"50|DansKnawCris::ba8f1ea3adf1bba501ec9da3a58e9638"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"50|DansKnawCris::4669a378a73661417182c208e6fdab53","subRelType":"provision","target":"50|datacite____::05c611fdfc93d7a2a703d1324e28104a"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"50|DansKnawCris::52c4541c9bffde34daa945ece8dcf635","subRelType":"provision","target":"50|datacite____::0a15c3ad876db2ffa2f8f1c9a63c3cd0"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"50|DansKnawCris::52c4541c9bffde34daa945ece8dcf635","subRelType":"provision","target":"50|datacite____::150d96a57b37c02f5d14a6ace965d790"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"50|DansKnawCris::52c4541c9bffde34daa945ece8dcf635","subRelType":"provision","target":"50|datacite____::1e099cf76219212d554ad35b45d2345c"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"50|DansKnawCris::794d07c2e66f1fbf07d61b9bfca36dc2","subRelType":"provision","target":"20|grid________::bd5cbea5dc434b8fd811a880cb9d4a05"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"50|DansKnawCris::794d07c2e66f1fbf07d61b9bfca36dc2","subRelType":"provision","target":"20|grid________::87698402476531ba39e61f1df38f2a91"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"50|DansKnawCris::794d07c2e66f1fbf07d61b9bfca36dc2","subRelType":"provision","target":"20|grid________::94948cc036605bf4a00ec77ce5ca92d3"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"20|grid________::bd5cbea5dc434b8fd811a880cb9d4a05","subRelType":"provision","target":"50|DansKnawCris::794d07c2e66f1fbf07d61b9bfca36dc2"}
{"collectedfrom":[{"key":"10|infrastruct_::f66f1bd369679b5b077dcdf006089556","value":"OpenAIRE"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1594398578323,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"20|grid________::87698402476531ba39e61f1df38f2a91","subRelType":"provision","target":"50|DansKnawCris::794d07c2e66f1fbf07d61b9bfca36dc2"}
{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"20|grid________::94948cc036605bf4a00ec77ce5ca92d3","subRelType":"provision","target":"50|DansKnawCris::794d07c2e66f1fbf07d61b9bfca36dc2"}
{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"40|aka_________::01bb7b48e29d732a1c7bc5150b9195c4","subRelType":"provision","target":"50|datacite____::016dfd6ecbfaa929bbd90ec3a2b51521"}
{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"40|aka_________::9d1af21dbd0f5bc719f71553d19a6b3a","subRelType":"provision","target":"50|datacite____::016dfd6ecbfaa929bbd90ec3a2b51521"}
{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"40|anr_________::1f21edc5c902be305ee47148955c6e50","subRelType":"provision","target":"50|datacite____::016dfd6ecbfaa929bbd90ec3a2b51521"}
{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"40|aka_________::01bb7b48e29d732a1c7bc5150b9195c4","subRelType":"provision","target":"50|DansKnawCris::794d07c2e66f1fbf07d61b9bfca36dc2"}
{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::1fa6859d71faa77b32d82f278c6ed1df","subRelType":"provision","target":"20|opendoar____::589618708434cfc5b830601ac4b339ee"}
{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::9c4b678901e5276d9e3addee566816af","subRelType":"provision","target":"20|grid________::bd5cbea5dc434b8fd811a880cb9d4a05"}
{"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"20|grid________::87698402476531ba39e61f1df38f2a91","subRelType":"provision","target":"10|doajarticles::a5314b60f79b869cb5d3a2709167bc3a"}
{"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"40|aka_________::01bb7b48e29d732a1c7bc5150b9195c4","subRelType":"provision","target":"20|grid________::94948cc036605bf4a00ec77ce5ca92d3"}
{"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"20|grid________::bd5cbea5dc434b8fd811a880cb9d4a05","subRelType":"provision","target":"40|aka_________::9d1af21dbd0f5bc719f71553d19a6b3a"}
{"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|re3data_____::cafe7980294aa5f935f433e7c8aab844","subRelType":"provision","target":"20|dedup_wf_001::2806db65ba8029ee196679cad067eff2"}
{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::0cd5965141113df5739f1ac7ac7f6d37","subRelType":"participation","target":"40|aka_________::1bc716a1763110da3eb1af867de718a8"}
{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::1e2df822bf0932ad0f77565789f22e17","subRelType":"participation","target":"40|aka_________::a6c805bcfd383bae043d8df38e79db78"}
{"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8966"},"lastupdatetimestamp":1595258695262,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|aka_________::2c3aab6bce7516338b4dbfb4f6f86db7","subRelType":"affiliation","target":"50|dedup_wf_001::02859c30f6c8bfbdd8c427068a6ec684"}
{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::8bcd11b6bffc76f4d4f88d7a6728d614","subRelType":"participation","target":"40|aka_________::645123c3fe7bab557c36f0f9bb02a4cd"}
{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::8bcd11b6bffc76f4d4f88d7a6728d614","subRelType":"participation","target":"40|aka_________::fecf4f862a6b40dd2ccb1abc8fed5bc5"}
{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::a30b9a45766293af38951d767e77a471","subRelType":"participation","target":"40|aka_________::b733217d1cd609001dd3c75af419d872"}
{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::a30b9a45766293af38951d767e77a471","subRelType":"participation","target":"40|aka_________::c33dee8231ad0374caf93e52c5a473e5"}
{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::ddcb6d65425e4531a11c610488d42d81","subRelType":"participation","target":"40|aka_________::4d7027ac28c8ac9cc72b062ac4992b4e"}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,3 @@
{"collectedfrom":[{"key":"10|openaire____::ff4a008470319a22d9cf3d14af485977","value":"GRID - Global Research Identifier Database","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":"","provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1566902410217,"id":"20|grid________::87698402476531ba39e61f1df38f2a91","originalId":[],"pid":[{"value":"grid.451954.8","qualifier":{"classid":"grid","classname":"grid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}}],"dateofcollection":"","dateoftransformation":"","extraInfo":[],"oaiprovenance":null,"legalshortname":{"value":"SHoF","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"legalname":{"value":"Swedish House of Finance","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"alternativeNames":[{"value":"SHoF","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}}],"websiteurl":{"value":"http://houseoffinance.se/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"logourl":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"eclegalbody":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"eclegalperson":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"ecnonprofit":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"ecresearchorganization":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"echighereducation":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"ecinternationalorganizationeurinterests":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"ecinternationalorganization":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"ecenterprise":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"ecsmevalidated":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"ecnutscode":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"country":{"classid":"SE","classname":"Sweden","schemeid":"dnet:countries","schemename":"dnet:countries"}}
{"collectedfrom":[{"key":"10|openaire____::ff4a008470319a22d9cf3d14af485977","value":"GRID - Global Research Identifier Database","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":"","provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1566902414749,"id":"20|grid________::bd5cbea5dc434b8fd811a880cb9d4a05","originalId":[],"pid":[{"value":"grid.496778.3","qualifier":{"classid":"grid","classname":"grid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}}],"dateofcollection":"","dateoftransformation":"","extraInfo":[],"oaiprovenance":null,"legalshortname":{"value":"Korean Elementary Moral Education Society","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"legalname":{"value":"Korean Elementary Moral Education Society","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"alternativeNames":[{"value":"한국초등도덕교육학회","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}}],"websiteurl":{"value":"http://www.ethics.or.kr/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"logourl":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"eclegalbody":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"eclegalperson":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"ecnonprofit":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"ecresearchorganization":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"echighereducation":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"ecinternationalorganizationeurinterests":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"ecinternationalorganization":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"ecenterprise":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"ecsmevalidated":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"ecnutscode":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"country":{"classid":"KR","classname":"Korea (Republic of)","schemeid":"dnet:countries","schemename":"dnet:countries"}}
{"collectedfrom":[{"key":"10|openaire____::ff4a008470319a22d9cf3d14af485977","value":"GRID - Global Research Identifier Database","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":"","provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1566902413769,"id":"20|grid________::94948cc036605bf4a00ec77ce5ca92d3","originalId":[],"pid":[{"value":"grid.487707.b","qualifier":{"classid":"grid","classname":"grid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}}],"dateofcollection":"","dateoftransformation":"","extraInfo":[],"oaiprovenance":null,"legalshortname":{"value":"NHC","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"legalname":{"value":"National Health Council","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"alternativeNames":[{"value":"NHC","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}}],"websiteurl":{"value":"http://www.nationalhealthcouncil.org/","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"logourl":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"eclegalbody":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"eclegalperson":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"ecnonprofit":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"ecresearchorganization":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"echighereducation":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"ecinternationalorganizationeurinterests":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"ecinternationalorganization":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"ecenterprise":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"ecsmevalidated":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"ecnutscode":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"country":{"classid":"US","classname":"United States","schemeid":"dnet:countries","schemename":"dnet:countries"}}

View File

@ -0,0 +1,3 @@
{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1594398578323,"id":"40|aka_________::01bb7b48e29d732a1c7bc5150b9195c4","originalId":["aka_________::135027"],"pid":[],"dateofcollection":"2019-01-25","dateoftransformation":"2019-04-16","extraInfo":[],"oaiprovenance":null,"websiteurl":null,"code":{"value":"135027","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"acronym":null,"title":{"value":"Dynamic 3D resolution-enhanced low-coherence interferometric imaging / Consortium: Hi-Lo","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"startdate":null,"enddate":null,"callidentifier":{"value":"Fotoniikka ja modernit kuvantamismenetelmät LT","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"keywords":null,"duration":{"value":"0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsc39":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"oamandatepublications":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecarticle29_3":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"subjects":[],"fundingtree":[{"value":"<fundingtree><funder>\n <id>aka_________::AKA</id>\n <shortname>AKA</shortname>\n <name>Academy of Finland</name>\n <originalname>Academy of Finland</originalname>\n <jurisdiction>FI</jurisdiction>\n </funder></fundingtree>","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contracttype":null,"optional1":{"value":"190,000 €","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"optional2":null,"jsonextrainfo":{"value":"{}","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"contactfullname":null,"contactfax":null,"contactphone":null,"contactemail":null,"summary":null,"currency":null,"totalcost":0.0,"fundedamount":0.0,"h2020topiccode":null,"h2020topicdescription":null,"h2020classification":null}
{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1594398578323,"id":"40|aka_________::9d1af21dbd0f5bc719f71553d19a6b3a","originalId":["aka_________::316061"],"pid":[],"dateofcollection":"2019-01-25","dateoftransformation":"2019-04-16","extraInfo":[],"oaiprovenance":null,"websiteurl":null,"code":{"value":"316061","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"acronym":null,"title":{"value":"Finnish Imaging of Degenerative Shoulder Study (FIMAGE): A study on the prevalence of degenerative imaging changes of the shoulder and their relevance to clinical symptoms in the general population.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"startdate":null,"enddate":null,"callidentifier":{"value":"Academy Project Funding TT","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"keywords":null,"duration":{"value":"0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsc39":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"oamandatepublications":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecarticle29_3":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"subjects":[],"fundingtree":[{"value":"<fundingtree><funder>\n <id>aka_________::AKA</id>\n <shortname>AKA</shortname>\n <name>Academy of Finland</name>\n <originalname>Academy of Finland</originalname>\n <jurisdiction>FI</jurisdiction>\n </funder></fundingtree>","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contracttype":null,"optional1":{"value":"450,000 €","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"optional2":null,"jsonextrainfo":{"value":"{}","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"contactfullname":null,"contactfax":null,"contactphone":null,"contactemail":null,"summary":null,"currency":null,"totalcost":0.0,"fundedamount":0.0,"h2020topiccode":null,"h2020topicdescription":null,"h2020classification":null}
{"collectedfrom":[{"key":"10|openaire____::457528c43fabd74e212db2ed61101075","value":"Agence Nationale de la Recherche","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1594398578323,"id":"40|anr_________::1f21edc5c902be305ee47148955c6e50","originalId":["anr_________::ANR-17-CE05-0033"],"pid":[],"dateofcollection":"2019-12-24","dateoftransformation":"2020-01-07","extraInfo":[],"oaiprovenance":null,"websiteurl":null,"code":{"value":"ANR-17-CE05-0033","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"acronym":{"value":"MOISE","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"title":{"value":"METAL OXIDES AS LOW LOADED NANO-IRIDIUM SUPPORT FOR COMPETITIVE WATER ELECTROLYSIS","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"startdate":null,"enddate":null,"callidentifier":null,"keywords":null,"duration":{"value":"0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsc39":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"oamandatepublications":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecarticle29_3":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"subjects":[],"fundingtree":[{"value":"<fundingtree><funder>\n <id>anr_________::ANR</id>\n <shortname>ANR</shortname>\n <name>French National Research Agency (ANR)</name>\n <originalname>Agence Nationale de la Recherche</originalname>\n <jurisdiction>FR</jurisdiction>\n </funder></fundingtree>","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contracttype":null,"optional1":null,"optional2":null,"jsonextrainfo":{"value":"{}","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"contactfullname":null,"contactfax":null,"contactphone":null,"contactemail":null,"summary":null,"currency":null,"totalcost":0.0,"fundedamount":0.0,"h2020topiccode":null,"h2020topicdescription":null,"h2020classification":null}

File diff suppressed because one or more lines are too long