graph dump procedures migrated from dnet-hadoop

parent ec93139e54
commit c3644fce3c

pom.xml
@@ -50,6 +50,36 @@
    <dependencies>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
        </dependency>

        <dependency>
            <groupId>dom4j</groupId>
            <artifactId>dom4j</artifactId>
        </dependency>

        <dependency>
            <groupId>jaxen</groupId>
            <artifactId>jaxen</artifactId>
        </dependency>

        <dependency>
            <groupId>eu.dnetlib.dhp</groupId>
            <artifactId>dhp-schemas</artifactId>
        </dependency>

        <dependency>
            <groupId>eu.dnetlib.dhp</groupId>
            <artifactId>dhp-common</artifactId>
            <version>${project.version}</version>
        </dependency>

    </dependencies>

eu/dnetlib/dhp/oa/graph/dump/Constants.java
@@ -0,0 +1,58 @@
package eu.dnetlib.dhp.oa.graph.dump;

import java.util.Map;

import com.google.common.collect.Maps;

import eu.dnetlib.dhp.schema.common.ModelConstants;

public class Constants {

    protected static final Map<String, String> accessRightsCoarMap = Maps.newHashMap();
    protected static final Map<String, String> coarCodeLabelMap = Maps.newHashMap();

    public static final String INFERRED = "Inferred by OpenAIRE";
    public static final String CABF2 = "c_abf2";

    public static final String HARVESTED = "Harvested";
    public static final String DEFAULT_TRUST = "0.9";
    public static final String USER_CLAIM = "Linked by user";

    public static final String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/";

    public static final String ZENODO_COMMUNITY_PREFIX = "https://zenodo.org/communities/";

    public static final String RESEARCH_COMMUNITY = "Research Community";

    public static final String RESEARCH_INFRASTRUCTURE = "Research Infrastructure/Initiative";

    static {
        accessRightsCoarMap.put(ModelConstants.ACCESS_RIGHT_OPEN, CABF2);
        accessRightsCoarMap.put("RESTRICTED", "c_16ec");
        accessRightsCoarMap.put("OPEN SOURCE", CABF2);
        accessRightsCoarMap.put(ModelConstants.ACCESS_RIGHT_CLOSED, "c_14cb");
        accessRightsCoarMap.put(ModelConstants.ACCESS_RIGHT_EMBARGO, "c_f1cf");
    }

    static {
        coarCodeLabelMap.put(CABF2, ModelConstants.ACCESS_RIGHT_OPEN);
        coarCodeLabelMap.put("c_16ec", "RESTRICTED");
        coarCodeLabelMap.put("c_14cb", ModelConstants.ACCESS_RIGHT_CLOSED);
        coarCodeLabelMap.put("c_f1cf", "EMBARGO");
    }

    public enum DUMPTYPE {
        COMPLETE("complete"), COMMUNITY("community"), FUNDER("funder");

        private final String type;

        DUMPTYPE(String type) {
            this.type = type;
        }

        public String getType() {
            return type;
        }
    }
}
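The two maps form a round trip from an OAF access-right classid to a COAR code and back to a label. A minimal, same-package sketch (the maps are package-protected, so this only compiles from eu.dnetlib.dhp.oa.graph.dump, and it assumes the same ModelConstants import used above):

    // illustrative only: translate an OAF access-right classid into a COAR code, label and scheme
    String classid = ModelConstants.ACCESS_RIGHT_OPEN;              // example classid from a bestaccessright qualifier
    String coarCode = Constants.accessRightsCoarMap.get(classid);   // -> "c_abf2"
    String coarLabel = Constants.coarCodeLabelMap.get(coarCode);    // -> ModelConstants.ACCESS_RIGHT_OPEN
    String scheme = Constants.COAR_ACCESS_RIGHT_SCHEMA;             // vocabulary the code belongs to
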
eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java
@@ -0,0 +1,107 @@
package eu.dnetlib.dhp.oa.graph.dump;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

import java.io.Serializable;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
import eu.dnetlib.dhp.schema.oaf.*;

/**
 * Fires the execution of the actual dump for result entities. If the dump is for RC/RI products, it checks that each
 * result belongs to at least one RC/RI before asking for its mapping.
 */
public class DumpProducts implements Serializable {

    public void run(Boolean isSparkSessionManaged, String inputPath, String outputPath, String communityMapPath,
        Class<? extends OafEntity> inputClazz,
        Class<? extends eu.dnetlib.dhp.schema.dump.oaf.Result> outputClazz,
        String dumpType) {

        SparkConf conf = new SparkConf();

        runWithSparkSession(
            conf,
            isSparkSessionManaged,
            spark -> {
                Utils.removeOutputDir(spark, outputPath);
                execDump(spark, inputPath, outputPath, communityMapPath, inputClazz, outputClazz, dumpType);
            });
    }

    public static <I extends OafEntity, O extends eu.dnetlib.dhp.schema.dump.oaf.Result> void execDump(
        SparkSession spark,
        String inputPath,
        String outputPath,
        String communityMapPath,
        Class<I> inputClazz,
        Class<O> outputClazz,
        String dumpType) {

        CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath);

        Utils
            .readPath(spark, inputPath, inputClazz)
            .map((MapFunction<I, O>) value -> execMap(value, communityMap, dumpType), Encoders.bean(outputClazz))
            .filter((FilterFunction<O>) value -> value != null)
            .write()
            .mode(SaveMode.Overwrite)
            .option("compression", "gzip")
            .json(outputPath);
    }

    private static <I extends OafEntity, O extends eu.dnetlib.dhp.schema.dump.oaf.Result> O execMap(I value,
        CommunityMap communityMap,
        String dumpType) throws NoAvailableEntityTypeException {

        Optional<DataInfo> odInfo = Optional.ofNullable(value.getDataInfo());
        if (odInfo.isPresent()) {
            if (odInfo.get().getDeletedbyinference() || odInfo.get().getInvisible()) {
                return null;
            }
        } else {
            return null;
        }

        if (Constants.DUMPTYPE.COMMUNITY.getType().equals(dumpType)) {
            Set<String> communities = communityMap.keySet();

            Optional<List<Context>> inputContext = Optional
                .ofNullable(((eu.dnetlib.dhp.schema.oaf.Result) value).getContext());
            if (!inputContext.isPresent()) {
                return null;
            }
            List<String> toDumpFor = inputContext.get().stream().map(c -> {
                if (communities.contains(c.getId())) {
                    return c.getId();
                }
                if (c.getId().contains("::") && communities.contains(c.getId().substring(0, c.getId().indexOf("::")))) {
                    return c.getId().substring(0, c.getId().indexOf("::"));
                }
                return null;
            }).filter(Objects::nonNull).collect(Collectors.toList());
            if (toDumpFor.isEmpty()) {
                return null;
            }
        }

        return (O) ResultMapper.map(value, communityMap, dumpType);
    }
}
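As a rough usage sketch: the paths below are invented and the real wiring is done by SparkDumpCommunityProducts further down, but a community dump of publications would be triggered roughly like this:

    // hypothetical invocation; input/output paths are placeholders
    new DumpProducts()
        .run(
            Boolean.TRUE,                       // let runWithSparkSession manage the SparkSession
            "/tmp/graph/publication",           // serialized eu.dnetlib.dhp.schema.oaf.Publication records
            "/tmp/dump/publication",            // gzipped JSON output
            "/tmp/dump/communityMap",           // file produced by SaveCommunityMap
            eu.dnetlib.dhp.schema.oaf.Publication.class,
            eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult.class,
            Constants.DUMPTYPE.COMMUNITY.getType());
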
eu/dnetlib/dhp/oa/graph/dump/MakeTar.java
@@ -0,0 +1,74 @@
package eu.dnetlib.dhp.oa.graph.dump;

import java.io.IOException;
import java.io.Serializable;
import java.util.Optional;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.MakeTarArchive;

public class MakeTar implements Serializable {

    private static final Logger log = LoggerFactory.getLogger(MakeTar.class);

    public static void main(String[] args) throws Exception {
        String jsonConfiguration = IOUtils
            .toString(
                MakeTar.class
                    .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/input_maketar_parameters.json"));

        final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
        parser.parseArgument(args);

        final String outputPath = parser.get("hdfsPath");
        log.info("hdfsPath: {}", outputPath);

        final String hdfsNameNode = parser.get("nameNode");
        log.info("nameNode: {}", hdfsNameNode);

        final String inputPath = parser.get("sourcePath");
        log.info("input path : {}", inputPath);

        final int gBperSplit = Optional
            .ofNullable(parser.get("splitSize"))
            .map(Integer::valueOf)
            .orElse(10);

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", hdfsNameNode);

        FileSystem fileSystem = FileSystem.get(conf);

        makeTArArchive(fileSystem, inputPath, outputPath, gBperSplit);
    }

    public static void makeTArArchive(FileSystem fileSystem, String inputPath, String outputPath, int gBperSplit)
        throws IOException {

        RemoteIterator<LocatedFileStatus> dirIterator = fileSystem.listLocatedStatus(new Path(inputPath));

        while (dirIterator.hasNext()) {
            LocatedFileStatus fileStatus = dirIterator.next();

            Path p = fileStatus.getPath();
            String pathString = p.toString();
            String entity = pathString.substring(pathString.lastIndexOf("/") + 1);

            MakeTarArchive.tarMaxSize(fileSystem, pathString, outputPath + "/" + entity, entity, gBperSplit);
        }
    }
}
eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java
@@ -0,0 +1,74 @@
package eu.dnetlib.dhp.oa.graph.dump;

import java.io.StringReader;
import java.util.List;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import org.xml.sax.SAXException;

import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;

public class QueryInformationSystem {

    private ISLookUpService isLookUp;

    private static final String XQUERY_ALL = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " +
        " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
        " and ($x//context/param[./@name = 'status']/text() = 'all') " +
        " return " +
        "<community> " +
        "{$x//CONFIGURATION/context/@id}" +
        "{$x//CONFIGURATION/context/@label}" +
        "</community>";

    private static final String XQUERY_CI = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " +
        " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
        " and $x//CONFIGURATION/context[./@id=%s] " +
        " return " +
        "<community> " +
        "{$x//CONFIGURATION/context/@id}" +
        "{$x//CONFIGURATION/context/@label}" +
        "</community>";

    public CommunityMap getCommunityMap(boolean singleCommunity, String communityId)
        throws ISLookUpException, DocumentException, SAXException {
        if (singleCommunity)
            return getMap(isLookUp.quickSearchProfile(XQUERY_CI.replace("%s", "'" + communityId + "'")));
        return getMap(isLookUp.quickSearchProfile(XQUERY_ALL));
    }

    public ISLookUpService getIsLookUp() {
        return isLookUp;
    }

    public void setIsLookUp(ISLookUpService isLookUpService) {
        this.isLookUp = isLookUpService;
    }

    private CommunityMap getMap(List<String> communityMap) throws DocumentException, SAXException {
        final CommunityMap map = new CommunityMap();

        for (String xml : communityMap) {
            final Document doc;
            final SAXReader reader = new SAXReader();
            reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
            doc = reader.read(new StringReader(xml));
            Element root = doc.getRootElement();
            map.put(root.attribute("id").getValue(), root.attribute("label").getValue());
        }

        return map;
    }
}
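Each record returned by the XQuery is a small XML fragment carrying the context id and label as attributes, which getMap turns into a single CommunityMap entry. An illustrative record (id and label are invented):

    // <community id="egi" label="EGI Federation"/>
    // getMap(...) reads the two attributes and does: map.put("egi", "EGI Federation")
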
eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java
@@ -0,0 +1,672 @@
package eu.dnetlib.dhp.oa.graph.dump;

import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;

import org.apache.commons.lang3.StringUtils;

import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.dump.oaf.*;
import eu.dnetlib.dhp.schema.dump.oaf.AccessRight;
import eu.dnetlib.dhp.schema.dump.oaf.Author;
import eu.dnetlib.dhp.schema.dump.oaf.GeoLocation;
import eu.dnetlib.dhp.schema.dump.oaf.Instance;
import eu.dnetlib.dhp.schema.dump.oaf.Measure;
import eu.dnetlib.dhp.schema.dump.oaf.OpenAccessRoute;
import eu.dnetlib.dhp.schema.dump.oaf.Result;
import eu.dnetlib.dhp.schema.dump.oaf.community.CfHbKeyValue;
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityInstance;
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
import eu.dnetlib.dhp.schema.dump.oaf.community.Context;
import eu.dnetlib.dhp.schema.dump.oaf.graph.GraphResult;
import eu.dnetlib.dhp.schema.oaf.*;

public class ResultMapper implements Serializable {

    public static <E extends eu.dnetlib.dhp.schema.oaf.OafEntity> Result map(
        E in, Map<String, String> communityMap, String dumpType) throws NoAvailableEntityTypeException {

        Result out;
        if (Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) {
            out = new GraphResult();
        } else {
            out = new CommunityResult();
        }

        eu.dnetlib.dhp.schema.oaf.Result input = (eu.dnetlib.dhp.schema.oaf.Result) in;
        Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> ort = Optional.ofNullable(input.getResulttype());
        if (ort.isPresent()) {
            try {

                addTypeSpecificInformation(out, input, ort);

                Optional
                    .ofNullable(input.getAuthor())
                    .ifPresent(
                        ats -> out.setAuthor(ats.stream().map(ResultMapper::getAuthor).collect(Collectors.toList())));

                // Access Right UNKNOWN or OTHER is not mapped

                Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> oar = Optional.ofNullable(input.getBestaccessright());
                if (oar.isPresent() && Constants.accessRightsCoarMap.containsKey(oar.get().getClassid())) {
                    String code = Constants.accessRightsCoarMap.get(oar.get().getClassid());
                    out
                        .setBestaccessright(
                            BestAccessRight
                                .newInstance(
                                    code,
                                    Constants.coarCodeLabelMap.get(code),
                                    Constants.COAR_ACCESS_RIGHT_SCHEMA));
                }

                final List<String> contributorList = new ArrayList<>();
                Optional
                    .ofNullable(input.getContributor())
                    .ifPresent(value -> value.stream().forEach(c -> contributorList.add(c.getValue())));
                out.setContributor(contributorList);

                Optional
                    .ofNullable(input.getCountry())
                    .ifPresent(
                        value -> out
                            .setCountry(
                                value
                                    .stream()
                                    .map(c -> {
                                        if (c.getClassid().equals((ModelConstants.UNKNOWN))) {
                                            return null;
                                        }
                                        ResultCountry country = new ResultCountry();
                                        country.setCode(c.getClassid());
                                        country.setLabel(c.getClassname());
                                        Optional
                                            .ofNullable(c.getDataInfo())
                                            .ifPresent(
                                                provenance -> country
                                                    .setProvenance(
                                                        Provenance
                                                            .newInstance(
                                                                provenance.getProvenanceaction().getClassname(),
                                                                c.getDataInfo().getTrust())));
                                        return country;
                                    })
                                    .filter(Objects::nonNull)
                                    .collect(Collectors.toList())));

                final List<String> coverageList = new ArrayList<>();
                Optional
                    .ofNullable(input.getCoverage())
                    .ifPresent(value -> value.stream().forEach(c -> coverageList.add(c.getValue())));
                out.setCoverage(coverageList);

                out.setDateofcollection(input.getDateofcollection());

                final List<String> descriptionList = new ArrayList<>();
                Optional
                    .ofNullable(input.getDescription())
                    .ifPresent(value -> value.forEach(d -> descriptionList.add(d.getValue())));
                out.setDescription(descriptionList);

                Optional<Field<String>> oStr = Optional.ofNullable(input.getEmbargoenddate());
                if (oStr.isPresent()) {
                    out.setEmbargoenddate(oStr.get().getValue());
                }

                final List<String> formatList = new ArrayList<>();
                Optional
                    .ofNullable(input.getFormat())
                    .ifPresent(value -> value.stream().forEach(f -> formatList.add(f.getValue())));
                out.setFormat(formatList);
                out.setId(input.getId());
                out.setOriginalId(new ArrayList<>());
                Optional
                    .ofNullable(input.getOriginalId())
                    .ifPresent(
                        v -> out
                            .setOriginalId(
                                input
                                    .getOriginalId()
                                    .stream()
                                    .filter(s -> !s.startsWith("50|"))
                                    .collect(Collectors.toList())));

                Optional<List<eu.dnetlib.dhp.schema.oaf.Instance>> oInst = Optional
                    .ofNullable(input.getInstance());

                if (oInst.isPresent()) {
                    if (Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) {
                        ((GraphResult) out)
                            .setInstance(
                                oInst.get().stream().map(ResultMapper::getGraphInstance).collect(Collectors.toList()));
                    } else {
                        ((CommunityResult) out)
                            .setInstance(
                                oInst
                                    .get()
                                    .stream()
                                    .map(ResultMapper::getCommunityInstance)
                                    .collect(Collectors.toList()));
                    }
                }

                Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> oL = Optional.ofNullable(input.getLanguage());
                if (oL.isPresent()) {
                    eu.dnetlib.dhp.schema.oaf.Qualifier language = oL.get();
                    out.setLanguage(Language.newInstance(language.getClassid(), language.getClassname()));
                }
                Optional<Long> oLong = Optional.ofNullable(input.getLastupdatetimestamp());
                if (oLong.isPresent()) {
                    out.setLastupdatetimestamp(oLong.get());
                }
                Optional<List<StructuredProperty>> otitle = Optional.ofNullable(input.getTitle());
                if (otitle.isPresent()) {
                    List<StructuredProperty> iTitle = otitle
                        .get()
                        .stream()
                        .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title"))
                        .collect(Collectors.toList());
                    if (!iTitle.isEmpty()) {
                        out.setMaintitle(iTitle.get(0).getValue());
                    }

                    iTitle = otitle
                        .get()
                        .stream()
                        .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle"))
                        .collect(Collectors.toList());
                    if (!iTitle.isEmpty()) {
                        out.setSubtitle(iTitle.get(0).getValue());
                    }
                }

                Optional
                    .ofNullable(input.getPid())
                    .ifPresent(
                        value -> out
                            .setPid(
                                value
                                    .stream()
                                    .map(p -> ResultPid.newInstance(p.getQualifier().getClassid(), p.getValue()))
                                    .collect(Collectors.toList())));

                oStr = Optional.ofNullable(input.getDateofacceptance());
                if (oStr.isPresent()) {
                    out.setPublicationdate(oStr.get().getValue());
                }
                oStr = Optional.ofNullable(input.getPublisher());
                if (oStr.isPresent()) {
                    out.setPublisher(oStr.get().getValue());
                }

                Optional
                    .ofNullable(input.getSource())
                    .ifPresent(
                        value -> out.setSource(value.stream().map(Field::getValue).collect(Collectors.toList())));

                List<Subject> subjectList = new ArrayList<>();
                Optional
                    .ofNullable(input.getSubject())
                    .ifPresent(value -> value.forEach(s -> subjectList.add(getSubject(s))));

                out.setSubjects(subjectList);

                out.setType(input.getResulttype().getClassid());

                if (!Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) {
                    ((CommunityResult) out)
                        .setCollectedfrom(
                            input
                                .getCollectedfrom()
                                .stream()
                                .map(cf -> CfHbKeyValue.newInstance(cf.getKey(), cf.getValue()))
                                .collect(Collectors.toList()));

                    Set<String> communities = communityMap.keySet();
                    List<Context> contextList = Optional
                        .ofNullable(input.getContext())
                        .map(
                            value -> value
                                .stream()
                                .map(c -> {
                                    String communityId = c.getId();
                                    if (communityId.contains("::")) {
                                        communityId = communityId.substring(0, communityId.indexOf("::"));
                                    }
                                    if (communities.contains(communityId)) {
                                        Context context = new Context();
                                        context.setCode(communityId);
                                        context.setLabel(communityMap.get(communityId));
                                        Optional<List<DataInfo>> dataInfo = Optional.ofNullable(c.getDataInfo());
                                        if (dataInfo.isPresent()) {
                                            List<Provenance> provenance = new ArrayList<>();
                                            provenance
                                                .addAll(
                                                    dataInfo
                                                        .get()
                                                        .stream()
                                                        .map(
                                                            di -> Optional
                                                                .ofNullable(di.getProvenanceaction())
                                                                .map(
                                                                    provenanceaction -> Provenance
                                                                        .newInstance(
                                                                            provenanceaction.getClassname(),
                                                                            di.getTrust()))
                                                                .orElse(null))
                                                        .filter(Objects::nonNull)
                                                        .collect(Collectors.toSet()));

                                            try {
                                                context.setProvenance(getUniqueProvenance(provenance));
                                            } catch (NoAvailableEntityTypeException e) {
                                                e.printStackTrace();
                                            }
                                        }
                                        return context;
                                    }
                                    return null;
                                })
                                .filter(Objects::nonNull)
                                .collect(Collectors.toList()))
                        .orElse(new ArrayList<>());

                    if (!contextList.isEmpty()) {
                        Set<Integer> hashValue = new HashSet<>();
                        List<Context> remainingContext = new ArrayList<>();
                        contextList.forEach(c -> {
                            if (!hashValue.contains(c.hashCode())) {
                                remainingContext.add(c);
                                hashValue.add(c.hashCode());
                            }
                        });
                        ((CommunityResult) out).setContext(remainingContext);
                    }
                }
            } catch (ClassCastException cce) {
                return out;
            }
        }

        return out;
    }

    private static void addTypeSpecificInformation(Result out, eu.dnetlib.dhp.schema.oaf.Result input,
        Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> ort) throws NoAvailableEntityTypeException {
        switch (ort.get().getClassid()) {
            case "publication":
                Optional<Journal> journal = Optional.ofNullable(((Publication) input).getJournal());
                if (journal.isPresent()) {
                    Journal j = journal.get();
                    Container c = new Container();
                    c.setConferencedate(j.getConferencedate());
                    c.setConferenceplace(j.getConferenceplace());
                    c.setEdition(j.getEdition());
                    c.setEp(j.getEp());
                    c.setIss(j.getIss());
                    c.setIssnLinking(j.getIssnLinking());
                    c.setIssnOnline(j.getIssnOnline());
                    c.setIssnPrinted(j.getIssnPrinted());
                    c.setName(j.getName());
                    c.setSp(j.getSp());
                    c.setVol(j.getVol());
                    out.setContainer(c);
                    out.setType(ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE.getClassname());
                }
                break;
            case "dataset":
                Dataset id = (Dataset) input;
                Optional.ofNullable(id.getSize()).ifPresent(v -> out.setSize(v.getValue()));
                Optional.ofNullable(id.getVersion()).ifPresent(v -> out.setVersion(v.getValue()));

                out
                    .setGeolocation(
                        Optional
                            .ofNullable(id.getGeolocation())
                            .map(
                                igl -> igl
                                    .stream()
                                    .filter(Objects::nonNull)
                                    .map(gli -> {
                                        GeoLocation gl = new GeoLocation();
                                        gl.setBox(gli.getBox());
                                        gl.setPlace(gli.getPlace());
                                        gl.setPoint(gli.getPoint());
                                        return gl;
                                    })
                                    .collect(Collectors.toList()))
                            .orElse(null));

                out.setType(ModelConstants.DATASET_DEFAULT_RESULTTYPE.getClassname());
                break;
            case "software":

                Software is = (Software) input;
                Optional
                    .ofNullable(is.getCodeRepositoryUrl())
                    .ifPresent(value -> out.setCodeRepositoryUrl(value.getValue()));
                Optional
                    .ofNullable(is.getDocumentationUrl())
                    .ifPresent(
                        value -> out
                            .setDocumentationUrl(value.stream().map(Field::getValue).collect(Collectors.toList())));

                Optional
                    .ofNullable(is.getProgrammingLanguage())
                    .ifPresent(value -> out.setProgrammingLanguage(value.getClassid()));

                out.setType(ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE.getClassname());
                break;
            case "other":

                OtherResearchProduct ir = (OtherResearchProduct) input;
                out
                    .setContactgroup(
                        Optional
                            .ofNullable(ir.getContactgroup())
                            .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList()))
                            .orElse(null));

                out
                    .setContactperson(
                        Optional
                            .ofNullable(ir.getContactperson())
                            .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList()))
                            .orElse(null));
                out
                    .setTool(
                        Optional
                            .ofNullable(ir.getTool())
                            .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList()))
                            .orElse(null));

                out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname());

                break;
            default:
                throw new NoAvailableEntityTypeException();
        }
    }

    private static Instance getGraphInstance(eu.dnetlib.dhp.schema.oaf.Instance i) {
        Instance instance = new Instance();

        setCommonValue(i, instance);

        return instance;
    }

    private static CommunityInstance getCommunityInstance(eu.dnetlib.dhp.schema.oaf.Instance i) {
        CommunityInstance instance = new CommunityInstance();

        setCommonValue(i, instance);

        instance
            .setCollectedfrom(
                CfHbKeyValue.newInstance(i.getCollectedfrom().getKey(), i.getCollectedfrom().getValue()));

        instance
            .setHostedby(
                CfHbKeyValue.newInstance(i.getHostedby().getKey(), i.getHostedby().getValue()));

        return instance;
    }

    private static <I extends Instance> void setCommonValue(eu.dnetlib.dhp.schema.oaf.Instance i, I instance) {
        Optional<eu.dnetlib.dhp.schema.oaf.AccessRight> opAr = Optional.ofNullable(i.getAccessright());

        if (opAr.isPresent() && Constants.accessRightsCoarMap.containsKey(opAr.get().getClassid())) {
            String code = Constants.accessRightsCoarMap.get(opAr.get().getClassid());

            instance
                .setAccessright(
                    AccessRight
                        .newInstance(
                            code,
                            Constants.coarCodeLabelMap.get(code),
                            Constants.COAR_ACCESS_RIGHT_SCHEMA));

            Optional<List<eu.dnetlib.dhp.schema.oaf.Measure>> mes = Optional.ofNullable(i.getMeasures());
            if (mes.isPresent()) {
                List<Measure> measure = new ArrayList<>();
                mes
                    .get()
                    .forEach(
                        m -> m.getUnit().forEach(u -> measure.add(Measure.newInstance(m.getId(), u.getValue()))));
                instance.setMeasures(measure);
            }

            if (opAr.get().getOpenAccessRoute() != null) {
                switch (opAr.get().getOpenAccessRoute()) {
                    case hybrid:
                        instance.getAccessright().setOpenAccessRoute(OpenAccessRoute.hybrid);
                        break;
                    case gold:
                        instance.getAccessright().setOpenAccessRoute(OpenAccessRoute.gold);
                        break;
                    case green:
                        instance.getAccessright().setOpenAccessRoute(OpenAccessRoute.green);
                        break;
                    case bronze:
                        instance.getAccessright().setOpenAccessRoute(OpenAccessRoute.bronze);
                        break;
                }
            }
        }

        Optional
            .ofNullable(i.getPid())
            .ifPresent(
                pid -> instance
                    .setPid(
                        pid
                            .stream()
                            .map(p -> ResultPid.newInstance(p.getQualifier().getClassid(), p.getValue()))
                            .collect(Collectors.toList())));

        Optional
            .ofNullable(i.getAlternateIdentifier())
            .ifPresent(
                ai -> instance
                    .setAlternateIdentifier(
                        ai
                            .stream()
                            .map(p -> AlternateIdentifier.newInstance(p.getQualifier().getClassid(), p.getValue()))
                            .collect(Collectors.toList())));

        Optional
            .ofNullable(i.getLicense())
            .ifPresent(value -> instance.setLicense(value.getValue()));
        Optional
            .ofNullable(i.getDateofacceptance())
            .ifPresent(value -> instance.setPublicationdate(value.getValue()));
        Optional
            .ofNullable(i.getRefereed())
            .ifPresent(value -> instance.setRefereed(value.getClassname()));
        Optional
            .ofNullable(i.getInstancetype())
            .ifPresent(value -> instance.setType(value.getClassname()));
        Optional.ofNullable(i.getUrl()).ifPresent(value -> instance.setUrl(value));
        Optional<Field<String>> oPca = Optional.ofNullable(i.getProcessingchargeamount());
        Optional<Field<String>> oPcc = Optional.ofNullable(i.getProcessingchargecurrency());
        if (oPca.isPresent() && oPcc.isPresent()) {
            Field<String> pca = oPca.get();
            Field<String> pcc = oPcc.get();
            if (!pca.getValue().trim().equals("") && !pcc.getValue().trim().equals("")) {
                APC apc = new APC();
                apc.setCurrency(oPcc.get().getValue());
                apc.setAmount(oPca.get().getValue());
                instance.setArticleprocessingcharge(apc);
            }
        }
        Optional.ofNullable(i.getUrl()).ifPresent(instance::setUrl);
    }

    private static List<Provenance> getUniqueProvenance(List<Provenance> provenance)
        throws NoAvailableEntityTypeException {
        Provenance iProv = new Provenance();

        Provenance hProv = new Provenance();
        Provenance lProv = new Provenance();

        for (Provenance p : provenance) {
            switch (p.getProvenance()) {
                case Constants.HARVESTED:
                    hProv = getHighestTrust(hProv, p);
                    break;
                case Constants.INFERRED:
                    iProv = getHighestTrust(iProv, p);
                    // To be removed as soon as the new beta run has been done:
                    // this fixes the issue of the trust not being set during bulktagging
                    if (StringUtils.isEmpty(iProv.getTrust())) {
                        iProv.setTrust(Constants.DEFAULT_TRUST);
                    }
                    break;
                case Constants.USER_CLAIM:
                    lProv = getHighestTrust(lProv, p);
                    break;
                default:
                    throw new NoAvailableEntityTypeException();
            }
        }

        return Arrays
            .asList(iProv, hProv, lProv)
            .stream()
            .filter(p -> !StringUtils.isEmpty(p.getProvenance()))
            .collect(Collectors.toList());
    }

    private static Provenance getHighestTrust(Provenance hProv, Provenance p) {
        if (StringUtils.isNoneEmpty(hProv.getTrust(), p.getTrust()))
            return hProv.getTrust().compareTo(p.getTrust()) > 0 ? hProv : p;

        return (StringUtils.isEmpty(p.getTrust()) && !StringUtils.isEmpty(hProv.getTrust())) ? hProv : p;
    }

    private static Subject getSubject(StructuredProperty s) {
        Subject subject = new Subject();
        subject.setSubject(SubjectSchemeValue.newInstance(s.getQualifier().getClassid(), s.getValue()));
        Optional<DataInfo> di = Optional.ofNullable(s.getDataInfo());
        if (di.isPresent()) {
            Provenance p = new Provenance();
            p.setProvenance(di.get().getProvenanceaction().getClassname());
            p.setTrust(di.get().getTrust());
            subject.setProvenance(p);
        }

        return subject;
    }

    private static Author getAuthor(eu.dnetlib.dhp.schema.oaf.Author oa) {
        Author a = new Author();
        a.setFullname(oa.getFullname());
        a.setName(oa.getName());
        a.setSurname(oa.getSurname());
        a.setRank(oa.getRank());

        Optional<List<StructuredProperty>> oPids = Optional.ofNullable(oa.getPid());
        if (oPids.isPresent()) {
            AuthorPid pid = getOrcid(oPids.get());
            if (pid != null) {
                a.setPid(pid);
            }
        }

        return a;
    }

    private static AuthorPid getAuthorPid(StructuredProperty pid) {
        Optional<DataInfo> di = Optional.ofNullable(pid.getDataInfo());
        if (di.isPresent()) {
            return AuthorPid
                .newInstance(
                    AuthorPidSchemeValue
                        .newInstance(
                            pid.getQualifier().getClassid(),
                            pid.getValue()),
                    Provenance
                        .newInstance(
                            di.get().getProvenanceaction().getClassname(),
                            di.get().getTrust()));
        } else {
            return AuthorPid
                .newInstance(
                    AuthorPidSchemeValue
                        .newInstance(
                            pid.getQualifier().getClassid(),
                            pid.getValue()));
        }
    }

    private static AuthorPid getOrcid(List<StructuredProperty> p) {
        List<StructuredProperty> pidList = p.stream().map(pid -> {
            if (pid.getQualifier().getClassid().equals(ModelConstants.ORCID) ||
                (pid.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING))) {
                return pid;
            }
            return null;
        }).filter(Objects::nonNull).collect(Collectors.toList());

        if (pidList.size() == 1) {
            return getAuthorPid(pidList.get(0));
        }

        List<StructuredProperty> orcid = pidList
            .stream()
            .filter(ap -> ap.getQualifier().getClassid().equals(ModelConstants.ORCID))
            .collect(Collectors.toList());
        if (orcid.size() == 1) {
            return getAuthorPid(orcid.get(0));
        }
        orcid = pidList
            .stream()
            .filter(ap -> ap.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING))
            .collect(Collectors.toList());
        if (orcid.size() == 1) {
            return getAuthorPid(orcid.get(0));
        }

        return null;
    }
}
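A schematic, in-class example of what getUniqueProvenance does (the method is private and throws NoAvailableEntityTypeException for unknown provenance kinds; the trust values below are made up):

    // keep one entry per provenance kind, the one with the highest trust
    List<Provenance> merged = getUniqueProvenance(
        Arrays.asList(
            Provenance.newInstance(Constants.HARVESTED, "0.8"),
            Provenance.newInstance(Constants.HARVESTED, "0.9"),
            Provenance.newInstance(Constants.INFERRED, "")));   // empty trust is defaulted to DEFAULT_TRUST
    // merged now holds [Inferred by OpenAIRE / 0.9, Harvested / 0.9]
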
eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java
@@ -0,0 +1,98 @@
package eu.dnetlib.dhp.oa.graph.dump;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.Optional;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.dom4j.DocumentException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;

/**
 * Connects to the IS behind the isLookUpUrl received as parameter and saves the information about the contexts that
 * will guide the dump of the results. The information is saved as a HashMap: the key is the id of a research
 * community or research infrastructure/initiative, the value is its label.
 */
public class SaveCommunityMap implements Serializable {

    private static final Logger log = LoggerFactory.getLogger(SaveCommunityMap.class);
    private final transient QueryInformationSystem queryInformationSystem;

    private final transient BufferedWriter writer;

    public SaveCommunityMap(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws IOException {
        final Configuration conf = new Configuration();
        conf.set("fs.defaultFS", hdfsNameNode);
        FileSystem fileSystem = FileSystem.get(conf);
        Path hdfsWritePath = new Path(hdfsPath);

        if (fileSystem.exists(hdfsWritePath)) {
            fileSystem.delete(hdfsWritePath, true);
        }

        queryInformationSystem = new QueryInformationSystem();
        queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));

        FSDataOutputStream fos = fileSystem.create(hdfsWritePath);
        writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8));
    }

    public static void main(String[] args) throws Exception {
        String jsonConfiguration = IOUtils
            .toString(
                SaveCommunityMap.class
                    .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json"));

        final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
        parser.parseArgument(args);

        final String nameNode = parser.get("nameNode");
        log.info("nameNode: {}", nameNode);

        final String outputPath = parser.get("outputPath");
        log.info("outputPath: {}", outputPath);

        final String isLookUpUrl = parser.get("isLookUpUrl");
        log.info("isLookUpUrl: {}", isLookUpUrl);

        final Boolean singleCommunity = Optional
            .ofNullable(parser.get("singleDeposition"))
            .map(Boolean::valueOf)
            .orElse(false);

        final String community_id = Optional.ofNullable(parser.get("communityId")).orElse(null);

        final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode, isLookUpUrl);

        scm.saveCommunityMap(singleCommunity, community_id);
    }

    private void saveCommunityMap(boolean singleCommunity, String communityId)
        throws ISLookUpException, IOException, DocumentException, SAXException {
        final String communityMapString = Utils.OBJECT_MAPPER
            .writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, communityId));
        log.info("communityMap {} ", communityMapString);
        writer.write(communityMapString);
        writer.close();
    }
}
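The file written to outputPath is a single JSON object, later parsed back into a CommunityMap by Utils.getCommunityMap / Utils.readCommunityMap (below). An illustrative payload (ids and labels are invented):

    // {"egi":"EGI Federation","dh-ch":"Digital Humanities and Cultural Heritage"}
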
eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java
@@ -0,0 +1,100 @@
package eu.dnetlib.dhp.oa.graph.dump;

import java.io.Serializable;
import java.util.Optional;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.api.MissingConceptDoiException;
import eu.dnetlib.dhp.common.api.ZenodoAPIClient;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;

public class SendToZenodoHDFS implements Serializable {

    private static final String NEW = "new"; // used for a brand new deposition in Zenodo
    private static final String VERSION = "version"; // used to upload a new version of a published deposition
    private static final String UPDATE = "update"; // used to upload content to an open, not yet published deposition

    public static void main(final String[] args) throws Exception, MissingConceptDoiException {
        final ArgumentApplicationParser parser = new ArgumentApplicationParser(
            IOUtils
                .toString(
                    SendToZenodoHDFS.class
                        .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/upload_zenodo.json")));

        parser.parseArgument(args);

        final String hdfsPath = parser.get("hdfsPath");
        final String hdfsNameNode = parser.get("nameNode");
        final String access_token = parser.get("accessToken");
        final String connection_url = parser.get("connectionUrl");
        final String metadata = parser.get("metadata");
        final String depositionType = parser.get("depositionType");
        final String concept_rec_id = Optional
            .ofNullable(parser.get("conceptRecordId"))
            .orElse(null);
        final Boolean publish = Optional
            .ofNullable(parser.get("publish"))
            .map(Boolean::valueOf)
            .orElse(false);

        final String depositionId = Optional.ofNullable(parser.get("depositionId")).orElse(null);

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", hdfsNameNode);

        FileSystem fileSystem = FileSystem.get(conf);

        RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
            .listFiles(new Path(hdfsPath), true);
        ZenodoAPIClient zenodoApiClient = new ZenodoAPIClient(connection_url, access_token);
        switch (depositionType) {
            case NEW:
                zenodoApiClient.newDeposition();
                break;
            case VERSION:
                if (concept_rec_id == null) {
                    throw new MissingConceptDoiException("No concept record id has been provided");
                }
                zenodoApiClient.newVersion(concept_rec_id);
                break;
            case UPDATE:
                if (depositionId == null) {
                    throw new MissingConceptDoiException("No deposition id has been provided");
                }
                zenodoApiClient.uploadOpenDeposition(depositionId);
                break;
            default:
                throw new NoAvailableEntityTypeException();
        }

        while (fileStatusListIterator.hasNext()) {
            LocatedFileStatus fileStatus = fileStatusListIterator.next();

            Path p = fileStatus.getPath();
            String pString = p.toString();
            if (!pString.endsWith("_SUCCESS")) {
                String name = pString.substring(pString.lastIndexOf("/") + 1);

                FSDataInputStream inputStream = fileSystem.open(p);
                zenodoApiClient.uploadIS(inputStream, name, fileStatus.getLen());
            }
        }
        if (!metadata.equals("")) {
            zenodoApiClient.sendMretadata(metadata);
        }

        if (Boolean.TRUE.equals(publish)) {
            zenodoApiClient.publish();
        }
    }
}
eu/dnetlib/dhp/oa/graph/dump/Utils.java
@@ -0,0 +1,77 @@
package eu.dnetlib.dhp.oa.graph.dump;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;

import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.complete.Constants;
import eu.dnetlib.dhp.utils.DHPUtils;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;

public class Utils {
    public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    private Utils() {
    }

    public static void removeOutputDir(SparkSession spark, String path) {
        HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
    }

    public static <R> Dataset<R> readPath(
        SparkSession spark, String inputPath, Class<R> clazz) {
        return spark
            .read()
            .textFile(inputPath)
            .map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
    }

    public static ISLookUpService getIsLookUpService(String isLookUpUrl) {
        return ISLookupClientFactory.getLookUpService(isLookUpUrl);
    }

    public static String getContextId(String id) {

        return String
            .format(
                "%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX,
                DHPUtils.md5(id));
    }

    public static CommunityMap getCommunityMap(SparkSession spark, String communityMapPath) {

        return new Gson().fromJson(spark.read().textFile(communityMapPath).collectAsList().get(0), CommunityMap.class);
    }

    public static CommunityMap readCommunityMap(FileSystem fileSystem, String communityMapPath) throws IOException {
        BufferedReader br = new BufferedReader(new InputStreamReader(fileSystem.open(new Path(communityMapPath))));
        StringBuilder sb = new StringBuilder();
        try {
            String line;
            while ((line = br.readLine()) != null) {
                sb.append(line);
            }
        } finally {
            br.close();
        }

        return new Gson().fromJson(sb.toString(), CommunityMap.class);
    }
}
eu/dnetlib/dhp/oa/graph/dump/community/CommunityMap.java
@@ -0,0 +1,8 @@
package eu.dnetlib.dhp.oa.graph.dump.community;

import java.io.Serializable;
import java.util.HashMap;

public class CommunityMap extends HashMap<String, String> implements Serializable {
}
eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java
@@ -0,0 +1,81 @@
package eu.dnetlib.dhp.oa.graph.dump.community;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

import java.io.Serializable;
import java.util.Optional;
import java.util.stream.Collectors;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
import eu.dnetlib.dhp.schema.dump.oaf.community.Context;

/**
 * Splits the dumped results according to the research community - research initiative/infrastructure they are related
 * to. The community information is found in the "context.id" element of the result. Since the contexts found in a
 * result are not necessarily communities, a community map is provided to guide the splitting. Note: the repartition(1)
 * just before writing the results of a community is a choice forced by upload constraints (one file per community);
 * as soon as a better solution is in place, the repartition should be removed.
 */
public class CommunitySplit implements Serializable {

    public void run(Boolean isSparkSessionManaged, String inputPath, String outputPath, String communityMapPath) {
        SparkConf conf = new SparkConf();
        runWithSparkSession(
            conf,
            isSparkSessionManaged,
            spark -> {
                Utils.removeOutputDir(spark, outputPath);
                CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath);
                execSplit(spark, inputPath, outputPath, communityMap);
            });
    }

    private static void execSplit(SparkSession spark, String inputPath, String outputPath,
        CommunityMap communities) {

        Dataset<CommunityResult> result = Utils
            .readPath(spark, inputPath + "/publication", CommunityResult.class)
            .union(Utils.readPath(spark, inputPath + "/dataset", CommunityResult.class))
            .union(Utils.readPath(spark, inputPath + "/orp", CommunityResult.class))
            .union(Utils.readPath(spark, inputPath + "/software", CommunityResult.class));

        communities
            .keySet()
            .stream()
            .forEach(c -> printResult(c, result, outputPath + "/" + communities.get(c).replace(" ", "_")));
    }

    private static void printResult(String c, Dataset<CommunityResult> result, String outputPath) {
        Dataset<CommunityResult> communityProducts = result
            .filter((FilterFunction<CommunityResult>) r -> containsCommunity(r, c));

        communityProducts
            .write()
            .option("compression", "gzip")
            .mode(SaveMode.Overwrite)
            .json(outputPath);
    }

    private static boolean containsCommunity(CommunityResult r, String c) {
        if (Optional.ofNullable(r.getContext()).isPresent()) {
            return r
                .getContext()
                .stream()
                .map(Context::getCode)
                .collect(Collectors.toList())
                .contains(c);
        }
        return false;
    }
}
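The resulting layout, sketched with invented community labels, is one folder per community under the output path, named after the community label with spaces replaced by underscores:

    // <outputPath>/EGI_Federation/part-00000.json.gz
    // <outputPath>/Digital_Humanities_and_Cultural_Heritage/part-00000.json.gz
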
eu/dnetlib/dhp/oa/graph/dump/community/ResultProject.java
@@ -0,0 +1,28 @@
package eu.dnetlib.dhp.oa.graph.dump.community;

import java.io.Serializable;
import java.util.List;

import eu.dnetlib.dhp.schema.dump.oaf.community.Project;

public class ResultProject implements Serializable {
    private String resultId;
    private List<Project> projectsList;

    public String getResultId() {
        return resultId;
    }

    public void setResultId(String resultId) {
        this.resultId = resultId;
    }

    public List<Project> getProjectsList() {
        return projectsList;
    }

    public void setProjectsList(List<Project> projectsList) {
        this.projectsList = projectsList;
    }
}
@ -0,0 +1,67 @@

package eu.dnetlib.dhp.oa.graph.dump.community;

import java.io.Serializable;
import java.util.Optional;

import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.DumpProducts;
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
import eu.dnetlib.dhp.schema.oaf.Result;

/**
 * Spark action to trigger the dump of results associated to a research community - research initiative/infrastructure.
 * The actual dump is performed via the class DumpProducts, which is also used for the entire graph dump.
 */
public class SparkDumpCommunityProducts implements Serializable {

	private static final Logger log = LoggerFactory.getLogger(SparkDumpCommunityProducts.class);

	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				SparkDumpCommunityProducts.class
					.getResourceAsStream(
						"/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json"));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		final String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);

		final String outputPath = parser.get("outputPath");
		log.info("outputPath: {}", outputPath);

		final String resultClassName = parser.get("resultTableName");
		log.info("resultTableName: {}", resultClassName);

		String communityMapPath = parser.get("communityMapPath");

		final String dumpType = Optional
			.ofNullable(parser.get("dumpType"))
			.map(String::valueOf)
			.orElse("community");

		Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);

		DumpProducts dump = new DumpProducts();

		dump
			.run(
				isSparkSessionManaged, inputPath, outputPath, communityMapPath, inputClazz, CommunityResult.class,
				dumpType);

	}

}
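For orientation, a minimal sketch of the call the driver above ends up issuing, written with example values; the real values come from the JSON parameter file read at startup, and the paths and result type below are placeholders, not part of this commit.

// Hedged example of the dump.run(...) invocation above (all values are illustrative):
new DumpProducts()
	.run(
		Boolean.TRUE,                                  // isSparkSessionManaged
		"/tmp/graph/publication",                      // sourcePath (assumed layout)
		"/tmp/dump/community/publication",             // outputPath
		"/tmp/dump/communityMap",                      // communityMapPath
		eu.dnetlib.dhp.schema.oaf.Publication.class,   // resultTableName resolved via Class.forName
		CommunityResult.class,
		"community");                                  // default dumpType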
@ -0,0 +1,193 @@

package eu.dnetlib.dhp.oa.graph.dump.community;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

import java.io.Serializable;
import java.io.StringReader;
import java.util.*;
import java.util.stream.Collectors;

import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
import eu.dnetlib.dhp.schema.dump.oaf.community.Funder;
import eu.dnetlib.dhp.schema.dump.oaf.community.Project;
import eu.dnetlib.dhp.schema.dump.oaf.community.Validated;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.Relation;
import scala.Tuple2;

/**
 * Preparation of the Project information to be added to the dumped results. For each result associated to at least one
 * Project, an instance of the ResultProject class is serialized. ResultProject contains the resultId and the list of
 * Projects (as in eu.dnetlib.dhp.schema.dump.oaf.community.Project) the result is associated to.
 */
public class SparkPrepareResultProject implements Serializable {
	private static final Logger log = LoggerFactory.getLogger(SparkPrepareResultProject.class);

	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				SparkPrepareResultProject.class
					.getResourceAsStream(
						"/eu/dnetlib/dhp/oa/graph/dump/project_prep_parameters.json"));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		final String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);

		final String outputPath = parser.get("outputPath");
		log.info("outputPath: {}", outputPath);

		SparkConf conf = new SparkConf();

		runWithSparkSession(
			conf,
			isSparkSessionManaged,
			spark -> {
				Utils.removeOutputDir(spark, outputPath);
				prepareResultProjectList(spark, inputPath, outputPath);
			});
	}

	private static void prepareResultProjectList(SparkSession spark, String inputPath, String outputPath) {
		Dataset<Relation> relation = Utils
			.readPath(spark, inputPath + "/relation", Relation.class)
			.filter(
				(FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
					r.getRelClass().equalsIgnoreCase(ModelConstants.IS_PRODUCED_BY));

		Dataset<eu.dnetlib.dhp.schema.oaf.Project> projects = Utils
			.readPath(spark, inputPath + "/project", eu.dnetlib.dhp.schema.oaf.Project.class);

		projects
			.joinWith(relation, projects.col("id").equalTo(relation.col("target")), "inner")
			.groupByKey(
				(MapFunction<Tuple2<eu.dnetlib.dhp.schema.oaf.Project, Relation>, String>) value -> value
					._2()
					.getSource(),
				Encoders.STRING())
			.mapGroups(
				(MapGroupsFunction<String, Tuple2<eu.dnetlib.dhp.schema.oaf.Project, Relation>, ResultProject>) (s,
					it) -> {
					Set<String> projectSet = new HashSet<>();
					Tuple2<eu.dnetlib.dhp.schema.oaf.Project, Relation> first = it.next();
					ResultProject rp = new ResultProject();
					rp.setResultId(s);
					eu.dnetlib.dhp.schema.oaf.Project p = first._1();
					projectSet.add(p.getId());
					Project ps = getProject(p, first._2);

					List<Project> projList = new ArrayList<>();
					projList.add(ps);
					rp.setProjectsList(projList);
					it.forEachRemaining(c -> {
						eu.dnetlib.dhp.schema.oaf.Project op = c._1();
						if (!projectSet.contains(op.getId())) {
							projList
								.add(getProject(op, c._2));

							projectSet.add(op.getId());
						}
					});
					return rp;
				}, Encoders.bean(ResultProject.class))
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(outputPath);
	}

	private static Project getProject(eu.dnetlib.dhp.schema.oaf.Project op, Relation relation) {
		Project p = Project
			.newInstance(
				op.getId(),
				op.getCode().getValue(),
				Optional
					.ofNullable(op.getAcronym())
					.map(Field::getValue)
					.orElse(null),
				Optional
					.ofNullable(op.getTitle())
					.map(Field::getValue)
					.orElse(null),
				Optional
					.ofNullable(op.getFundingtree())
					.map(value -> {
						List<Funder> tmp = value
							.stream()
							.map(ft -> getFunder(ft.getValue()))
							.collect(Collectors.toList());
						if (!tmp.isEmpty()) {
							return tmp.get(0);
						} else {
							return null;
						}
					})
					.orElse(null));

		Optional<DataInfo> di = Optional.ofNullable(op.getDataInfo());
		Provenance provenance = new Provenance();
		if (di.isPresent()) {
			provenance.setProvenance(di.get().getProvenanceaction().getClassname());
			provenance.setTrust(di.get().getTrust());
			p.setProvenance(provenance);
		}
		if (Boolean.TRUE.equals(relation.getValidated())) {
			p.setValidated(Validated.newInstance(relation.getValidated(), relation.getValidationDate()));
		}
		return p;

	}

	private static Funder getFunder(String fundingtree) {
		final Funder f = new Funder();
		final Document doc;
		try {
			final SAXReader reader = new SAXReader();
			reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
			doc = reader.read(new StringReader(fundingtree));
			f.setShortName(((Node) (doc.selectNodes("//funder/shortname").get(0))).getText());
			f.setName(((Node) (doc.selectNodes("//funder/name").get(0))).getText());
			f.setJurisdiction(((Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText());
			for (Object o : doc.selectNodes("//funding_level_0")) {
				List<Node> node = ((Node) o).selectNodes("./name");
				f.setFundingStream((node.get(0)).getText());
			}

			return f;
		} catch (DocumentException | SAXException e) {
			throw new IllegalArgumentException(e);
		}
	}
}
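A minimal sketch of the funding tree XML that getFunder above expects, inferred only from the XPath expressions it evaluates (//funder/shortname, //funder/name, //funder/jurisdiction, //funding_level_0/name); the root element name and all values are placeholders, and real funding tree profiles carry more elements than shown here.

// Illustrative input for getFunder(...); shape inferred from the XPaths above.
String fundingtree = "<fundingtree>"
	+ "<funder>"
	+ "<shortname>EC</shortname>"              // mapped to Funder.shortName
	+ "<name>European Commission</name>"       // mapped to Funder.name
	+ "<jurisdiction>EU</jurisdiction>"        // mapped to Funder.jurisdiction
	+ "</funder>"
	+ "<funding_level_0>"
	+ "<name>H2020</name>"                     // mapped to Funder.fundingStream
	+ "</funding_level_0>"
	+ "</fundingtree>";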
@ -0,0 +1,50 @@

package eu.dnetlib.dhp.oa.graph.dump.community;

import java.io.Serializable;
import java.util.Optional;

import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;

/**
 * Spark job to trigger the split of results associated to a research community - research initiative/infrastructure.
 * The actual split is performed by the class CommunitySplit.
 */
public class SparkSplitForCommunity implements Serializable {

	private static final Logger log = LoggerFactory.getLogger(SparkSplitForCommunity.class);

	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				SparkSplitForCommunity.class
					.getResourceAsStream(
						"/eu/dnetlib/dhp/oa/graph/dump/split_parameters.json"));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		final String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);

		final String outputPath = parser.get("outputPath");
		log.info("outputPath: {}", outputPath);

		final String communityMapPath = parser.get("communityMapPath");

		CommunitySplit split = new CommunitySplit();
		split.run(isSparkSessionManaged, inputPath, outputPath, communityMapPath);

	}

}
@ -0,0 +1,90 @@

package eu.dnetlib.dhp.oa.graph.dump.community;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

import java.io.Serializable;
import java.util.Optional;

import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
import scala.Tuple2;

public class SparkUpdateProjectInfo implements Serializable {

	private static final Logger log = LoggerFactory.getLogger(SparkUpdateProjectInfo.class);
	public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				SparkUpdateProjectInfo.class
					.getResourceAsStream(
						"/eu/dnetlib/dhp/oa/graph/dump/project_input_parameters.json"));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		final String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);

		final String outputPath = parser.get("outputPath");
		log.info("outputPath: {}", outputPath);

		final String preparedInfoPath = parser.get("preparedInfoPath");
		log.info("preparedInfoPath: {}", preparedInfoPath);

		SparkConf conf = new SparkConf();

		runWithSparkSession(
			conf,
			isSparkSessionManaged,
			spark -> {
				Utils.removeOutputDir(spark, outputPath);
				extend(spark, inputPath, outputPath, preparedInfoPath);
			});
	}

	private static void extend(
		SparkSession spark,
		String inputPath,
		String outputPath,
		String preparedInfoPath) {
		Dataset<CommunityResult> result = Utils.readPath(spark, inputPath, CommunityResult.class);
		Dataset<ResultProject> resultProject = Utils.readPath(spark, preparedInfoPath, ResultProject.class);
		result
			.joinWith(
				resultProject, result.col("id").equalTo(resultProject.col("resultId")),
				"left")
			.map((MapFunction<Tuple2<CommunityResult, ResultProject>, CommunityResult>) value -> {
				CommunityResult r = value._1();
				Optional.ofNullable(value._2()).ifPresent(rp -> r.setProjects(rp.getProjectsList()));
				return r;
			}, Encoders.bean(CommunityResult.class))
			.write()
			.option("compression", "gzip")
			.mode(SaveMode.Append)
			.json(outputPath);

	}

}
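A toy illustration of the merge performed in extend(...) above, on plain beans rather than Spark Datasets: a prepared ResultProject whose resultId matches the result id has its project list copied onto the CommunityResult. The identifier value and the setId accessor are assumptions based on the join columns used above, not verified against the schema module.

// Not a runnable job, just the MapFunction body above replayed on two beans.
CommunityResult r = new CommunityResult();
r.setId("50|doi_________::abc");                 // hypothetical result identifier
ResultProject rp = new ResultProject();
rp.setResultId("50|doi_________::abc");          // same id, so the left join would match
rp.setProjectsList(new java.util.ArrayList<>()); // would hold the dumped Projects
Optional.ofNullable(rp).ifPresent(prep -> r.setProjects(prep.getProjectsList()));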
@ -0,0 +1,26 @@

package eu.dnetlib.dhp.oa.graph.dump.complete;

import java.io.Serializable;

public class Constants implements Serializable {

	public static final String IS_HOSTED_BY = "isHostedBy";
	public static final String HOSTS = "hosts";

	public static final String IS_FUNDED_BY = "isFundedBy";
	public static final String FUNDS = "funds";

	public static final String FUNDINGS = "fundings";

	public static final String RESULT_ENTITY = "result";
	public static final String DATASOURCE_ENTITY = "datasource";
	public static final String CONTEXT_ENTITY = "context";
	public static final String ORGANIZATION_ENTITY = "organization";
	public static final String PROJECT_ENTITY = "project";

	public static final String CONTEXT_ID = "00";
	public static final String CONTEXT_NS_PREFIX = "context_____";
	public static final String UNKNOWN = "UNKNOWN";

}
@ -0,0 +1,84 @@

package eu.dnetlib.dhp.oa.graph.dump.complete;

import java.io.Serializable;
import java.util.List;

/**
 * Deserialization of the context information needed to create Context Entities and the relations between context
 * entities and datasources or projects.
 */
public class ContextInfo implements Serializable {
	private String id;
	private String description;
	private String type;
	private String zenodocommunity;
	private String name;
	private List<String> projectList;
	private List<String> datasourceList;
	private List<String> subject;

	public List<String> getSubject() {
		return subject;
	}

	public void setSubject(List<String> subject) {
		this.subject = subject;
	}

	public String getName() {
		return name;
	}

	public void setName(String name) {
		this.name = name;
	}

	public String getId() {
		return id;
	}

	public void setId(String id) {
		this.id = id;
	}

	public String getDescription() {
		return description;
	}

	public void setDescription(String description) {
		this.description = description;
	}

	public String getType() {
		return type;
	}

	public void setType(String type) {
		this.type = type;
	}

	public String getZenodocommunity() {
		return zenodocommunity;
	}

	public void setZenodocommunity(String zenodocommunity) {
		this.zenodocommunity = zenodocommunity;
	}

	public List<String> getProjectList() {
		return projectList;
	}

	public void setProjectList(List<String> projectList) {
		this.projectList = projectList;
	}

	public List<String> getDatasourceList() {
		return datasourceList;
	}

	public void setDatasourceList(List<String> datasourceList) {
		this.datasourceList = datasourceList;
	}
}
@ -0,0 +1,110 @@

package eu.dnetlib.dhp.oa.graph.dump.complete;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.function.Consumer;
import java.util.function.Function;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchInitiative;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;

/**
 * Writes Context entities on HDFS. It queries the Information System at the lookup url provided as parameter and
 * collects the general information for contexts of type community or ri. The general information is the id of the
 * context, its label, the subjects associated to the context, its zenodo community, description and type. This
 * information is used to create a new Context Entity.
 */
public class CreateContextEntities implements Serializable {

	private static final Logger log = LoggerFactory.getLogger(CreateContextEntities.class);
	private final transient Configuration conf;
	private final transient BufferedWriter writer;

	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				CreateContextEntities.class
					.getResourceAsStream(
						"/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json"));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		final String hdfsPath = parser.get("hdfsPath");
		log.info("hdfsPath: {}", hdfsPath);

		final String hdfsNameNode = parser.get("nameNode");
		log.info("nameNode: {}", hdfsNameNode);

		final String isLookUpUrl = parser.get("isLookUpUrl");
		log.info("isLookUpUrl: {}", isLookUpUrl);

		final CreateContextEntities cce = new CreateContextEntities(hdfsPath, hdfsNameNode);

		log.info("Processing contexts...");
		cce.execute(Process::getEntity, isLookUpUrl);

		cce.close();

	}

	private void close() throws IOException {
		writer.close();
	}

	public CreateContextEntities(String hdfsPath, String hdfsNameNode) throws IOException {
		this.conf = new Configuration();
		this.conf.set("fs.defaultFS", hdfsNameNode);
		FileSystem fileSystem = FileSystem.get(this.conf);
		Path hdfsWritePath = new Path(hdfsPath);
		FSDataOutputStream fsDataOutputStream = null;
		if (fileSystem.exists(hdfsWritePath)) {
			fsDataOutputStream = fileSystem.append(hdfsWritePath);
		} else {
			fsDataOutputStream = fileSystem.create(hdfsWritePath);
		}
		CompressionCodecFactory factory = new CompressionCodecFactory(conf);
		CompressionCodec codec = factory.getCodecByClassName("org.apache.hadoop.io.compress.GzipCodec");

		this.writer = new BufferedWriter(new OutputStreamWriter(codec.createOutputStream(fsDataOutputStream),
			StandardCharsets.UTF_8));

	}

	public <R extends ResearchInitiative> void execute(final Function<ContextInfo, R> producer, String isLookUpUrl)
		throws ISLookUpException {

		QueryInformationSystem queryInformationSystem = new QueryInformationSystem();
		queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));

		final Consumer<ContextInfo> consumer = ci -> writeEntity(producer.apply(ci));

		queryInformationSystem.getContextInformation(consumer);
	}

	protected <R extends ResearchInitiative> void writeEntity(final R r) {
		try {
			writer.write(Utils.OBJECT_MAPPER.writeValueAsString(r));
			writer.newLine();
		} catch (final IOException e) {
			throw new IllegalArgumentException(e);
		}
	}

}
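Each call to writeEntity above appends one JSON-serialized context entity per line to the gzip-compressed HDFS file. A hedged sketch of parsing one such line back into the dump model with the same mapper; the line content and field values are purely illustrative.

// Illustrative only: the values are made up and ResearchCommunity is just one possible target type.
String line = "{\"id\":\"00|context_____::abc\",\"type\":\"Research Community\"}";
// readValue throws a checked JsonProcessingException, to be handled or declared by the caller.
ResearchCommunity rc = Utils.OBJECT_MAPPER.readValue(line, ResearchCommunity.class);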
@ -0,0 +1,128 @@

package eu.dnetlib.dhp.oa.graph.dump.complete;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Consumer;
import java.util.function.Function;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.MyRuntimeException;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.dump.oaf.graph.*;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;

/**
 * Writes the set of new Relations between the context and the datasources. At the moment the relation between the
 * context and the project is not created because of the low coverage in the profiles of the openaire ids related to
 * projects.
 */
public class CreateContextRelation implements Serializable {
	private static final Logger log = LoggerFactory.getLogger(CreateContextRelation.class);
	private final transient Configuration conf;
	private final transient BufferedWriter writer;
	private final transient QueryInformationSystem queryInformationSystem;

	private static final String CONTEX_RELATION_DATASOURCE = "contentproviders";
	private static final String CONTEX_RELATION_PROJECT = "projects";

	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				Objects
					.requireNonNull(
						CreateContextRelation.class
							.getResourceAsStream(
								"/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json")));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		final String hdfsPath = parser.get("hdfsPath");
		log.info("hdfsPath: {}", hdfsPath);

		final String hdfsNameNode = parser.get("nameNode");
		log.info("nameNode: {}", hdfsNameNode);

		final String isLookUpUrl = parser.get("isLookUpUrl");
		log.info("isLookUpUrl: {}", isLookUpUrl);

		final CreateContextRelation cce = new CreateContextRelation(hdfsPath, hdfsNameNode, isLookUpUrl);

		log.info("Creating relation for datasource...");
		cce.execute(Process::getRelation, CONTEX_RELATION_DATASOURCE, ModelSupport.getIdPrefix(Datasource.class));

		log.info("Creating relations for projects... ");
		cce
			.execute(
				Process::getRelation, CONTEX_RELATION_PROJECT,
				ModelSupport.getIdPrefix(eu.dnetlib.dhp.schema.oaf.Project.class));

		cce.close();

	}

	private void close() throws IOException {
		writer.close();
	}

	public CreateContextRelation(String hdfsPath, String hdfsNameNode, String isLookUpUrl)
		throws IOException, ISLookUpException {
		this.conf = new Configuration();
		this.conf.set("fs.defaultFS", hdfsNameNode);

		queryInformationSystem = new QueryInformationSystem();
		queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
		queryInformationSystem.execContextRelationQuery();

		FileSystem fileSystem = FileSystem.get(this.conf);
		Path hdfsWritePath = new Path(hdfsPath);
		FSDataOutputStream fsDataOutputStream = null;
		if (fileSystem.exists(hdfsWritePath)) {
			fsDataOutputStream = fileSystem.append(hdfsWritePath);
		} else {
			fsDataOutputStream = fileSystem.create(hdfsWritePath);
		}

		this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));

	}

	public void execute(final Function<ContextInfo, List<Relation>> producer, String category, String prefix) {

		final Consumer<ContextInfo> consumer = ci -> producer.apply(ci).forEach(this::writeEntity);

		queryInformationSystem.getContextRelation(consumer, category, prefix);
	}

	protected void writeEntity(final Relation r) {
		try {
			writer.write(Utils.OBJECT_MAPPER.writeValueAsString(r));
			writer.newLine();
		} catch (final Exception e) {
			throw new MyRuntimeException(e);
		}
	}

}
@ -0,0 +1,518 @@

package eu.dnetlib.dhp.oa.graph.dump.complete;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

import java.io.Serializable;
import java.io.StringReader;
import java.util.*;
import java.util.stream.Collectors;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;

import eu.dnetlib.dhp.oa.graph.dump.DumpProducts;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.dump.oaf.*;
import eu.dnetlib.dhp.schema.dump.oaf.graph.*;
import eu.dnetlib.dhp.schema.dump.oaf.graph.Funder;
import eu.dnetlib.dhp.schema.dump.oaf.graph.Project;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.Journal;
import eu.dnetlib.dhp.schema.oaf.OafEntity;

/**
 * Dumps the entities in the model defined in eu.dnetlib.dhp.schema.dump.oaf.graph. Results are dumped using the same
 * Mapper as for eu.dnetlib.dhp.schema.dump.oaf.community, while for the other entities the mapping is defined below.
 */
public class DumpGraphEntities implements Serializable {

	public void run(Boolean isSparkSessionManaged,
		String inputPath,
		String outputPath,
		Class<? extends OafEntity> inputClazz,
		String communityMapPath) {

		SparkConf conf = new SparkConf();

		switch (ModelSupport.idPrefixMap.get(inputClazz)) {
			case "50":
				DumpProducts d = new DumpProducts();
				d
					.run(
						isSparkSessionManaged, inputPath, outputPath, communityMapPath, inputClazz, GraphResult.class,
						eu.dnetlib.dhp.oa.graph.dump.Constants.DUMPTYPE.COMPLETE.getType());
				break;
			case "40":
				runWithSparkSession(
					conf,
					isSparkSessionManaged,
					spark -> {
						Utils.removeOutputDir(spark, outputPath);
						projectMap(spark, inputPath, outputPath, inputClazz);

					});
				break;
			case "20":
				runWithSparkSession(
					conf,
					isSparkSessionManaged,
					spark -> {
						Utils.removeOutputDir(spark, outputPath);
						organizationMap(spark, inputPath, outputPath, inputClazz);

					});
				break;
			case "10":
				runWithSparkSession(
					conf,
					isSparkSessionManaged,
					spark -> {
						Utils.removeOutputDir(spark, outputPath);
						datasourceMap(spark, inputPath, outputPath, inputClazz);

					});
				break;
		}

	}

	private static <E extends OafEntity> void datasourceMap(SparkSession spark, String inputPath, String outputPath,
		Class<E> inputClazz) {
		Utils
			.readPath(spark, inputPath, inputClazz)
			.map(
				(MapFunction<E, Datasource>) d -> mapDatasource((eu.dnetlib.dhp.schema.oaf.Datasource) d),
				Encoders.bean(Datasource.class))
			.filter(Objects::nonNull)
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(outputPath);
	}

	private static <E extends OafEntity> void projectMap(SparkSession spark, String inputPath, String outputPath,
		Class<E> inputClazz) {
		Utils
			.readPath(spark, inputPath, inputClazz)
			.map(
				(MapFunction<E, Project>) p -> mapProject((eu.dnetlib.dhp.schema.oaf.Project) p),
				Encoders.bean(Project.class))
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(outputPath);
	}

	private static Datasource mapDatasource(eu.dnetlib.dhp.schema.oaf.Datasource d) {
		Datasource datasource = new Datasource();

		datasource.setId(d.getId());

		Optional
			.ofNullable(d.getOriginalId())
			.ifPresent(
				oId -> datasource.setOriginalId(oId.stream().filter(Objects::nonNull).collect(Collectors.toList())));

		Optional
			.ofNullable(d.getPid())
			.ifPresent(
				pids -> pids
					.stream()
					.map(p -> DatasourcePid.newInstance(p.getQualifier().getClassid(), p.getValue()))
					.collect(Collectors.toList()));

		Optional
			.ofNullable(d.getDatasourcetype())
			.ifPresent(
				dsType -> datasource
					.setDatasourcetype(DatasourceSchemeValue.newInstance(dsType.getClassid(), dsType.getClassname())));

		Optional
			.ofNullable(d.getOpenairecompatibility())
			.ifPresent(v -> datasource.setOpenairecompatibility(v.getClassname()));

		Optional
			.ofNullable(d.getOfficialname())
			.ifPresent(oname -> datasource.setOfficialname(oname.getValue()));

		Optional
			.ofNullable(d.getEnglishname())
			.ifPresent(ename -> datasource.setEnglishname(ename.getValue()));

		Optional
			.ofNullable(d.getWebsiteurl())
			.ifPresent(wsite -> datasource.setWebsiteurl(wsite.getValue()));

		Optional
			.ofNullable(d.getLogourl())
			.ifPresent(lurl -> datasource.setLogourl(lurl.getValue()));

		Optional
			.ofNullable(d.getDateofvalidation())
			.ifPresent(dval -> datasource.setDateofvalidation(dval.getValue()));

		Optional
			.ofNullable(d.getDescription())
			.ifPresent(dex -> datasource.setDescription(dex.getValue()));

		Optional
			.ofNullable(d.getSubjects())
			.ifPresent(
				sbjs -> datasource.setSubjects(sbjs.stream().map(sbj -> sbj.getValue()).collect(Collectors.toList())));

		Optional
			.ofNullable(d.getOdpolicies())
			.ifPresent(odp -> datasource.setPolicies(Arrays.asList(odp.getValue())));

		Optional
			.ofNullable(d.getOdlanguages())
			.ifPresent(
				langs -> datasource
					.setLanguages(langs.stream().map(lang -> lang.getValue()).collect(Collectors.toList())));

		Optional
			.ofNullable(d.getOdcontenttypes())
			.ifPresent(
				ctypes -> datasource
					.setContenttypes(ctypes.stream().map(ctype -> ctype.getValue()).collect(Collectors.toList())));

		Optional
			.ofNullable(d.getReleasestartdate())
			.ifPresent(rd -> datasource.setReleasestartdate(rd.getValue()));

		Optional
			.ofNullable(d.getReleaseenddate())
			.ifPresent(ed -> datasource.setReleaseenddate(ed.getValue()));

		Optional
			.ofNullable(d.getMissionstatementurl())
			.ifPresent(ms -> datasource.setMissionstatementurl(ms.getValue()));

		Optional
			.ofNullable(d.getDatabaseaccesstype())
			.ifPresent(ar -> datasource.setAccessrights(ar.getValue()));

		Optional
			.ofNullable(d.getDatauploadtype())
			.ifPresent(dut -> datasource.setUploadrights(dut.getValue()));

		Optional
			.ofNullable(d.getDatabaseaccessrestriction())
			.ifPresent(dar -> datasource.setDatabaseaccessrestriction(dar.getValue()));

		Optional
			.ofNullable(d.getDatauploadrestriction())
			.ifPresent(dur -> datasource.setDatauploadrestriction(dur.getValue()));

		Optional
			.ofNullable(d.getVersioning())
			.ifPresent(v -> datasource.setVersioning(v.getValue()));

		Optional
			.ofNullable(d.getCitationguidelineurl())
			.ifPresent(cu -> datasource.setCitationguidelineurl(cu.getValue()));

		Optional
			.ofNullable(d.getPidsystems())
			.ifPresent(ps -> datasource.setPidsystems(ps.getValue()));

		Optional
			.ofNullable(d.getCertificates())
			.ifPresent(c -> datasource.setCertificates(c.getValue()));

		Optional
			.ofNullable(d.getPolicies())
			.ifPresent(ps -> datasource.setPolicies(ps.stream().map(p -> p.getValue()).collect(Collectors.toList())));

		Optional
			.ofNullable(d.getJournal())
			.ifPresent(j -> datasource.setJournal(getContainer(j)));

		return datasource;

	}

	private static Container getContainer(Journal j) {
		Container c = new Container();

		Optional
			.ofNullable(j.getName())
			.ifPresent(n -> c.setName(n));

		Optional
			.ofNullable(j.getIssnPrinted())
			.ifPresent(issnp -> c.setIssnPrinted(issnp));

		Optional
			.ofNullable(j.getIssnOnline())
			.ifPresent(issno -> c.setIssnOnline(issno));

		Optional
			.ofNullable(j.getIssnLinking())
			.ifPresent(isnl -> c.setIssnLinking(isnl));

		Optional
			.ofNullable(j.getEp())
			.ifPresent(ep -> c.setEp(ep));

		Optional
			.ofNullable(j.getIss())
			.ifPresent(iss -> c.setIss(iss));

		Optional
			.ofNullable(j.getSp())
			.ifPresent(sp -> c.setSp(sp));

		Optional
			.ofNullable(j.getVol())
			.ifPresent(vol -> c.setVol(vol));

		Optional
			.ofNullable(j.getEdition())
			.ifPresent(edition -> c.setEdition(edition));

		Optional
			.ofNullable(j.getConferencedate())
			.ifPresent(cdate -> c.setConferencedate(cdate));

		Optional
			.ofNullable(j.getConferenceplace())
			.ifPresent(cplace -> c.setConferenceplace(cplace));

		return c;
	}

	private static Project mapProject(eu.dnetlib.dhp.schema.oaf.Project p) throws DocumentException {
		Project project = new Project();

		Optional
			.ofNullable(p.getId())
			.ifPresent(id -> project.setId(id));

		Optional
			.ofNullable(p.getWebsiteurl())
			.ifPresent(w -> project.setWebsiteurl(w.getValue()));

		Optional
			.ofNullable(p.getCode())
			.ifPresent(code -> project.setCode(code.getValue()));

		Optional
			.ofNullable(p.getAcronym())
			.ifPresent(acronynim -> project.setAcronym(acronynim.getValue()));

		Optional
			.ofNullable(p.getTitle())
			.ifPresent(title -> project.setTitle(title.getValue()));

		Optional
			.ofNullable(p.getStartdate())
			.ifPresent(sdate -> project.setStartdate(sdate.getValue()));

		Optional
			.ofNullable(p.getEnddate())
			.ifPresent(edate -> project.setEnddate(edate.getValue()));

		Optional
			.ofNullable(p.getCallidentifier())
			.ifPresent(cide -> project.setCallidentifier(cide.getValue()));

		Optional
			.ofNullable(p.getKeywords())
			.ifPresent(key -> project.setKeywords(key.getValue()));

		Optional<Field<String>> omandate = Optional.ofNullable(p.getOamandatepublications());
		Optional<Field<String>> oecsc39 = Optional.ofNullable(p.getEcsc39());
		boolean mandate = false;
		if (omandate.isPresent()) {
			if (omandate.get().getValue().equals("true")) {
				mandate = true;
			}
		}
		if (oecsc39.isPresent()) {
			if (oecsc39.get().getValue().equals("true")) {
				mandate = true;
			}
		}

		project.setOpenaccessmandateforpublications(mandate);
		project.setOpenaccessmandatefordataset(false);

		Optional
			.ofNullable(p.getEcarticle29_3())
			.ifPresent(oamandate -> project.setOpenaccessmandatefordataset(oamandate.getValue().equals("true")));

		project
			.setSubject(
				Optional
					.ofNullable(p.getSubjects())
					.map(subjs -> subjs.stream().map(s -> s.getValue()).collect(Collectors.toList()))
					.orElse(new ArrayList<>()));

		Optional
			.ofNullable(p.getSummary())
			.ifPresent(summary -> project.setSummary(summary.getValue()));

		Optional<Float> ofundedamount = Optional.ofNullable(p.getFundedamount());
		Optional<Field<String>> ocurrency = Optional.ofNullable(p.getCurrency());
		Optional<Float> ototalcost = Optional.ofNullable(p.getTotalcost());

		if (ocurrency.isPresent()) {
			if (ofundedamount.isPresent()) {
				if (ototalcost.isPresent()) {
					project
						.setGranted(
							Granted.newInstance(ocurrency.get().getValue(), ototalcost.get(), ofundedamount.get()));
				} else {
					project.setGranted(Granted.newInstance(ocurrency.get().getValue(), ofundedamount.get()));
				}
			}
		}

		project
			.setH2020programme(
				Optional
					.ofNullable(p.getH2020classification())
					.map(
						classification -> classification
							.stream()
							.map(
								c -> Programme
									.newInstance(
										c.getH2020Programme().getCode(), c.getH2020Programme().getDescription()))
							.collect(Collectors.toList()))
					.orElse(new ArrayList<>()));

		Optional<List<Field<String>>> ofundTree = Optional
			.ofNullable(p.getFundingtree());
		List<Funder> funList = new ArrayList<>();
		if (ofundTree.isPresent()) {
			for (Field<String> fundingtree : ofundTree.get()) {
				funList.add(getFunder(fundingtree.getValue()));
			}
		}
		project.setFunding(funList);

		return project;
	}

	public static Funder getFunder(String fundingtree) throws DocumentException {
		Funder f = new Funder();
		final Document doc;

		doc = new SAXReader().read(new StringReader(fundingtree));
		f.setShortName(((org.dom4j.Node) (doc.selectNodes("//funder/shortname").get(0))).getText());
		f.setName(((org.dom4j.Node) (doc.selectNodes("//funder/name").get(0))).getText());
		f.setJurisdiction(((org.dom4j.Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText());
		// f.setId(((org.dom4j.Node) (doc.selectNodes("//funder/id").get(0))).getText());

		String id = "";
		String description = "";
		// List<Levels> fundings = new ArrayList<>();
		int level = 0;
		List<org.dom4j.Node> nodes = doc.selectNodes("//funding_level_" + level);
		while (nodes.size() > 0) {
			for (org.dom4j.Node n : nodes) {

				List node = n.selectNodes("./id");
				id = ((org.dom4j.Node) node.get(0)).getText();
				id = id.substring(id.indexOf("::") + 2);

				node = n.selectNodes("./description");
				description += ((Node) node.get(0)).getText() + " - ";

			}
			level += 1;
			nodes = doc.selectNodes("//funding_level_" + level);
		}

		if (!id.equals("")) {
			Fundings fundings = new Fundings();
			fundings.setId(id);
			fundings.setDescription(description.substring(0, description.length() - 3).trim());
			f.setFunding_stream(fundings);
		}

		return f;

	}

	private static <E extends OafEntity> void organizationMap(SparkSession spark, String inputPath, String outputPath,
		Class<E> inputClazz) {
		Utils
			.readPath(spark, inputPath, inputClazz)
			.map(
				(MapFunction<E, Organization>) o -> mapOrganization((eu.dnetlib.dhp.schema.oaf.Organization) o),
				Encoders.bean(Organization.class))
			.filter((FilterFunction<Organization>) o -> o != null)
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(outputPath);
	}

	private static eu.dnetlib.dhp.schema.dump.oaf.graph.Organization mapOrganization(
		eu.dnetlib.dhp.schema.oaf.Organization org) {
		if (org.getDataInfo().getDeletedbyinference())
			return null;
		Organization organization = new Organization();

		Optional
			.ofNullable(org.getLegalshortname())
			.ifPresent(value -> organization.setLegalshortname(value.getValue()));

		Optional
			.ofNullable(org.getLegalname())
			.ifPresent(value -> organization.setLegalname(value.getValue()));

		Optional
			.ofNullable(org.getWebsiteurl())
			.ifPresent(value -> organization.setWebsiteurl(value.getValue()));

		Optional
			.ofNullable(org.getAlternativeNames())
			.ifPresent(
				value -> organization
					.setAlternativenames(
						value
							.stream()
							.map(v -> v.getValue())
							.collect(Collectors.toList())));

		Optional
			.ofNullable(org.getCountry())
			.ifPresent(
				value -> {
					if (!value.getClassid().equals(Constants.UNKNOWN)) {
						organization.setCountry(Country.newInstance(value.getClassid(), value.getClassname()));
					}

				});

		Optional
			.ofNullable(org.getId())
			.ifPresent(value -> organization.setId(value));

		Optional
			.ofNullable(org.getPid())
			.ifPresent(
				value -> organization
					.setPid(
						value
							.stream()
							.map(p -> OrganizationPid.newInstance(p.getQualifier().getClassid(), p.getValue()))
							.collect(Collectors.toList())));

		return organization;
	}
}
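A hedged driver sketch for the dispatch in run(...) at the top of DumpGraphEntities: the id prefix of the input class ("10" datasources, "20" organizations, "40" projects, "50" results) selects the mapping branch. Paths are placeholders, and communityMapPath is only consumed by the result ("50") branch.

// Example call with illustrative values: a datasource dump goes through datasourceMap(...).
new DumpGraphEntities()
	.run(
		Boolean.TRUE,
		"/tmp/graph/datasource",                      // assumed input location
		"/tmp/dump/datasource",                       // assumed output location
		eu.dnetlib.dhp.schema.oaf.Datasource.class,   // id prefix "10"
		"/tmp/dump/communityMap");                    // ignored outside the "50" branch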
@ -0,0 +1,201 @@

package eu.dnetlib.dhp.oa.graph.dump.complete;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

import java.io.Serializable;
import java.util.*;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
import eu.dnetlib.dhp.schema.dump.oaf.graph.Node;
import eu.dnetlib.dhp.schema.dump.oaf.graph.RelType;
import eu.dnetlib.dhp.schema.dump.oaf.graph.Relation;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Result;

/**
 * Creates new Relations (as in eu.dnetlib.dhp.schema.dump.oaf.graph.Relation) from the information in the Entity. The
 * new Relations are created for the datasources in the collectedfrom and hostedby elements and for the contexts related
 * to communities and research initiatives/infrastructures. For collectedfrom elements it creates: datasource -> provides
 * -> result and result -> isProvidedBy -> datasource. For hostedby elements it creates: datasource -> hosts -> result
 * and result -> isHostedBy -> datasource. For context elements it creates: context <-> isRelatedTo <-> result. Note for
 * context: it gets the first provenance in the dataInfo. If more than one is present the others are not dumped.
 */
public class Extractor implements Serializable {

	public void run(Boolean isSparkSessionManaged,
		String inputPath,
		String outputPath,
		Class<? extends Result> inputClazz,
		String communityMapPath) {

		SparkConf conf = new SparkConf();

		runWithSparkSession(
			conf,
			isSparkSessionManaged,
			spark -> {
				Utils.removeOutputDir(spark, outputPath);
				extractRelationResult(
					spark, inputPath, outputPath, inputClazz, Utils.getCommunityMap(spark, communityMapPath));
			});
	}

	private <R extends Result> void extractRelationResult(SparkSession spark,
		String inputPath,
		String outputPath,
		Class<R> inputClazz,
		CommunityMap communityMap) {

		Set<Integer> hashCodes = new HashSet<>();

		Utils
			.readPath(spark, inputPath, inputClazz)
			.flatMap((FlatMapFunction<R, Relation>) value -> {
				List<Relation> relationList = new ArrayList<>();
				extractRelationsFromInstance(hashCodes, value, relationList);
				Set<String> communities = communityMap.keySet();
				Optional
					.ofNullable(value.getContext())
					.ifPresent(contexts -> contexts.forEach(context -> {
						String id = context.getId();
						if (id.contains(":")) {
							id = id.substring(0, id.indexOf(":"));
						}
						if (communities.contains(id)) {
							String contextId = Utils.getContextId(id);
							Provenance provenance = Optional
								.ofNullable(context.getDataInfo())
								.map(
									dinfo -> Optional
										.ofNullable(dinfo.get(0).getProvenanceaction())
										.map(
											paction -> Provenance
												.newInstance(
													paction.getClassid(),
													dinfo.get(0).getTrust()))
										.orElse(null))
								.orElse(null);
							Relation r = getRelation(
								value.getId(), contextId,
								Constants.RESULT_ENTITY,
								Constants.CONTEXT_ENTITY,
								ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP, provenance);
							if (!hashCodes.contains(r.hashCode())) {
								relationList
									.add(r);
								hashCodes.add(r.hashCode());
							}
							r = getRelation(
								contextId, value.getId(),
								Constants.CONTEXT_ENTITY,
								Constants.RESULT_ENTITY,
								ModelConstants.IS_RELATED_TO,
								ModelConstants.RELATIONSHIP, provenance);
							if (!hashCodes.contains(r.hashCode())) {
								relationList
									.add(r);
								hashCodes.add(r.hashCode());
							}

						}

					}));

				return relationList.iterator();
			}, Encoders.bean(Relation.class))
			.write()
			.option("compression", "gzip")
			.mode(SaveMode.Overwrite)
			.json(outputPath);

	}

	private <R extends Result> void extractRelationsFromInstance(Set<Integer> hashCodes, R value,
		List<Relation> relationList) {
		Optional
			.ofNullable(value.getInstance())
			.ifPresent(inst -> inst.forEach(instance -> {
				Optional
					.ofNullable(instance.getCollectedfrom())
					.ifPresent(
						cf -> getRelatioPair(
							value, relationList, cf,
							ModelConstants.IS_PROVIDED_BY, ModelConstants.PROVIDES, hashCodes));
				Optional
					.ofNullable(instance.getHostedby())
					.ifPresent(
						hb -> getRelatioPair(
							value, relationList, hb,
							Constants.IS_HOSTED_BY, Constants.HOSTS, hashCodes));
			}));
	}

	private static <R extends Result> void getRelatioPair(R value, List<Relation> relationList, KeyValue cf,
		String resultDatasource, String datasourceResult,
		Set<Integer> hashCodes) {
		Provenance provenance = Optional
			.ofNullable(cf.getDataInfo())
			.map(
				dinfo -> Optional
					.ofNullable(dinfo.getProvenanceaction())
					.map(
						paction -> Provenance
							.newInstance(
								paction.getClassname(),
								dinfo.getTrust()))
					.orElse(
						Provenance
							.newInstance(
								eu.dnetlib.dhp.oa.graph.dump.Constants.HARVESTED,
								eu.dnetlib.dhp.oa.graph.dump.Constants.DEFAULT_TRUST)))
			.orElse(
				Provenance
					.newInstance(
						eu.dnetlib.dhp.oa.graph.dump.Constants.HARVESTED,
						eu.dnetlib.dhp.oa.graph.dump.Constants.DEFAULT_TRUST));
		Relation r = getRelation(
			value.getId(),
			cf.getKey(), Constants.RESULT_ENTITY, Constants.DATASOURCE_ENTITY,
			resultDatasource, ModelConstants.PROVISION,
			provenance);
		if (!hashCodes.contains(r.hashCode())) {
			relationList
				.add(r);
			hashCodes.add(r.hashCode());
		}

		r = getRelation(
			cf.getKey(), value.getId(),
			Constants.DATASOURCE_ENTITY, Constants.RESULT_ENTITY,
			datasourceResult, ModelConstants.PROVISION,
			provenance);

		if (!hashCodes.contains(r.hashCode())) {
			relationList
				.add(r);
			hashCodes.add(r.hashCode());
		}

	}

	private static Relation getRelation(String source, String target, String sourceType, String targetType,
		String relName, String relType, Provenance provenance) {
		Relation r = new Relation();
		r.setSource(Node.newInstance(source, sourceType));
		r.setTarget(Node.newInstance(target, targetType));
		r.setReltype(RelType.newInstance(relName, relType));
		r.setProvenance(provenance);
		return r;
	}
}
@@ -0,0 +1,25 @@

package eu.dnetlib.dhp.oa.graph.dump.complete;

import java.io.Serializable;

public class MergedRels implements Serializable {
	private String organizationId;
	private String representativeId;

	public String getOrganizationId() {
		return organizationId;
	}

	public void setOrganizationId(String organizationId) {
		this.organizationId = organizationId;
	}

	public String getRepresentativeId() {
		return representativeId;
	}

	public void setRepresentativeId(String representativeId) {
		this.representativeId = representativeId;
	}
}
@@ -0,0 +1,21 @@

package eu.dnetlib.dhp.oa.graph.dump.complete;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

public class OrganizationMap extends HashMap<String, List<String>> {

	public OrganizationMap() {
		super();
	}

	public List<String> get(String key) {
		if (super.get(key) == null) {
			return new ArrayList<>();
		}
		return super.get(key);
	}
}
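A minimal sketch, not part of the migrated sources, showing how the null-safe get() above behaves; the organization id and community acronyms are hypothetical placeholders.

package eu.dnetlib.dhp.oa.graph.dump.complete;

import java.util.Arrays;
import java.util.List;

public class OrganizationMapExample {
	public static void main(String[] args) {
		OrganizationMap orgMap = new OrganizationMap();
		// hypothetical organization id mapped to hypothetical community acronyms
		orgMap.put("20|openorgs____::0001", Arrays.asList("dh-ch", "enermaps"));
		List<String> known = orgMap.get("20|openorgs____::0001"); // [dh-ch, enermaps]
		List<String> missing = orgMap.get("20|openorgs____::9999"); // empty list, never null
		System.out.println(known.size() + " / " + missing.size());
	}
}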
@@ -0,0 +1,99 @@

package eu.dnetlib.dhp.oa.graph.dump.complete;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang3.StringUtils;

import eu.dnetlib.dhp.oa.graph.dump.Constants;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.MyRuntimeException;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
import eu.dnetlib.dhp.schema.dump.oaf.graph.*;

/**
 * It processes the ContextInfo information to produce a new Context Entity or a set of Relations between the generic
 * context entity and the datasources/projects related to the context.
 */
public class Process implements Serializable {

	@SuppressWarnings("unchecked")
	public static <R extends ResearchInitiative> R getEntity(ContextInfo ci) {
		try {
			ResearchInitiative ri;
			if (ci.getType().equals("community")) {
				ri = new ResearchCommunity();
				((ResearchCommunity) ri).setSubject(ci.getSubject());
				ri.setType(Constants.RESEARCH_COMMUNITY);
			} else {
				ri = new ResearchInitiative();
				ri.setType(Constants.RESEARCH_INFRASTRUCTURE);
			}
			ri.setId(Utils.getContextId(ci.getId()));
			ri.setAcronym(ci.getId());
			ri.setDescription(ci.getDescription());
			ri.setName(ci.getName());
			if (StringUtils.isNotEmpty(ci.getZenodocommunity())) {
				ri.setZenodo_community(Constants.ZENODO_COMMUNITY_PREFIX + ci.getZenodocommunity());
			}
			return (R) ri;
		} catch (final Exception e) {
			throw new MyRuntimeException(e);
		}
	}

	public static List<Relation> getRelation(ContextInfo ci) {
		try {
			List<Relation> relationList = new ArrayList<>();
			ci.getDatasourceList().forEach(ds -> {
				String nodeType = ModelSupport.idPrefixEntity.get(ds.substring(0, 2));
				String contextId = Utils.getContextId(ci.getId());
				relationList
					.add(
						Relation
							.newInstance(
								Node.newInstance(contextId, eu.dnetlib.dhp.schema.dump.oaf.graph.Constants.CONTEXT_ENTITY),
								Node.newInstance(ds, nodeType),
								RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
								Provenance.newInstance(Constants.USER_CLAIM, Constants.DEFAULT_TRUST)));
				relationList
					.add(
						Relation
							.newInstance(
								Node.newInstance(ds, nodeType),
								Node.newInstance(contextId, eu.dnetlib.dhp.schema.dump.oaf.graph.Constants.CONTEXT_ENTITY),
								RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
								Provenance.newInstance(Constants.USER_CLAIM, Constants.DEFAULT_TRUST)));
			});
			return relationList;
		} catch (final Exception e) {
			throw new MyRuntimeException(e);
		}
	}
}
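A minimal sketch, not part of the commit, showing how Process might be driven with a hand-built ContextInfo; it relies only on the ContextInfo setters used elsewhere in this package, and every identifier below is a hypothetical placeholder.

package eu.dnetlib.dhp.oa.graph.dump.complete;

import java.util.Arrays;
import java.util.List;

import eu.dnetlib.dhp.schema.dump.oaf.graph.Relation;
import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchCommunity;

public class ProcessExample {
	public static void main(String[] args) {
		ContextInfo ci = new ContextInfo();
		ci.setId("dh-ch"); // hypothetical context acronym
		ci.setName("Digital Humanities and Cultural Heritage");
		ci.setDescription("Example description");
		ci.setType("community");
		ci.setSubject(Arrays.asList("history", "archaeology"));
		ci.setZenodocommunity("oac_dh-ch"); // hypothetical Zenodo community id
		ci.setDatasourceList(Arrays.asList("10|doajarticles::0001")); // hypothetical datasource id

		ResearchCommunity entity = Process.getEntity(ci);
		List<Relation> rels = Process.getRelation(ci); // two relations per datasource, one per direction
		System.out.println(rels.size());
	}
}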
@@ -0,0 +1,198 @@

package eu.dnetlib.dhp.oa.graph.dump.complete;

import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.function.Consumer;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.jetbrains.annotations.NotNull;
import org.xml.sax.SAXException;

import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.utils.DHPUtils;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;

public class QueryInformationSystem {

	private ISLookUpService isLookUp;
	private List<String> contextRelationResult;

	private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
		+ " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] "
		+ " and $x//context/param[./@name = 'status']/text() = 'all' "
		+ " return "
		+ "$x//context";

	private static final String XQUERY_ENTITY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
		+ "where $x//context[./@type='community' or ./@type = 'ri'] and $x//context/param[./@name = 'status']/text() = 'all' return "
		+ "concat(data($x//context/@id) , '@@', $x//context/param[./@name =\"name\"]/text(), '@@', "
		+ "$x//context/param[./@name=\"description\"]/text(), '@@', $x//context/param[./@name = \"subject\"]/text(), '@@', "
		+ "$x//context/param[./@name = \"zenodoCommunity\"]/text(), '@@', $x//context/@type)";

	public void getContextInformation(final Consumer<ContextInfo> consumer) throws ISLookUpException {
		isLookUp
			.quickSearchProfile(XQUERY_ENTITY)
			.forEach(c -> {
				ContextInfo cinfo = new ContextInfo();
				String[] cSplit = c.split("@@");
				cinfo.setId(cSplit[0]);
				cinfo.setName(cSplit[1]);
				cinfo.setDescription(cSplit[2]);
				if (!cSplit[3].trim().equals("")) {
					cinfo.setSubject(Arrays.asList(cSplit[3].split(",")));
				}
				cinfo.setZenodocommunity(cSplit[4]);
				cinfo.setType(cSplit[5]);
				consumer.accept(cinfo);
			});
	}

	public List<String> getContextRelationResult() {
		return contextRelationResult;
	}

	public void setContextRelationResult(List<String> contextRelationResult) {
		this.contextRelationResult = contextRelationResult;
	}

	public ISLookUpService getIsLookUp() {
		return isLookUp;
	}

	public void setIsLookUp(ISLookUpService isLookUpService) {
		this.isLookUp = isLookUpService;
	}

	public void execContextRelationQuery() throws ISLookUpException {
		contextRelationResult = isLookUp.quickSearchProfile(XQUERY);
	}

	public void getContextRelation(final Consumer<ContextInfo> consumer, String category, String prefix) {
		contextRelationResult.forEach(xml -> {
			ContextInfo cinfo = new ContextInfo();
			final Document doc;
			try {
				final SAXReader reader = new SAXReader();
				reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
				doc = reader.read(new StringReader(xml));
				Element root = doc.getRootElement();
				cinfo.setId(root.attributeValue("id"));

				Iterator<Element> it = root.elementIterator();
				while (it.hasNext()) {
					Element el = it.next();
					if (el.getName().equals("category")) {
						String categoryId = el.attributeValue("id");
						categoryId = categoryId.substring(categoryId.lastIndexOf("::") + 2);
						if (categoryId.equals(category)) {
							cinfo.setDatasourceList(getCategoryList(el, prefix));
						}
					}
				}
				consumer.accept(cinfo);
			} catch (DocumentException | SAXException e) {
				e.printStackTrace();
			}
		});
	}

	@NotNull
	private List<String> getCategoryList(Element el, String prefix) {
		List<String> datasourceList = new ArrayList<>();
		for (Object node : el.selectNodes(".//concept")) {
			String oid = getOpenaireId((Node) node, prefix);
			if (oid != null)
				datasourceList.add(oid);
		}
		return datasourceList;
	}

	private String getOpenaireId(Node el, String prefix) {
		for (Object node : el.selectNodes(".//param")) {
			Node n = (Node) node;
			if (n.valueOf("./@name").equals("openaireId")) {
				return prefix + "|" + n.getText();
			}
		}
		return makeOpenaireId(el, prefix);
	}

	private String makeOpenaireId(Node el, String prefix) {
		if (!prefix.equals(ModelSupport.entityIdPrefix.get("project"))) {
			return null;
		}
		String funder = "";
		String grantId = null;
		String funding = null;
		for (Object node : el.selectNodes(".//param")) {
			Node n = (Node) node;
			switch (n.valueOf("./@name")) {
				case "funding":
					funding = n.getText();
					break;
				case "funder":
					funder = n.getText();
					break;
				case "CD_PROJECT_NUMBER":
					grantId = n.getText();
					break;
				default:
					break;
			}
		}
		String nsp = null;

		switch (funder.toLowerCase()) {
			case "ec":
				if (funding == null) {
					return null;
				}
				if (funding.toLowerCase().contains("h2020")) {
					nsp = "corda__h2020::";
				} else {
					nsp = "corda_______::";
				}
				break;
			case "tubitak":
				nsp = "tubitakf____::";
				break;
			case "dfg":
				nsp = "dfgf________::";
				break;
			default:
				StringBuilder bld = new StringBuilder();
				bld.append(funder.toLowerCase());
				for (int i = funder.length(); i < 12; i++)
					bld.append("_");
				bld.append("::");
				nsp = bld.toString();
		}

		return prefix + "|" + nsp + DHPUtils.md5(grantId);
	}
}
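A minimal usage sketch, not part of the commit: it assumes an already configured ISLookUpService is available and simply collects the ContextInfo objects produced by getContextInformation.

package eu.dnetlib.dhp.oa.graph.dump.complete;

import java.util.ArrayList;
import java.util.List;

import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;

public class QueryInformationSystemExample {
	// Collects the context profiles reachable through the given lookup service.
	public static List<ContextInfo> loadContexts(ISLookUpService lookUp) throws ISLookUpException {
		QueryInformationSystem queryInformationSystem = new QueryInformationSystem();
		queryInformationSystem.setIsLookUp(lookUp);
		List<ContextInfo> contexts = new ArrayList<>();
		queryInformationSystem.getContextInformation(contexts::add);
		return contexts;
	}
}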
@@ -0,0 +1,122 @@

package eu.dnetlib.dhp.oa.graph.dump.complete;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

import java.io.Serializable;
import java.util.Optional;

import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.dump.oaf.graph.GraphResult;
import eu.dnetlib.dhp.schema.dump.oaf.graph.Relation;

/**
 * Reads all the entities of the same type (Relation / Results) and saves them in the same folder
 */
public class SparkCollectAndSave implements Serializable {

	private static final Logger log = LoggerFactory.getLogger(SparkCollectAndSave.class);

	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				SparkCollectAndSave.class
					.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/input_collect_and_save.json"));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		final String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);

		final String outputPath = parser.get("outputPath");
		log.info("outputPath: {}", outputPath);

		final Boolean aggregateResult = Optional
			.ofNullable(parser.get("resultAggregation"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);

		SparkConf conf = new SparkConf();

		runWithSparkSession(
			conf,
			isSparkSessionManaged,
			spark -> {
				Utils.removeOutputDir(spark, outputPath + "/result");
				run(spark, inputPath, outputPath, aggregateResult);
			});
	}

	private static void run(SparkSession spark, String inputPath, String outputPath, boolean aggregate) {
		if (aggregate) {
			Utils
				.readPath(spark, inputPath + "/result/publication", GraphResult.class)
				.union(Utils.readPath(spark, inputPath + "/result/dataset", GraphResult.class))
				.union(Utils.readPath(spark, inputPath + "/result/otherresearchproduct", GraphResult.class))
				.union(Utils.readPath(spark, inputPath + "/result/software", GraphResult.class))
				.write()
				.option("compression", "gzip")
				.mode(SaveMode.Overwrite)
				.json(outputPath + "/result");
		} else {
			write(Utils.readPath(spark, inputPath + "/result/publication", GraphResult.class), outputPath + "/publication");
			write(Utils.readPath(spark, inputPath + "/result/dataset", GraphResult.class), outputPath + "/dataset");
			write(
				Utils.readPath(spark, inputPath + "/result/otherresearchproduct", GraphResult.class),
				outputPath + "/otherresearchproduct");
			write(Utils.readPath(spark, inputPath + "/result/software", GraphResult.class), outputPath + "/software");
		}

		Utils
			.readPath(spark, inputPath + "/relation/publication", Relation.class)
			.union(Utils.readPath(spark, inputPath + "/relation/dataset", Relation.class))
			.union(Utils.readPath(spark, inputPath + "/relation/orp", Relation.class))
			.union(Utils.readPath(spark, inputPath + "/relation/software", Relation.class))
			.union(Utils.readPath(spark, inputPath + "/relation/contextOrg", Relation.class))
			.union(Utils.readPath(spark, inputPath + "/relation/context", Relation.class))
			.union(Utils.readPath(spark, inputPath + "/relation/relation", Relation.class))
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(outputPath + "/relation");
	}

	private static void write(Dataset<GraphResult> dataSet, String outputPath) {
		dataSet
			.write()
			.option("compression", "gzip")
			.mode(SaveMode.Overwrite)
			.json(outputPath);
	}
}
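A small sketch, not part of the commit, reading back the folders produced when resultAggregation is true; the outputPath and the SparkSession are assumed to be provided by the caller.

package eu.dnetlib.dhp.oa.graph.dump.complete;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;

import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.dump.oaf.graph.GraphResult;
import eu.dnetlib.dhp.schema.dump.oaf.graph.Relation;

public class CollectAndSaveReadBack {
	// Reads the aggregated result and relation folders written by SparkCollectAndSave (hypothetical outputPath).
	public static void inspect(SparkSession spark, String outputPath) {
		Dataset<GraphResult> results = Utils.readPath(spark, outputPath + "/result", GraphResult.class);
		Dataset<Relation> relations = Utils.readPath(spark, outputPath + "/relation", Relation.class);
		System.out.println(results.count() + " results, " + relations.count() + " relations");
	}
}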
@@ -0,0 +1,54 @@

package eu.dnetlib.dhp.oa.graph.dump.complete;

import java.io.Serializable;
import java.util.Optional;

import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.oaf.OafEntity;

/**
 * Spark job that fires the dump for the entities
 */
public class SparkDumpEntitiesJob implements Serializable {
	private static final Logger log = LoggerFactory.getLogger(SparkDumpEntitiesJob.class);

	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				SparkDumpEntitiesJob.class
					.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json"));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		final String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);

		final String outputPath = parser.get("outputPath");
		log.info("outputPath: {}", outputPath);

		final String resultClassName = parser.get("resultTableName");
		log.info("resultTableName: {}", resultClassName);

		final String communityMapPath = parser.get("communityMapPath");

		Class<? extends OafEntity> inputClazz = (Class<? extends OafEntity>) Class.forName(resultClassName);

		DumpGraphEntities dg = new DumpGraphEntities();
		dg.run(isSparkSessionManaged, inputPath, outputPath, inputClazz, communityMapPath);
	}
}
@@ -0,0 +1,135 @@

package eu.dnetlib.dhp.oa.graph.dump.complete;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

import java.io.Serializable;
import java.util.Collections;
import java.util.HashSet;
import java.util.Optional;
import java.util.Set;

import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
import eu.dnetlib.dhp.schema.dump.oaf.graph.Node;
import eu.dnetlib.dhp.schema.dump.oaf.graph.RelType;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Relation;

/**
 * Dumps eu.dnetlib.dhp.schema.oaf.Relation into eu.dnetlib.dhp.schema.dump.oaf.graph.Relation
 */
public class SparkDumpRelationJob implements Serializable {

	private static final Logger log = LoggerFactory.getLogger(SparkDumpRelationJob.class);

	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				SparkDumpRelationJob.class
					.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json"));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		final String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);

		final String outputPath = parser.get("outputPath");
		log.info("outputPath: {}", outputPath);

		Optional<String> rs = Optional.ofNullable(parser.get("removeSet"));
		final Set<String> removeSet = new HashSet<>();
		if (rs.isPresent()) {
			Collections.addAll(removeSet, rs.get().split(";"));
		}

		SparkConf conf = new SparkConf();

		runWithSparkSession(
			conf,
			isSparkSessionManaged,
			spark -> {
				Utils.removeOutputDir(spark, outputPath);
				dumpRelation(spark, inputPath, outputPath, removeSet);
			});
	}

	private static void dumpRelation(SparkSession spark, String inputPath, String outputPath, Set<String> removeSet) {
		Dataset<Relation> relations = Utils.readPath(spark, inputPath, Relation.class);
		relations
			.filter((FilterFunction<Relation>) r -> !removeSet.contains(r.getRelClass()))
			.map((MapFunction<Relation, eu.dnetlib.dhp.schema.dump.oaf.graph.Relation>) relation -> {
				eu.dnetlib.dhp.schema.dump.oaf.graph.Relation relNew = new eu.dnetlib.dhp.schema.dump.oaf.graph.Relation();
				relNew
					.setSource(
						Node
							.newInstance(
								relation.getSource(),
								ModelSupport.idPrefixEntity.get(relation.getSource().substring(0, 2))));
				relNew
					.setTarget(
						Node
							.newInstance(
								relation.getTarget(),
								ModelSupport.idPrefixEntity.get(relation.getTarget().substring(0, 2))));
				relNew
					.setReltype(RelType.newInstance(relation.getRelClass(), relation.getSubRelType()));

				Optional<DataInfo> odInfo = Optional.ofNullable(relation.getDataInfo());
				if (odInfo.isPresent()) {
					DataInfo dInfo = odInfo.get();
					if (Optional.ofNullable(dInfo.getProvenanceaction()).isPresent() &&
						Optional.ofNullable(dInfo.getProvenanceaction().getClassname()).isPresent()) {
						relNew
							.setProvenance(
								Provenance.newInstance(dInfo.getProvenanceaction().getClassname(), dInfo.getTrust()));
					}
				}
				if (Boolean.TRUE.equals(relation.getValidated())) {
					relNew.setValidated(relation.getValidated());
					relNew.setValidationDate(relation.getValidationDate());
				}
				return relNew;
			}, Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.graph.Relation.class))
			.write()
			.option("compression", "gzip")
			.mode(SaveMode.Overwrite)
			.json(outputPath);
	}
}
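A minimal sketch, not part of the commit, of how the semicolon-separated removeSet parameter drives the filter above; the relation classes listed are illustrative only.

package eu.dnetlib.dhp.oa.graph.dump.complete;

import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

public class RemoveSetExample {
	public static void main(String[] args) {
		// Hypothetical "removeSet" argument value, split exactly as in SparkDumpRelationJob.main.
		String removeSetParam = "isParticipant;hasAmongTopNSimilarDocuments";
		Set<String> removeSet = new HashSet<>();
		Collections.addAll(removeSet, removeSetParam.split(";"));
		// The dump keeps only relations whose relClass is NOT in removeSet.
		System.out.println(removeSet.contains("isParticipant")); // true -> filtered out
		System.out.println(removeSet.contains("isRelatedTo")); // false -> kept
	}
}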
@@ -0,0 +1,54 @@

package eu.dnetlib.dhp.oa.graph.dump.complete;

import java.io.Serializable;
import java.util.Optional;

import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.oaf.Result;

/**
 * Spark job that fires the extraction of relations from entities
 */
public class SparkExtractRelationFromEntities implements Serializable {
	private static final Logger log = LoggerFactory.getLogger(SparkExtractRelationFromEntities.class);

	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				SparkExtractRelationFromEntities.class
					.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json"));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		final String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);

		final String outputPath = parser.get("outputPath");
		log.info("outputPath: {}", outputPath);

		final String resultClassName = parser.get("resultTableName");
		log.info("resultTableName: {}", resultClassName);

		final String communityMapPath = parser.get("communityMapPath");

		Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);

		Extractor extractor = new Extractor();
		extractor.run(isSparkSessionManaged, inputPath, outputPath, inputClazz, communityMapPath);
	}
}
@@ -0,0 +1,179 @@

package eu.dnetlib.dhp.oa.graph.dump.complete;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Consumer;

import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.*;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.gson.Gson;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
import eu.dnetlib.dhp.schema.dump.oaf.graph.Node;
import eu.dnetlib.dhp.schema.dump.oaf.graph.RelType;
import eu.dnetlib.dhp.schema.oaf.Relation;

/**
 * Creates new Relations between Context Entities and Organizations whose products are associated to the context.
 * It produces relations such as: organization <-> isRelatedTo <-> context
 */
public class SparkOrganizationRelation implements Serializable {
	private static final Logger log = LoggerFactory.getLogger(SparkOrganizationRelation.class);

	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				SparkOrganizationRelation.class
					.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json"));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		final String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);

		final String outputPath = parser.get("outputPath");
		log.info("outputPath: {}", outputPath);

		final OrganizationMap organizationMap = new Gson()
			.fromJson(parser.get("organizationCommunityMap"), OrganizationMap.class);
		final String serializedOrganizationMap = new Gson().toJson(organizationMap);
		log.info("organization map : {}", serializedOrganizationMap);

		final String communityMapPath = parser.get("communityMapPath");
		log.info("communityMapPath: {}", communityMapPath);

		SparkConf conf = new SparkConf();

		runWithSparkSession(
			conf,
			isSparkSessionManaged,
			spark -> {
				Utils.removeOutputDir(spark, outputPath);
				extractRelation(spark, inputPath, organizationMap, outputPath, communityMapPath);
			});
	}

	private static void extractRelation(SparkSession spark, String inputPath, OrganizationMap organizationMap,
		String outputPath, String communityMapPath) {

		CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath);

		Dataset<Relation> relationDataset = Utils.readPath(spark, inputPath, Relation.class);

		relationDataset.createOrReplaceTempView("relation");

		List<eu.dnetlib.dhp.schema.dump.oaf.graph.Relation> relList = new ArrayList<>();

		Dataset<MergedRels> mergedRelsDataset = spark
			.sql(
				"SELECT target organizationId, source representativeId " +
					"FROM relation " +
					"WHERE datainfo.deletedbyinference = false " +
					"AND relclass = 'merges' " +
					"AND substr(source, 1, 2) = '20'")
			.as(Encoders.bean(MergedRels.class));

		mergedRelsDataset.map((MapFunction<MergedRels, MergedRels>) mergedRels -> {
			if (organizationMap.containsKey(mergedRels.getOrganizationId())) {
				return mergedRels;
			}
			return null;
		}, Encoders.bean(MergedRels.class))
			.filter(Objects::nonNull)
			.collectAsList()
			.forEach(getMergedRelsConsumer(organizationMap, relList, communityMap));

		organizationMap
			.keySet()
			.forEach(
				oId -> organizationMap
					.get(oId)
					.forEach(community -> {
						if (communityMap.containsKey(community)) {
							addRelations(relList, community, oId);
						}
					}));

		spark
			.createDataset(relList, Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.graph.Relation.class))
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(outputPath);
	}

	@NotNull
	private static Consumer<MergedRels> getMergedRelsConsumer(OrganizationMap organizationMap,
		List<eu.dnetlib.dhp.schema.dump.oaf.graph.Relation> relList, CommunityMap communityMap) {
		return mergedRels -> {
			String oId = mergedRels.getOrganizationId();
			organizationMap
				.get(oId)
				.forEach(community -> {
					if (communityMap.containsKey(community)) {
						addRelations(relList, community, mergedRels.getRepresentativeId());
					}
				});
			organizationMap.remove(oId);
		};
	}

	private static void addRelations(List<eu.dnetlib.dhp.schema.dump.oaf.graph.Relation> relList, String community,
		String organization) {

		String id = Utils.getContextId(community);
		log.info("create relation for organization: {}", organization);
		relList
			.add(
				eu.dnetlib.dhp.schema.dump.oaf.graph.Relation
					.newInstance(
						Node.newInstance(id, Constants.CONTEXT_ENTITY),
						Node.newInstance(organization, ModelSupport.idPrefixEntity.get(organization.substring(0, 2))),
						RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
						Provenance
							.newInstance(
								eu.dnetlib.dhp.oa.graph.dump.Constants.USER_CLAIM,
								eu.dnetlib.dhp.oa.graph.dump.Constants.DEFAULT_TRUST)));

		relList
			.add(
				eu.dnetlib.dhp.schema.dump.oaf.graph.Relation
					.newInstance(
						Node.newInstance(organization, ModelSupport.idPrefixEntity.get(organization.substring(0, 2))),
						Node.newInstance(id, Constants.CONTEXT_ENTITY),
						RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
						Provenance
							.newInstance(
								eu.dnetlib.dhp.oa.graph.dump.Constants.USER_CLAIM,
								eu.dnetlib.dhp.oa.graph.dump.Constants.DEFAULT_TRUST)));
	}
}
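A small sketch, not part of the commit, of the shape the organizationCommunityMap parameter is expected to have when parsed with Gson as above; the organization ids and community acronyms are hypothetical.

package eu.dnetlib.dhp.oa.graph.dump.complete;

import com.google.gson.Gson;

public class OrganizationMapJsonExample {
	public static void main(String[] args) {
		// Hypothetical map: organization ids (prefix '20') mapped to the communities they contribute to.
		String json = "{\"20|openorgs____::0001\":[\"dh-ch\"],\"20|openorgs____::0002\":[\"enermaps\",\"beopen\"]}";
		OrganizationMap organizationMap = new Gson().fromJson(json, OrganizationMap.class);
		System.out.println(organizationMap.get("20|openorgs____::0002")); // [enermaps, beopen]
	}
}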
@@ -0,0 +1,136 @@

package eu.dnetlib.dhp.oa.graph.dump.complete;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

import java.io.Serializable;
import java.util.Optional;

import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.oaf.*;

/**
 * It selects the valid relations among those present in the graph. A relation is valid if it is not deletedbyinference
 * and if both the source and the target node are present in the graph and are neither deleted by inference nor invisible.
 * To check this, a view with the ids of all the entities in the graph is created, and the relations are selected
 * for which a join with this view exists for both the source and the target.
 */
public class SparkSelectValidRelationsJob implements Serializable {

	private static final Logger log = LoggerFactory.getLogger(SparkSelectValidRelationsJob.class);

	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				SparkSelectValidRelationsJob.class
					.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json"));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		final String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);

		final String outputPath = parser.get("outputPath");
		log.info("outputPath: {}", outputPath);

		SparkConf conf = new SparkConf();

		runWithSparkSession(
			conf,
			isSparkSessionManaged,
			spark -> {
				Utils.removeOutputDir(spark, outputPath);
				selectValidRelation(spark, inputPath, outputPath);
			});
	}

	private static void selectValidRelation(SparkSession spark, String inputPath, String outputPath) {
		Dataset<Relation> relation = Utils.readPath(spark, inputPath + "/relation", Relation.class);
		Dataset<Publication> publication = Utils.readPath(spark, inputPath + "/publication", Publication.class);
		Dataset<eu.dnetlib.dhp.schema.oaf.Dataset> dataset = Utils
			.readPath(spark, inputPath + "/dataset", eu.dnetlib.dhp.schema.oaf.Dataset.class);
		Dataset<Software> software = Utils.readPath(spark, inputPath + "/software", Software.class);
		Dataset<OtherResearchProduct> other = Utils
			.readPath(spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class);
		Dataset<Organization> organization = Utils.readPath(spark, inputPath + "/organization", Organization.class);
		Dataset<Project> project = Utils.readPath(spark, inputPath + "/project", Project.class);
		Dataset<Datasource> datasource = Utils.readPath(spark, inputPath + "/datasource", Datasource.class);

		relation.createOrReplaceTempView("relation");
		publication.createOrReplaceTempView("publication");
		dataset.createOrReplaceTempView("dataset");
		other.createOrReplaceTempView("other");
		software.createOrReplaceTempView("software");
		organization.createOrReplaceTempView("organization");
		project.createOrReplaceTempView("project");
		datasource.createOrReplaceTempView("datasource");

		spark
			.sql(
				"SELECT id " +
					"FROM publication " +
					"WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " +
					"UNION ALL " +
					"SELECT id " +
					"FROM dataset " +
					"WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " +
					"UNION ALL " +
					"SELECT id " +
					"FROM other " +
					"WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " +
					"UNION ALL " +
					"SELECT id " +
					"FROM software " +
					"WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " +
					"UNION ALL " +
					"SELECT id " +
					"FROM organization " +
					"WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " +
					"UNION ALL " +
					"SELECT id " +
					"FROM project " +
					"WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " +
					"UNION ALL " +
					"SELECT id " +
					"FROM datasource " +
					"WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false ")
			.createOrReplaceTempView("identifiers");

		spark
			.sql(
				"SELECT relation.* " +
					"FROM relation " +
					"JOIN identifiers i1 " +
					"ON source = i1.id " +
					"JOIN identifiers i2 " +
					"ON target = i2.id " +
					"WHERE datainfo.deletedbyinference = false")
			.as(Encoders.bean(Relation.class))
			.write()
			.option("compression", "gzip")
			.mode(SaveMode.Overwrite)
			.json(outputPath);
	}
}
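A compact illustration, outside Spark and not part of the commit, of the validity rule the SQL above enforces; the entity ids are hypothetical placeholders.

package eu.dnetlib.dhp.oa.graph.dump.complete;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

public class ValidRelationRuleExample {
	public static void main(String[] args) {
		// Ids of entities that are neither deletedbyinference nor invisible (hypothetical values).
		Set<String> visibleIds = new HashSet<>(Arrays.asList("50|pub_1", "10|ds_1"));
		String source = "50|pub_1";
		String target = "10|ds_1";
		boolean relationDeletedByInference = false;
		// A relation is kept only if it is not deleted by inference and both endpoints are visible.
		boolean valid = !relationDeletedByInference && visibleIds.contains(source) && visibleIds.contains(target);
		System.out.println(valid); // true
	}
}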
@@ -0,0 +1,30 @@

package eu.dnetlib.dhp.oa.graph.dump.exceptions;

public class MyRuntimeException extends RuntimeException {

	public MyRuntimeException() {
		super();
	}

	public MyRuntimeException(
		final String message,
		final Throwable cause,
		final boolean enableSuppression,
		final boolean writableStackTrace) {
		super(message, cause, enableSuppression, writableStackTrace);
	}

	public MyRuntimeException(final String message, final Throwable cause) {
		super(message, cause);
	}

	public MyRuntimeException(final String message) {
		super(message);
	}

	public MyRuntimeException(final Throwable cause) {
		super(cause);
	}
}
@@ -0,0 +1,29 @@

package eu.dnetlib.dhp.oa.graph.dump.exceptions;

public class NoAvailableEntityTypeException extends Exception {

	public NoAvailableEntityTypeException() {
		super();
	}

	public NoAvailableEntityTypeException(
		final String message,
		final Throwable cause,
		final boolean enableSuppression,
		final boolean writableStackTrace) {
		super(message, cause, enableSuppression, writableStackTrace);
	}

	public NoAvailableEntityTypeException(final String message, final Throwable cause) {
		super(message, cause);
	}

	public NoAvailableEntityTypeException(final String message) {
		super(message);
	}

	public NoAvailableEntityTypeException(final Throwable cause) {
		super(cause);
	}
}
@@ -0,0 +1,128 @@

package eu.dnetlib.dhp.oa.graph.dump.funderresults;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

import java.io.Serializable;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;

import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.*;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
import eu.dnetlib.dhp.schema.dump.oaf.community.Funder;
import eu.dnetlib.dhp.schema.dump.oaf.community.Project;

/**
 * Splits the dumped results by funder and stores them in a folder named after the funder
 * (for the EC the fundingStream, FP7 or H2020, is also appended to the folder name).
 */
public class SparkDumpFunderResults implements Serializable {
	private static final Logger log = LoggerFactory.getLogger(SparkDumpFunderResults.class);

	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				SparkDumpFunderResults.class
					.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/funder_result_parameters.json"));
		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);
		Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
		final String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);
		final String outputPath = parser.get("outputPath");
		log.info("outputPath: {}", outputPath);
		SparkConf conf = new SparkConf();
		runWithSparkSession(
			conf,
			isSparkSessionManaged,
			spark -> {
				Utils.removeOutputDir(spark, outputPath);
				writeResultProjectList(spark, inputPath, outputPath);
			});
	}

	private static void writeResultProjectList(SparkSession spark, String inputPath, String outputPath) {
		Dataset<CommunityResult> result = Utils
			.readPath(spark, inputPath + "/publication", CommunityResult.class)
			.union(Utils.readPath(spark, inputPath + "/dataset", CommunityResult.class))
			.union(Utils.readPath(spark, inputPath + "/otherresearchproduct", CommunityResult.class))
			.union(Utils.readPath(spark, inputPath + "/software", CommunityResult.class));
		log.info("Number of result {}", result.count());
		Dataset<String> tmp = result
			.flatMap(
				(FlatMapFunction<CommunityResult, String>) cr -> cr
					.getProjects()
					.stream()
					.map(SparkDumpFunderResults::getFunderName)
					.collect(Collectors.toList())
					.iterator(),
				Encoders.STRING())
			.distinct();
		List<String> funderList = tmp.collectAsList();
		funderList.forEach(funder -> dumpResults(funder, result, outputPath));
	}

	@NotNull
	private static String getFunderName(Project p) {
		Optional<Funder> ofunder = Optional.ofNullable(p.getFunder());
		if (ofunder.isPresent()) {
			String fName = ofunder.get().getShortName();
			if (fName.equalsIgnoreCase("ec")) {
				fName += "_" + ofunder.get().getFundingStream();
			}
			return fName;
		} else {
			String fName = p.getId().substring(3, p.getId().indexOf("_")).toUpperCase();
			if (fName.equalsIgnoreCase("ec")) {
				if (p.getId().contains("h2020")) {
					fName += "_H2020";
				} else {
					fName += "_FP7";
				}
			} else if (fName.equalsIgnoreCase("conicytf")) {
				fName = "CONICYT";
			} else if (fName.equalsIgnoreCase("dfgf")) {
				fName = "DFG";
			} else if (fName.equalsIgnoreCase("tubitakf")) {
				fName = "TUBITAK";
			} else if (fName.equalsIgnoreCase("euenvagency")) {
				fName = "EEA";
			}
			return fName;
		}
	}

	private static void dumpResults(String funder, Dataset<CommunityResult> results, String outputPath) {
		results.map((MapFunction<CommunityResult, CommunityResult>) r -> {
			if (!Optional.ofNullable(r.getProjects()).isPresent()) {
				return null;
			}
			for (Project p : r.getProjects()) {
				String fName = getFunderName(p);
				if (fName.equalsIgnoreCase(funder)) {
					return r;
				}
			}
			return null;
		}, Encoders.bean(CommunityResult.class))
			.filter(Objects::nonNull)
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(outputPath + "/" + funder);
	}
}
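A standalone sketch, not part of the commit, that mirrors only the fallback branch of getFunderName (when no Funder object is attached, the name is derived from the project id); the project id below is hypothetical, and the job additionally special-cases EC, CONICYT, DFG, TUBITAK and EEA codes.

package eu.dnetlib.dhp.oa.graph.dump.funderresults;

public class FunderNameRuleExample {
	// Mirrors, for illustration only, the substring rule applied to a project id such as "40|<nsp>::<md5>".
	static String funderFromProjectId(String projectId) {
		return projectId.substring(3, projectId.indexOf("_")).toUpperCase();
	}

	public static void main(String[] args) {
		System.out.println(funderFromProjectId("40|nsf_________::0001")); // NSF (hypothetical id)
	}
}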
@@ -0,0 +1,111 @@

package eu.dnetlib.dhp.oa.graph.dump.funderresults;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

import java.io.Serializable;
import java.util.Optional;

import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Constants;
import eu.dnetlib.dhp.oa.graph.dump.ResultMapper;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.community.ResultProject;
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
import eu.dnetlib.dhp.schema.oaf.Result;
import scala.Tuple2;

/**
 * Selects the results linked to projects. Only for these results is the dump performed.
 * The code to perform the dump and to extend the dumped results with the information related to projects
 * is the one used for the dump of the community products.
 */
public class SparkResultLinkedToProject implements Serializable {

	private static final Logger log = LoggerFactory.getLogger(SparkResultLinkedToProject.class);

	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				SparkResultLinkedToProject.class
					.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json"));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		final String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);

		final String outputPath = parser.get("outputPath");
		log.info("outputPath: {}", outputPath);

		final String resultClassName = parser.get("resultTableName");
		log.info("resultTableName: {}", resultClassName);

		final String resultProjectsPath = parser.get("graphPath");
		log.info("graphPath: {}", resultProjectsPath);

		String communityMapPath = parser.get("communityMapPath");

		@SuppressWarnings("unchecked")
		Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);
		SparkConf conf = new SparkConf();

		runWithSparkSession(
			conf,
			isSparkSessionManaged,
			spark -> {
				Utils.removeOutputDir(spark, outputPath);
				writeResultsLinkedToProjects(
					communityMapPath, spark, inputClazz, inputPath, outputPath, resultProjectsPath);
			});
	}

	private static <R extends Result> void writeResultsLinkedToProjects(String communityMapPath, SparkSession spark,
		Class<R> inputClazz, String inputPath, String outputPath, String resultProjectsPath) {

		Dataset<R> results = Utils
			.readPath(spark, inputPath, inputClazz)
			.filter(
				(FilterFunction<R>) r -> !r.getDataInfo().getDeletedbyinference() &&
					!r.getDataInfo().getInvisible());
		Dataset<ResultProject> resultProjectDataset = Utils
			.readPath(spark, resultProjectsPath, ResultProject.class);
		CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath);
		results
			.joinWith(resultProjectDataset, results.col("id").equalTo(resultProjectDataset.col("resultId")))
			.map((MapFunction<Tuple2<R, ResultProject>, CommunityResult>) t2 -> {
				CommunityResult cr = (CommunityResult) ResultMapper
					.map(t2._1(), communityMap, Constants.DUMPTYPE.FUNDER.getType());
				cr.setProjects(t2._2().getProjectsList());
				return cr;
			}, Encoders.bean(CommunityResult.class))
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(outputPath);
	}
}
@@ -0,0 +1,82 @@

package eu.dnetlib.dhp.oa.graph.dump.projectssubset;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

import java.io.Serializable;
import java.util.Objects;
import java.util.Optional;

import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.dump.oaf.graph.Project;
import scala.Tuple2;

public class ProjectsSubsetSparkJob implements Serializable {

	private static final Logger log = LoggerFactory.getLogger(ProjectsSubsetSparkJob.class);

	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				ProjectsSubsetSparkJob.class
					.getResourceAsStream(
						"/eu/dnetlib/dhp/oa/graph/dump/project_subset_parameters.json"));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		final String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);

		final String outputPath = parser.get("outputPath");
		log.info("outputPath: {}", outputPath);

		final String projectListPath = parser.get("projectListPath");
		log.info("projectListPath: {}", projectListPath);

		SparkConf conf = new SparkConf();
		runWithSparkSession(
			conf,
			isSparkSessionManaged,
			spark -> {
				Utils.removeOutputDir(spark, outputPath);
				getNewProjectList(spark, inputPath, outputPath, projectListPath);
			});
	}

	private static void getNewProjectList(SparkSession spark, String inputPath, String outputPath,
		String projectListPath) {
		// ids of the projects already dumped in previous runs
		Dataset<String> projectList = spark.read().textFile(projectListPath);
		Dataset<Project> projects;
		projects = Utils.readPath(spark, inputPath, Project.class);

		// keep only the projects whose id is not yet in the project list
		projects
			.joinWith(projectList, projects.col("id").equalTo(projectList.col("value")), "left")
			.map((MapFunction<Tuple2<Project, String>, Project>) t2 -> {
				if (Optional.ofNullable(t2._2()).isPresent())
					return null;
				return t2._1();
			}, Encoders.bean(Project.class))
			.filter(Objects::nonNull)
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(outputPath);

		// append the ids of the newly dumped projects to the project list
		Utils
			.readPath(spark, outputPath, Project.class)
			.map((MapFunction<Project, String>) p -> p.getId(), Encoders.STRING())
			.write()
			.mode(SaveMode.Append)
			.option("compression", "gzip")
			.text(projectListPath);
	}
}
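
Note: getNewProjectList emulates an anti-join by mapping matched rows to null and then filtering them out. For comparison, a minimal hedged sketch of the same selection using Spark's built-in "left_anti" join type; it reuses the Utils and Project types imported above, while spark and the path variables stand for the same values as in the job (an illustrative alternative, not part of the migrated code):

	// Keep only the projects whose id does not appear in the already-dumped project list.
	Dataset<String> projectList = spark.read().textFile(projectListPath);
	Dataset<Project> projects = Utils.readPath(spark, inputPath, Project.class);

	projects
		.join(projectList, projects.col("id").equalTo(projectList.col("value")), "left_anti")
		.as(Encoders.bean(Project.class))
		.write()
		.mode(SaveMode.Overwrite)
		.option("compression", "gzip")
		.json(outputPath);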
@@ -0,0 +1,26 @@
[
  {
    "paramName":"s",
    "paramLongName":"sourcePath",
    "paramDescription": "the path of the sequential file to read",
    "paramRequired": true
  },
  {
    "paramName": "out",
    "paramLongName": "outputPath",
    "paramDescription": "the path used to store temporary output files",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "true if the spark session is managed, false otherwise",
    "paramRequired": false
  },
  {
    "paramName": "gp",
    "paramLongName": "graphPath",
    "paramDescription": "the path to the relations",
    "paramRequired": false
  }
]
|
|
@ -0,0 +1,37 @@
|
||||||
|
|
||||||
|
[
|
||||||
|
|
||||||
|
{
|
||||||
|
"paramName":"is",
|
||||||
|
"paramLongName":"isLookUpUrl",
|
||||||
|
"paramDescription": "URL of the isLookUp Service",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName":"nn",
|
||||||
|
"paramLongName":"nameNode",
|
||||||
|
"paramDescription": "the name node",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "out",
|
||||||
|
"paramLongName": "outputPath",
|
||||||
|
"paramDescription": "the path used to store temporary output files",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "sd",
|
||||||
|
"paramLongName": "singleDeposition",
|
||||||
|
"paramDescription": "true if the dump should be created for a single community",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "ci",
|
||||||
|
"paramLongName": "communityId",
|
||||||
|
"paramDescription": "the id of the community for which to create the dump",
|
||||||
|
"paramRequired": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,30 @@
|
||||||
|
[
|
||||||
|
|
||||||
|
{
|
||||||
|
"paramName":"s",
|
||||||
|
"paramLongName":"sourcePath",
|
||||||
|
"paramDescription": "the path of the sequencial file to read",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "out",
|
||||||
|
"paramLongName": "outputPath",
|
||||||
|
"paramDescription": "the path used to store temporary output files",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "ssm",
|
||||||
|
"paramLongName": "isSparkSessionManaged",
|
||||||
|
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||||
|
"paramRequired": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "ra",
|
||||||
|
"paramLongName": "resultAggregation",
|
||||||
|
"paramDescription": "true if all the result type should be saved under the generic result name. false to get a different dump for each result type",
|
||||||
|
"paramRequired": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,35 @@
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"paramName":"cmp",
|
||||||
|
"paramLongName":"communityMapPath",
|
||||||
|
"paramDescription": "the path to the serialization of the community map",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName":"s",
|
||||||
|
"paramLongName":"sourcePath",
|
||||||
|
"paramDescription": "the path of the sequencial file to read",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "out",
|
||||||
|
"paramLongName": "outputPath",
|
||||||
|
"paramDescription": "the path used to store temporary output files",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "ssm",
|
||||||
|
"paramLongName": "isSparkSessionManaged",
|
||||||
|
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||||
|
"paramRequired": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName":"tn",
|
||||||
|
"paramLongName":"resultTableName",
|
||||||
|
"paramDescription": "the name of the result table we are currently working on",
|
||||||
|
"paramRequired": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,24 @@
|
||||||
|
[
|
||||||
|
|
||||||
|
{
|
||||||
|
"paramName":"is",
|
||||||
|
"paramLongName":"isLookUpUrl",
|
||||||
|
"paramDescription": "URL of the isLookUp Service",
|
||||||
|
"paramRequired": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "hdfs",
|
||||||
|
"paramLongName": "hdfsPath",
|
||||||
|
"paramDescription": "the path used to store temporary output files",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "nn",
|
||||||
|
"paramLongName": "nameNode",
|
||||||
|
"paramDescription": "the name node",
|
||||||
|
"paramRequired": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,30 @@
|
||||||
|
[
|
||||||
|
|
||||||
|
{
|
||||||
|
"paramName":"s",
|
||||||
|
"paramLongName":"sourcePath",
|
||||||
|
"paramDescription": "the path of the sequencial file to read",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "hdp",
|
||||||
|
"paramLongName": "hdfsPath",
|
||||||
|
"paramDescription": "the path used to store the output archive",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName":"nn",
|
||||||
|
"paramLongName":"nameNode",
|
||||||
|
"paramDescription": "the name node",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName":"ss",
|
||||||
|
"paramLongName":"splitSize",
|
||||||
|
"paramDescription": "the maximum size of the archive",
|
||||||
|
"paramRequired": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,36 @@
|
||||||
|
[
|
||||||
|
|
||||||
|
{
|
||||||
|
"paramName":"ocm",
|
||||||
|
"paramLongName":"organizationCommunityMap",
|
||||||
|
"paramDescription": "the organization community map association",
|
||||||
|
"paramRequired": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName":"s",
|
||||||
|
"paramLongName":"sourcePath",
|
||||||
|
"paramDescription": "the path of the sequencial file to read",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "out",
|
||||||
|
"paramLongName": "outputPath",
|
||||||
|
"paramDescription": "the path used to store temporary output files",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "ssm",
|
||||||
|
"paramLongName": "isSparkSessionManaged",
|
||||||
|
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||||
|
"paramRequired": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName":"cmp",
|
||||||
|
"paramLongName":"communityMapPath",
|
||||||
|
"paramDescription": "the path to the serialization of the community map",
|
||||||
|
"paramRequired": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,20 @@
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"paramName":"s",
|
||||||
|
"paramLongName":"sourcePath",
|
||||||
|
"paramDescription": "the path of the sequencial file to read",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "out",
|
||||||
|
"paramLongName": "outputPath",
|
||||||
|
"paramDescription": "the path used to store temporary output files",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "ssm",
|
||||||
|
"paramLongName": "isSparkSessionManaged",
|
||||||
|
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||||
|
"paramRequired": false
|
||||||
|
}
|
||||||
|
]
|
|
@ -0,0 +1,47 @@
|
||||||
|
[
|
||||||
|
|
||||||
|
{
|
||||||
|
"paramName":"cmp",
|
||||||
|
"paramLongName":"communityMapPath",
|
||||||
|
"paramDescription": "the path to the serialization of the community map",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName":"s",
|
||||||
|
"paramLongName":"sourcePath",
|
||||||
|
"paramDescription": "the path of the sequencial file to read",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "out",
|
||||||
|
"paramLongName": "outputPath",
|
||||||
|
"paramDescription": "the path used to store temporary output files",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "ssm",
|
||||||
|
"paramLongName": "isSparkSessionManaged",
|
||||||
|
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||||
|
"paramRequired": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName":"tn",
|
||||||
|
"paramLongName":"resultTableName",
|
||||||
|
"paramDescription": "the name of the result table we are currently working on",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName":"dt",
|
||||||
|
"paramLongName":"dumpType",
|
||||||
|
"paramDescription": "the type of the dump (complete for the whole graph, community for the products related to communities, funder for the results with at least a link to project",
|
||||||
|
"paramRequired": false
|
||||||
|
}, {
|
||||||
|
"paramName":"cid",
|
||||||
|
"paramLongName":"communityId",
|
||||||
|
"paramDescription": "the id of the community to be dumped",
|
||||||
|
"paramRequired": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,41 @@
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"paramName":"s",
|
||||||
|
"paramLongName":"sourcePath",
|
||||||
|
"paramDescription": "the path of the sequencial file to read",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "out",
|
||||||
|
"paramLongName": "outputPath",
|
||||||
|
"paramDescription": "the path used to store temporary output files",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "ssm",
|
||||||
|
"paramLongName": "isSparkSessionManaged",
|
||||||
|
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||||
|
"paramRequired": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName":"tn",
|
||||||
|
"paramLongName":"resultTableName",
|
||||||
|
"paramDescription": "the name of the result table we are currently working on",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName":"gp",
|
||||||
|
"paramLongName":"graphPath",
|
||||||
|
"paramDescription": "the path to the relations",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName":"cmp",
|
||||||
|
"paramLongName":"communityMapPath",
|
||||||
|
"paramDescription": "the path to the relations",
|
||||||
|
"paramRequired": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,32 @@
|
||||||
|
|
||||||
|
|
||||||
|
[
|
||||||
|
|
||||||
|
{
|
||||||
|
"paramName":"s",
|
||||||
|
"paramLongName":"sourcePath",
|
||||||
|
"paramDescription": "the path of the sequencial file to read",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "out",
|
||||||
|
"paramLongName": "outputPath",
|
||||||
|
"paramDescription": "the path used to store temporary output files",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "ssm",
|
||||||
|
"paramLongName": "isSparkSessionManaged",
|
||||||
|
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||||
|
"paramRequired": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "rs",
|
||||||
|
"paramLongName": "removeSet",
|
||||||
|
"paramDescription": "the list of classname relations, split by ';', not to be dumped",
|
||||||
|
"paramRequired": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,29 @@
|
||||||
|
[
|
||||||
|
|
||||||
|
{
|
||||||
|
"paramName":"s",
|
||||||
|
"paramLongName":"sourcePath",
|
||||||
|
"paramDescription": "the path of the sequencial file to read",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "out",
|
||||||
|
"paramLongName": "outputPath",
|
||||||
|
"paramDescription": "the path used to store temporary output files",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "ssm",
|
||||||
|
"paramLongName": "isSparkSessionManaged",
|
||||||
|
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||||
|
"paramRequired": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "pip",
|
||||||
|
"paramLongName": "preparedInfoPath",
|
||||||
|
"paramDescription": "the path of the association result projectlist",
|
||||||
|
"paramRequired": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,20 @@
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"paramName":"s",
|
||||||
|
"paramLongName":"sourcePath",
|
||||||
|
"paramDescription": "the path of the sequencial file to read",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "out",
|
||||||
|
"paramLongName": "outputPath",
|
||||||
|
"paramDescription": "the path used to store temporary output files",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "ssm",
|
||||||
|
"paramLongName": "isSparkSessionManaged",
|
||||||
|
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||||
|
"paramRequired": false
|
||||||
|
}
|
||||||
|
]
|
|
@@ -0,0 +1,27 @@
[
  {
    "paramName":"s",
    "paramLongName":"sourcePath",
    "paramDescription": "the path of the sequential file to read",
    "paramRequired": true
  },
  {
    "paramName": "out",
    "paramLongName": "outputPath",
    "paramDescription": "the path used to store temporary output files",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "true if the spark session is managed, false otherwise",
    "paramRequired": false
  },
  {
    "paramName": "pl",
    "paramLongName": "projectListPath",
    "paramDescription": "the path of the list of already dumped project ids",
    "paramRequired": true
  }
]
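
These long names match the --sourcePath, --outputPath and --projectListPath arguments that the get_new_projects Oozie action below passes to ProjectsSubsetSparkJob. A minimal sketch of an equivalent argument list (the values are placeholders, not taken from the workflow):

	// Illustrative only: arguments are resolved by ArgumentApplicationParser via paramLongName.
	String[] args = {
		"--sourcePath", "/path/to/workingDir/project",        // -s
		"--outputPath", "/path/to/workingDir/tar/project",    // -out
		"--projectListPath", "/path/to/projectIds",           // -pl
		"--isSparkSessionManaged", "true"                     // -ssm, optional (defaults to true)
	};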
@ -0,0 +1,30 @@
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>jobTracker</name>
|
||||||
|
<value>yarnRM</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>nameNode</name>
|
||||||
|
<value>hdfs://nameservice1</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.use.system.libpath</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hiveMetastoreUris</name>
|
||||||
|
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hiveJdbcUrl</name>
|
||||||
|
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hiveDbName</name>
|
||||||
|
<value>openaire</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
|
@ -0,0 +1,171 @@
|
||||||
|
<workflow-app name="dump_graph" xmlns="uri:oozie:workflow:0.5">
|
||||||
|
<parameters>
|
||||||
|
<property>
|
||||||
|
<name>sourcePath</name>
|
||||||
|
<description>the source path</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>projectListPath</name>
|
||||||
|
<description>the path to the project list</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>outputPath</name>
|
||||||
|
<description>the output path</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>accessToken</name>
|
||||||
|
<description>the access token used for the deposition in Zenodo</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>connectionUrl</name>
|
||||||
|
<description>the connection url for Zenodo</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>metadata</name>
|
||||||
|
<description> the metadata associated to the deposition</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>depositionType</name>
|
||||||
|
<description>the type of deposition we want to perform. "new" for brand new deposition, "version" for a new version of a published deposition (in this case the concept record id must be provided), "upload" to upload content to an open deposition for which we already have the deposition id (in this case the deposition id should be provided)</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>conceptRecordId</name>
|
||||||
|
<description>for new version, the id of the record for the old deposition</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>depositionId</name>
|
||||||
|
<description>the id of an open deposition to which content should be added</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkDriverMemory</name>
|
||||||
|
<description>memory for driver process</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkExecutorMemory</name>
|
||||||
|
<description>memory for individual executor</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkExecutorCores</name>
|
||||||
|
<description>number of cores used by single executor</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozieActionShareLibForSpark2</name>
|
||||||
|
<description>oozie action sharelib for spark 2.*</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2ExtraListeners</name>
|
||||||
|
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
|
||||||
|
<description>spark 2.* extra listeners classname</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2SqlQueryExecutionListeners</name>
|
||||||
|
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
|
||||||
|
<description>spark 2.* sql query execution listeners classname</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2YarnHistoryServerAddress</name>
|
||||||
|
<description>spark 2.* yarn history server address</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2EventLogDir</name>
|
||||||
|
<description>spark 2.* event log dir location</description>
|
||||||
|
</property>
|
||||||
|
</parameters>
|
||||||
|
<global>
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>mapreduce.job.queuename</name>
|
||||||
|
<value>${queueName}</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.launcher.mapred.job.queue.name</name>
|
||||||
|
<value>${oozieLauncherQueueName}</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.action.sharelib.for.spark</name>
|
||||||
|
<value>${oozieActionShareLibForSpark2}</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
||||||
|
</global>
|
||||||
|
<start to="dump_project"/>
|
||||||
|
<kill name="Kill">
|
||||||
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
|
</kill>
|
||||||
|
<action name="dump_project">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Dump table project </name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--sourcePath</arg><arg>${sourcePath}/project</arg>
|
||||||
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${workingDir}/project</arg>
|
||||||
|
<arg>--communityMapPath</arg><arg>noneed</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="get_new_projects"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
<action name="get_new_projects">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Select new projects for the dump</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.dump.projectssubset.ProjectsSubsetSparkJob</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--sourcePath</arg><arg>${workingDir}/project</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${workingDir}/tar/project</arg>
|
||||||
|
<arg>--projectListPath</arg><arg>${projectListPath}</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="make_archive"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
<action name="make_archive">
|
||||||
|
<java>
|
||||||
|
<main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>
|
||||||
|
<arg>--hdfsPath</arg><arg>${outputPath}</arg>
|
||||||
|
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||||
|
<arg>--sourcePath</arg><arg>${workingDir}/tar</arg>
|
||||||
|
</java>
|
||||||
|
<ok to="send_zenodo"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
<action name="send_zenodo">
|
||||||
|
<java>
|
||||||
|
<main-class>eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS</main-class>
|
||||||
|
<arg>--hdfsPath</arg><arg>${outputPath}</arg>
|
||||||
|
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||||
|
<arg>--accessToken</arg><arg>${accessToken}</arg>
|
||||||
|
<arg>--connectionUrl</arg><arg>${connectionUrl}</arg>
|
||||||
|
<arg>--metadata</arg><arg>${metadata}</arg>
|
||||||
|
<arg>--conceptRecordId</arg><arg>${conceptRecordId}</arg>
|
||||||
|
<arg>--depositionType</arg><arg>${depositionType}</arg>
|
||||||
|
<arg>--depositionId</arg><arg>${depositionId}</arg>
|
||||||
|
</java>
|
||||||
|
<ok to="End"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
<end name="End"/>
|
||||||
|
</workflow-app>
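
For reference, a hedged sketch of the job properties one might pass when submitting this workflow; the keys follow the parameters declared above and every value is a placeholder, not a real path or credential:

	# Illustrative job.properties for the projects-subset dump workflow (values are placeholders)
	sourcePath=/path/to/graph
	projectListPath=/path/to/already/dumped/projectIds
	outputPath=/path/to/dump/output
	depositionType=version
	conceptRecordId=<concept-record-id-of-the-published-deposition>
	depositionId=<open-deposition-id>
	accessToken=<zenodo-access-token>
	connectionUrl=<zenodo-deposition-api-url>
	metadata=<deposition-metadata-json>
	sparkDriverMemory=4G
	sparkExecutorMemory=4G
	sparkExecutorCores=2
	oozieActionShareLibForSpark2=spark2
	spark2YarnHistoryServerAddress=<history-server-address>
	spark2EventLogDir=<event-log-dir>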
|
|
@ -0,0 +1,37 @@
|
||||||
|
|
||||||
|
[
|
||||||
|
|
||||||
|
{
|
||||||
|
"paramName":"cmp",
|
||||||
|
"paramLongName":"communityMapPath",
|
||||||
|
"paramDescription": "the path to the serialization of the community map",
|
||||||
|
"paramRequired": false
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"paramName":"s",
|
||||||
|
"paramLongName":"sourcePath",
|
||||||
|
"paramDescription": "the path of the sequencial file to read",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "out",
|
||||||
|
"paramLongName": "outputPath",
|
||||||
|
"paramDescription": "the path used to store temporary output files",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "ssm",
|
||||||
|
"paramLongName": "isSparkSessionManaged",
|
||||||
|
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||||
|
"paramRequired": false
|
||||||
|
}, {
|
||||||
|
"paramName":"cid",
|
||||||
|
"paramLongName":"communityId",
|
||||||
|
"paramDescription": "the id of the community to be dumped",
|
||||||
|
"paramRequired": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@@ -0,0 +1,58 @@

[
  {
    "paramName":"dt",
    "paramLongName":"depositionType",
    "paramDescription": "the type of the deposition (new, version, upload)",
    "paramRequired": true
  },
  {
    "paramName":"cri",
    "paramLongName":"conceptRecordId",
    "paramDescription": "the id of the concept record for a new version",
    "paramRequired": false
  },
  {
    "paramName":"di",
    "paramLongName":"depositionId",
    "paramDescription": "the id of an open deposition which has not yet been published",
    "paramRequired": false
  },
  {
    "paramName":"hdfsp",
    "paramLongName":"hdfsPath",
    "paramDescription": "the path of the folder in which to find the files to send to Zenodo",
    "paramRequired": true
  },
  {
    "paramName": "nn",
    "paramLongName": "nameNode",
    "paramDescription": "the name node",
    "paramRequired": true
  },
  {
    "paramName": "at",
    "paramLongName": "accessToken",
    "paramDescription": "the access token for the deposition",
    "paramRequired": false
  },
  {
    "paramName":"cu",
    "paramLongName":"connectionUrl",
    "paramDescription": "the URL to connect to for the deposition",
    "paramRequired": false
  },
  {
    "paramName":"m",
    "paramLongName":"metadata",
    "paramDescription": "the metadata associated with the deposition",
    "paramRequired": false
  },
  {
    "paramName":"p",
    "paramLongName":"publish",
    "paramDescription": "whether to publish the upload",
    "paramRequired": false
  }
]
|
|
@ -0,0 +1,30 @@
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>jobTracker</name>
|
||||||
|
<value>yarnRM</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>nameNode</name>
|
||||||
|
<value>hdfs://nameservice1</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.use.system.libpath</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hiveMetastoreUris</name>
|
||||||
|
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hiveJdbcUrl</name>
|
||||||
|
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hiveDbName</name>
|
||||||
|
<value>openaire</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
|
@@ -0,0 +1,4 @@
## This is a classpath-based import file (this header is required)
dump_complete classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app
dump_funder classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app
dump_community classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app
|
|
@ -0,0 +1,306 @@
|
||||||
|
<workflow-app name="dump_graph" xmlns="uri:oozie:workflow:0.5">
|
||||||
|
|
||||||
|
<parameters>
|
||||||
|
<property>
|
||||||
|
<name>singleDeposition</name>
|
||||||
|
<description>Indicates if it is a single community deposition</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>communityId</name>
|
||||||
|
<description>the id of the community to be dumped if a dump for a single community should be done</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>dumpType</name>
|
||||||
|
<description>the type of the dump one of {complete, community, funder}</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>onlyUpload</name>
|
||||||
|
<description>true if the dump has already been created and should only be uploaded to Zenodo</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>upload</name>
|
||||||
|
<description>true if the dump should be uploaded to Zenodo</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sourcePath</name>
|
||||||
|
<description>the source path</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>isLookUpUrl</name>
|
||||||
|
<description>the isLookup service endpoint</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>outputPath</name>
|
||||||
|
<description>the output path</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>resultAggregation</name>
|
||||||
|
<description>true if all the result types have to be dumped under result, false otherwise</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>accessToken</name>
|
||||||
|
<description>the access token used for the deposition in Zenodo</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>connectionUrl</name>
|
||||||
|
<description>the connection url for Zenodo</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>metadata</name>
|
||||||
|
<description> the metadata associated to the deposition</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>depositionType</name>
|
||||||
|
<description>the type of deposition we want to perform. "new" for brand new deposition, "version" for a new version of a published deposition (in this case the concept record id must be provided), "upload" to upload content to an open deposition for which we already have the deposition id (in this case the deposition id should be provided)</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>conceptRecordId</name>
|
||||||
|
<description>for new version, the id of the record for the old deposition</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>depositionId</name>
|
||||||
|
<description>the id of an open deposition to which content should be added</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>organizationCommunityMap</name>
|
||||||
|
<description>the organization community map</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>hiveDbName</name>
|
||||||
|
<description>the target hive database name</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hiveJdbcUrl</name>
|
||||||
|
<description>hive server jdbc url</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hiveMetastoreUris</name>
|
||||||
|
<description>hive server metastore URIs</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkDriverMemory</name>
|
||||||
|
<description>memory for driver process</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkExecutorMemory</name>
|
||||||
|
<description>memory for individual executor</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkExecutorCores</name>
|
||||||
|
<description>number of cores used by single executor</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozieActionShareLibForSpark2</name>
|
||||||
|
<description>oozie action sharelib for spark 2.*</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2ExtraListeners</name>
|
||||||
|
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
|
||||||
|
<description>spark 2.* extra listeners classname</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2SqlQueryExecutionListeners</name>
|
||||||
|
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
|
||||||
|
<description>spark 2.* sql query execution listeners classname</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2YarnHistoryServerAddress</name>
|
||||||
|
<description>spark 2.* yarn history server address</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2EventLogDir</name>
|
||||||
|
<description>spark 2.* event log dir location</description>
|
||||||
|
</property>
|
||||||
|
</parameters>
|
||||||
|
|
||||||
|
<global>
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>mapreduce.job.queuename</name>
|
||||||
|
<value>${queueName}</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.launcher.mapred.job.queue.name</name>
|
||||||
|
<value>${oozieLauncherQueueName}</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.action.sharelib.for.spark</name>
|
||||||
|
<value>${oozieActionShareLibForSpark2}</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
</configuration>
|
||||||
|
</global>
|
||||||
|
|
||||||
|
<start to="only_upload"/>
|
||||||
|
|
||||||
|
<kill name="Kill">
|
||||||
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
|
</kill>
|
||||||
|
|
||||||
|
<decision name="only_upload">
|
||||||
|
<switch>
|
||||||
|
<case to="send_zenodo">${wf:conf('onlyUpload') eq true}</case>
|
||||||
|
<default to="reset_outputpath"/>
|
||||||
|
</switch>
|
||||||
|
</decision>
|
||||||
|
|
||||||
|
<action name="reset_outputpath">
|
||||||
|
<fs>
|
||||||
|
<delete path="${outputPath}"/>
|
||||||
|
<mkdir path="${outputPath}"/>
|
||||||
|
</fs>
|
||||||
|
<ok to="save_community_map"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="save_community_map">
|
||||||
|
<java>
|
||||||
|
<main-class>eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap</main-class>
|
||||||
|
<arg>--outputPath</arg><arg>${workingDir}/communityMap</arg>
|
||||||
|
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||||
|
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||||
|
<arg>--singleDeposition</arg><arg>${singleDeposition}</arg>
|
||||||
|
<arg>--communityId</arg><arg>${communityId}</arg>
|
||||||
|
</java>
|
||||||
|
<ok to="choose_dump"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<decision name="choose_dump">
|
||||||
|
<switch>
|
||||||
|
<case to="dump_funder">${wf:conf('dumpType') eq "funder"}</case>
|
||||||
|
<case to="dump_community">${wf:conf('dumpType') eq "community"}</case>
|
||||||
|
<default to="dump_complete"/>
|
||||||
|
</switch>
|
||||||
|
</decision>
|
||||||
|
|
||||||
|
<!-- Sub-workflow which runs the dump for the complete graph -->
|
||||||
|
<action name="dump_complete">
|
||||||
|
<sub-workflow>
|
||||||
|
<app-path>${wf:appPath()}/dump_complete
|
||||||
|
</app-path>
|
||||||
|
<propagate-configuration/>
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>communityMapPath</name>
|
||||||
|
<value>${workingDir}/communityMap</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>outputPath</name>
|
||||||
|
<value>${workingDir}/tar</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sourcePath</name>
|
||||||
|
<value>${sourcePath}</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>organizationCommunityMap</name>
|
||||||
|
<value>${organizationCommunityMap}</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>isLookUpUrl</name>
|
||||||
|
<value>${isLookUpUrl}</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>resultAggregation</name>
|
||||||
|
<value>${resultAggregation}</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
||||||
|
</sub-workflow>
|
||||||
|
<ok to="make_archive" />
|
||||||
|
<error to="Kill" />
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<!-- Sub-workflow which runs the dump for the complete graph -->
|
||||||
|
<action name="dump_community">
|
||||||
|
<sub-workflow>
|
||||||
|
<app-path>${wf:appPath()}/dump_community
|
||||||
|
</app-path>
|
||||||
|
<propagate-configuration/>
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>sourcePath</name>
|
||||||
|
<value>${sourcePath}</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>communityMapPath</name>
|
||||||
|
<value>${workingDir}/communityMap</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>outputPath</name>
|
||||||
|
<value>${workingDir}/tar</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
||||||
|
</sub-workflow>
|
||||||
|
<ok to="make_archive" />
|
||||||
|
<error to="Kill" />
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="dump_funder">
|
||||||
|
<sub-workflow>
|
||||||
|
<app-path>${wf:appPath()}/dump_funder
|
||||||
|
</app-path>
|
||||||
|
<propagate-configuration/>
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>communityMapPath</name>
|
||||||
|
<value>${workingDir}/communityMap</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>outputPath</name>
|
||||||
|
<value>${workingDir}/tar</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sourcePath</name>
|
||||||
|
<value>${sourcePath}</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>dumpType</name>
|
||||||
|
<value>${dumpType}</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
||||||
|
</sub-workflow>
|
||||||
|
<ok to="make_archive" />
|
||||||
|
<error to="Kill" />
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="make_archive">
|
||||||
|
<java>
|
||||||
|
<main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>
|
||||||
|
<arg>--hdfsPath</arg><arg>${outputPath}</arg>
|
||||||
|
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||||
|
<arg>--sourcePath</arg><arg>${workingDir}/tar</arg>
|
||||||
|
</java>
|
||||||
|
<ok to="should_upload"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<decision name="should_upload">
|
||||||
|
<switch>
|
||||||
|
<case to="send_zenodo">${wf:conf('upload') eq true}</case>
|
||||||
|
<default to="End"/>
|
||||||
|
</switch>
|
||||||
|
</decision>
|
||||||
|
|
||||||
|
<action name="send_zenodo">
|
||||||
|
<java>
|
||||||
|
<main-class>eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS</main-class>
|
||||||
|
<arg>--hdfsPath</arg><arg>${outputPath}</arg>
|
||||||
|
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||||
|
<arg>--accessToken</arg><arg>${accessToken}</arg>
|
||||||
|
<arg>--connectionUrl</arg><arg>${connectionUrl}</arg>
|
||||||
|
<arg>--metadata</arg><arg>${metadata}</arg>
|
||||||
|
<arg>--conceptRecordId</arg><arg>${conceptRecordId}</arg>
|
||||||
|
<arg>--depositionType</arg><arg>${depositionType}</arg>
|
||||||
|
<arg>--depositionId</arg><arg>${depositionId}</arg>
|
||||||
|
</java>
|
||||||
|
<ok to="End"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<end name="End"/>
|
||||||
|
|
||||||
|
</workflow-app>
|
|
@ -0,0 +1,30 @@
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>jobTracker</name>
|
||||||
|
<value>yarnRM</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>nameNode</name>
|
||||||
|
<value>hdfs://nameservice1</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.use.system.libpath</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hiveMetastoreUris</name>
|
||||||
|
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hiveJdbcUrl</name>
|
||||||
|
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hiveDbName</name>
|
||||||
|
<value>openaire</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
|
@ -0,0 +1,362 @@
|
||||||
|
<workflow-app name="sub_dump_community_products" xmlns="uri:oozie:workflow:0.5">
|
||||||
|
|
||||||
|
<parameters>
|
||||||
|
<property>
|
||||||
|
<name>sourcePath</name>
|
||||||
|
<description>the source path</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>outputPath</name>
|
||||||
|
<description>the output path</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hiveDbName</name>
|
||||||
|
<description>the target hive database name</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hiveJdbcUrl</name>
|
||||||
|
<description>hive server jdbc url</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hiveMetastoreUris</name>
|
||||||
|
<description>hive server metastore URIs</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkDriverMemory</name>
|
||||||
|
<description>memory for driver process</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkExecutorMemory</name>
|
||||||
|
<description>memory for individual executor</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkExecutorCores</name>
|
||||||
|
<description>number of cores used by single executor</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozieActionShareLibForSpark2</name>
|
||||||
|
<description>oozie action sharelib for spark 2.*</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2ExtraListeners</name>
|
||||||
|
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
|
||||||
|
<description>spark 2.* extra listeners classname</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2SqlQueryExecutionListeners</name>
|
||||||
|
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
|
||||||
|
<description>spark 2.* sql query execution listeners classname</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2YarnHistoryServerAddress</name>
|
||||||
|
<description>spark 2.* yarn history server address</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2EventLogDir</name>
|
||||||
|
<description>spark 2.* event log dir location</description>
|
||||||
|
</property>
|
||||||
|
</parameters>
|
||||||
|
|
||||||
|
<global>
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>mapreduce.job.queuename</name>
|
||||||
|
<value>${queueName}</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.launcher.mapred.job.queue.name</name>
|
||||||
|
<value>${oozieLauncherQueueName}</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.action.sharelib.for.spark</name>
|
||||||
|
<value>${oozieActionShareLibForSpark2}</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
</configuration>
|
||||||
|
</global>
|
||||||
|
|
||||||
|
<start to="fork_dump"/>
|
||||||
|
|
||||||
|
<kill name="Kill">
|
||||||
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
|
</kill>
|
||||||
|
|
||||||
|
<fork name="fork_dump">
|
||||||
|
<path start="dump_publication"/>
|
||||||
|
<path start="dump_dataset"/>
|
||||||
|
<path start="dump_orp"/>
|
||||||
|
<path start="dump_software"/>
|
||||||
|
</fork>
|
||||||
|
|
||||||
|
<action name="dump_publication">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Dump table publication for community/funder related products</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
||||||
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${workingDir}/dump/publication</arg>
|
||||||
|
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
|
||||||
|
<arg>--dumpType</arg><arg>${dumpType}</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="join_dump"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="dump_dataset">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Dump table dataset for community/funder related products</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
||||||
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${workingDir}/dump/dataset</arg>
|
||||||
|
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="join_dump"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="dump_orp">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Dump table ORP for community related products</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
||||||
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${workingDir}/dump/otherresearchproduct</arg>
|
||||||
|
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="join_dump"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="dump_software">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Dump table software for community related products</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
||||||
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${workingDir}/dump/software</arg>
|
||||||
|
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="join_dump"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<join name="join_dump" to="prepareResultProject"/>
|
||||||
|
|
||||||
|
<action name="prepareResultProject">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Prepare association result subset of project info</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="fork_extendWithProject"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<fork name="fork_extendWithProject">
|
||||||
|
<path start="extend_publication"/>
|
||||||
|
<path start="extend_dataset"/>
|
||||||
|
<path start="extend_orp"/>
|
||||||
|
<path start="extend_software"/>
|
||||||
|
</fork>
|
||||||
|
|
||||||
|
<action name="extend_publication">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Extend dumped publications with information about project</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--sourcePath</arg><arg>${workingDir}/dump/publication</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${workingDir}/ext/publication</arg>
|
||||||
|
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="join_extend"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
    <action name="extend_dataset">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Extend dumped dataset with information about project</name>
            <class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${workingDir}/dump/dataset</arg>
            <arg>--outputPath</arg><arg>${workingDir}/ext/dataset</arg>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
        </spark>
        <ok to="join_extend"/>
        <error to="Kill"/>
    </action>

    <action name="extend_orp">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Extend dumped ORP with information about project</name>
            <class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${workingDir}/dump/otherresearchproduct</arg>
            <arg>--outputPath</arg><arg>${workingDir}/ext/orp</arg>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
        </spark>
        <ok to="join_extend"/>
        <error to="Kill"/>
    </action>

    <action name="extend_software">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Extend dumped software with information about project</name>
            <class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${workingDir}/dump/software</arg>
            <arg>--outputPath</arg><arg>${workingDir}/ext/software</arg>
            <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
        </spark>
        <ok to="join_extend"/>
        <error to="Kill"/>
    </action>

    <join name="join_extend" to="splitForCommunities"/>

    <action name="splitForCommunities">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Split dumped result for community</name>
            <class>eu.dnetlib.dhp.oa.graph.dump.community.SparkSplitForCommunity</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${workingDir}/ext</arg>
            <arg>--outputPath</arg><arg>${outputPath}</arg>
            <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>

    <end name="End"/>

</workflow-app>

@ -0,0 +1,30 @@

<configuration>
    <property>
        <name>jobTracker</name>
        <value>yarnRM</value>
    </property>
    <property>
        <name>nameNode</name>
        <value>hdfs://nameservice1</value>
    </property>
    <property>
        <name>oozie.use.system.libpath</name>
        <value>true</value>
    </property>
    <property>
        <name>hiveMetastoreUris</name>
        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
    </property>
    <property>
        <name>hiveJdbcUrl</name>
        <value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
    </property>
    <property>
        <name>hiveDbName</name>
        <value>openaire</value>
    </property>
    <property>
        <name>oozie.launcher.mapreduce.user.classpath.first</name>
        <value>true</value>
    </property>
</configuration>

@ -0,0 +1,539 @@

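<!-- Overview of this sub-workflow: fork_dump dumps each entity type (publication, dataset,
     ORP, software, organization, project, datasource) and selects the valid relations;
     after join_dump, fork_context creates the context entities and the context/organization
     relations; after join_context, fork_extract_relations extracts the relations encoded in
     the result entities; collect_and_save finally gathers results and relations under the
     output path. -->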
<workflow-app name="sub-dump_complete" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>sourcePath</name>
            <description>the source path</description>
        </property>
        <property>
            <name>outputPath</name>
            <description>the output path</description>
        </property>
        <property>
            <name>resultAggregation</name>
            <description>true if all the result types have to be dumped under result, false otherwise</description>
        </property>
        <property>
            <name>organizationCommunityMap</name>
            <description>the organization community map</description>
        </property>

        <property>
            <name>hiveDbName</name>
            <description>the target hive database name</description>
        </property>
        <property>
            <name>hiveJdbcUrl</name>
            <description>hive server jdbc url</description>
        </property>
        <property>
            <name>hiveMetastoreUris</name>
            <description>hive server metastore URIs</description>
        </property>
        <property>
            <name>sparkDriverMemory</name>
            <description>memory for driver process</description>
        </property>
        <property>
            <name>sparkExecutorMemory</name>
            <description>memory for individual executor</description>
        </property>
        <property>
            <name>sparkExecutorCores</name>
            <description>number of cores used by single executor</description>
        </property>
        <property>
            <name>oozieActionShareLibForSpark2</name>
            <description>oozie action sharelib for spark 2.*</description>
        </property>
        <property>
            <name>spark2ExtraListeners</name>
            <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
            <description>spark 2.* extra listeners classname</description>
        </property>
        <property>
            <name>spark2SqlQueryExecutionListeners</name>
            <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
            <description>spark 2.* sql query execution listeners classname</description>
        </property>
        <property>
            <name>spark2YarnHistoryServerAddress</name>
            <description>spark 2.* yarn history server address</description>
        </property>
        <property>
            <name>spark2EventLogDir</name>
            <description>spark 2.* event log dir location</description>
        </property>
    </parameters>

    <global>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>mapreduce.job.queuename</name>
                <value>${queueName}</value>
            </property>
            <property>
                <name>oozie.launcher.mapred.job.queue.name</name>
                <value>${oozieLauncherQueueName}</value>
            </property>
            <property>
                <name>oozie.action.sharelib.for.spark</name>
                <value>${oozieActionShareLibForSpark2}</value>
            </property>
        </configuration>
    </global>

    <start to="fork_dump" />

    <fork name="fork_dump">
        <path start="dump_publication"/>
        <path start="dump_dataset"/>
        <path start="dump_orp"/>
        <path start="dump_software"/>
        <path start="dump_organization"/>
        <path start="dump_project"/>
        <path start="dump_datasource"/>
        <path start="select_relation"/>
    </fork>

    <action name="dump_publication">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump table publication</name>
            <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
            <arg>--outputPath</arg><arg>${workingDir}/result/publication</arg>
            <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
        </spark>
        <ok to="join_dump"/>
        <error to="Kill"/>
    </action>

    <action name="dump_dataset">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump table dataset</name>
            <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
            <arg>--outputPath</arg><arg>${workingDir}/result/dataset</arg>
            <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
        </spark>
        <ok to="join_dump"/>
        <error to="Kill"/>
    </action>

    <action name="dump_orp">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump table ORP</name>
            <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
            <arg>--outputPath</arg><arg>${workingDir}/result/otherresearchproduct</arg>
            <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
        </spark>
        <ok to="join_dump"/>
        <error to="Kill"/>
    </action>

    <action name="dump_software">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump table software</name>
            <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
            <arg>--outputPath</arg><arg>${workingDir}/result/software</arg>
            <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
        </spark>
        <ok to="join_dump"/>
        <error to="Kill"/>
    </action>

    <action name="dump_organization">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump table organization</name>
            <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/organization</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg>
            <arg>--outputPath</arg><arg>${outputPath}/organization</arg>
            <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
        </spark>
        <ok to="join_dump"/>
        <error to="Kill"/>
    </action>

    <action name="dump_project">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump table project</name>
            <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/project</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
            <arg>--outputPath</arg><arg>${outputPath}/project</arg>
            <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
        </spark>
        <ok to="join_dump"/>
        <error to="Kill"/>
    </action>

    <action name="dump_datasource">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump table datasource</name>
            <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/datasource</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg>
            <arg>--outputPath</arg><arg>${outputPath}/datasource</arg>
            <arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
        </spark>
        <ok to="join_dump"/>
        <error to="Kill"/>
    </action>

    <action name="select_relation">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Select valid table relation</name>
            <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkSelectValidRelationsJob</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
            <arg>--outputPath</arg><arg>${workingDir}/validrelation</arg>
        </spark>
        <ok to="dump_relation"/>
        <error to="Kill"/>
    </action>

    <action name="dump_relation">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump table relation</name>
            <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpRelationJob</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${workingDir}/validrelation</arg>
            <arg>--outputPath</arg><arg>${workingDir}/relation/relation</arg>
            <arg>--removeSet</arg><arg>${removeSet}</arg>
        </spark>
        <ok to="join_dump"/>
        <error to="Kill"/>
    </action>

    <join name="join_dump" to="fork_context"/>

    <fork name="fork_context">
        <path start="create_entities_fromcontext"/>
        <path start="create_relation_fromcontext"/>
        <path start="create_relation_fromorgs"/>
    </fork>

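    <!-- The next three actions materialise the research community / infrastructure contexts:
         two java actions use the IS lookup service (isLookUpUrl) to create context entities
         and context relations, while SparkOrganizationRelation derives the relations between
         contexts and organizations from the organizationCommunityMap. -->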
    <action name="create_entities_fromcontext">
        <java>
            <main-class>eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextEntities</main-class>
            <arg>--hdfsPath</arg><arg>${outputPath}/communities_infrastructures/communities_infrastructure.json.gz</arg>
            <arg>--nameNode</arg><arg>${nameNode}</arg>
            <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
        </java>
        <ok to="join_context"/>
        <error to="Kill"/>
    </action>

    <action name="create_relation_fromcontext">
        <java>
            <main-class>eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextRelation</main-class>
            <arg>--hdfsPath</arg><arg>${workingDir}/relation/context</arg>
            <arg>--nameNode</arg><arg>${nameNode}</arg>
            <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
        </java>
        <ok to="join_context"/>
        <error to="Kill"/>
    </action>

    <action name="create_relation_fromorgs">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump table relation</name>
            <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkOrganizationRelation</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
            <arg>--outputPath</arg><arg>${workingDir}/relation/contextOrg</arg>
            <arg>--organizationCommunityMap</arg><arg>${organizationCommunityMap}</arg>
            <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
        </spark>
        <ok to="join_context"/>
        <error to="Kill"/>
    </action>

    <join name="join_context" to="fork_extract_relations"/>

    <fork name="fork_extract_relations">
        <path start="rels_from_pubs"/>
        <path start="rels_from_dats"/>
        <path start="rels_from_orp"/>
        <path start="rels_from_sw"/>
    </fork>

    <action name="rels_from_pubs">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Extract Relations from publication</name>
            <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
            <arg>--outputPath</arg><arg>${workingDir}/relation/publication</arg>
            <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
        </spark>
        <ok to="join_extract_relations"/>
        <error to="Kill"/>
    </action>

    <action name="rels_from_dats">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump table dataset</name>
            <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
            <arg>--outputPath</arg><arg>${workingDir}/relation/dataset</arg>
            <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
        </spark>
        <ok to="join_extract_relations"/>
        <error to="Kill"/>
    </action>

    <action name="rels_from_orp">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump table ORP</name>
            <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
            <arg>--outputPath</arg><arg>${workingDir}/relation/orp</arg>
            <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
        </spark>
        <ok to="join_extract_relations"/>
        <error to="Kill"/>
    </action>

    <action name="rels_from_sw">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump table software</name>
            <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
            <arg>--outputPath</arg><arg>${workingDir}/relation/software</arg>
            <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
        </spark>
        <ok to="join_extract_relations"/>
        <error to="Kill"/>
    </action>

    <join name="join_extract_relations" to="collect_and_save"/>

    <action name="collect_and_save">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Collect Results and Relations and put them in the right path</name>
            <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkCollectAndSave</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${workingDir}</arg>
            <arg>--outputPath</arg><arg>${outputPath}</arg>
            <arg>--resultAggregation</arg><arg>${resultAggregation}</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>

    <kill name="Kill">
        <message>Sub-workflow dump complete failed with error message ${wf:errorMessage()}
        </message>
    </kill>

    <end name="End" />

</workflow-app>

@ -0,0 +1,30 @@

<configuration>
    <property>
        <name>jobTracker</name>
        <value>yarnRM</value>
    </property>
    <property>
        <name>nameNode</name>
        <value>hdfs://nameservice1</value>
    </property>
    <property>
        <name>oozie.use.system.libpath</name>
        <value>true</value>
    </property>
    <property>
        <name>hiveMetastoreUris</name>
        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
    </property>
    <property>
        <name>hiveJdbcUrl</name>
        <value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
    </property>
    <property>
        <name>hiveDbName</name>
        <value>openaire</value>
    </property>
    <property>
        <name>oozie.launcher.mapreduce.user.classpath.first</name>
        <value>true</value>
    </property>
</configuration>

@ -0,0 +1,255 @@

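<!-- Overview of this sub-workflow: prepareResultProject associates results with project
     information; the fork selects, for each result type, the results linked to at least
     one project; after join_link, dump_funder_results (SparkDumpFunderResults) writes the
     selected results to the output path. -->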
<workflow-app name="sub_dump_funder_results" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>sourcePath</name>
            <description>the source path</description>
        </property>
        <property>
            <name>outputPath</name>
            <description>the output path</description>
        </property>
        <property>
            <name>hiveDbName</name>
            <description>the target hive database name</description>
        </property>
        <property>
            <name>hiveJdbcUrl</name>
            <description>hive server jdbc url</description>
        </property>
        <property>
            <name>hiveMetastoreUris</name>
            <description>hive server metastore URIs</description>
        </property>
        <property>
            <name>sparkDriverMemory</name>
            <description>memory for driver process</description>
        </property>
        <property>
            <name>sparkExecutorMemory</name>
            <description>memory for individual executor</description>
        </property>
        <property>
            <name>sparkExecutorCores</name>
            <description>number of cores used by single executor</description>
        </property>
        <property>
            <name>oozieActionShareLibForSpark2</name>
            <description>oozie action sharelib for spark 2.*</description>
        </property>
        <property>
            <name>spark2ExtraListeners</name>
            <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
            <description>spark 2.* extra listeners classname</description>
        </property>
        <property>
            <name>spark2SqlQueryExecutionListeners</name>
            <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
            <description>spark 2.* sql query execution listeners classname</description>
        </property>
        <property>
            <name>spark2YarnHistoryServerAddress</name>
            <description>spark 2.* yarn history server address</description>
        </property>
        <property>
            <name>spark2EventLogDir</name>
            <description>spark 2.* event log dir location</description>
        </property>
    </parameters>

    <global>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>mapreduce.job.queuename</name>
                <value>${queueName}</value>
            </property>
            <property>
                <name>oozie.launcher.mapred.job.queue.name</name>
                <value>${oozieLauncherQueueName}</value>
            </property>
            <property>
                <name>oozie.action.sharelib.for.spark</name>
                <value>${oozieActionShareLibForSpark2}</value>
            </property>
        </configuration>
    </global>

    <start to="prepareResultProject"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <action name="prepareResultProject">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Prepare association result subset of project info</name>
            <class>eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
            <arg>--outputPath</arg><arg>${workingDir}/preparedInfo</arg>
        </spark>
        <ok to="fork_result_linked_to_projects"/>
        <error to="Kill"/>
    </action>

    <fork name="fork_result_linked_to_projects">
        <path start="select_publication_linked_to_projects"/>
        <path start="select_dataset_linked_to_projects"/>
        <path start="select_orp_linked_to_project"/>
        <path start="select_software_linked_to_projects"/>
    </fork>

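    <!-- The four actions below run SparkResultLinkedToProject for each result type, keeping
         only the results that are linked to a project (using the prepared info as graphPath),
         before join_link triggers the final funder dump. -->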
    <action name="select_publication_linked_to_projects">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump funder results</name>
            <class>eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
            <arg>--outputPath</arg><arg>${workingDir}/result/publication</arg>
            <arg>--graphPath</arg><arg>${workingDir}/preparedInfo</arg>
            <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
        </spark>
        <ok to="join_link"/>
        <error to="Kill"/>
    </action>

    <action name="select_dataset_linked_to_projects">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump funder results</name>
            <class>eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
            <arg>--outputPath</arg><arg>${workingDir}/result/dataset</arg>
            <arg>--graphPath</arg><arg>${workingDir}/preparedInfo</arg>
            <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
        </spark>
        <ok to="join_link"/>
        <error to="Kill"/>
    </action>

    <action name="select_orp_linked_to_project">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump funder results</name>
            <class>eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
            <arg>--outputPath</arg><arg>${workingDir}/result/otherresearchproduct</arg>
            <arg>--graphPath</arg><arg>${workingDir}/preparedInfo</arg>
            <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
        </spark>
        <ok to="join_link"/>
        <error to="Kill"/>
    </action>

    <action name="select_software_linked_to_projects">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump funder results</name>
            <class>eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
            <arg>--outputPath</arg><arg>${workingDir}/result/software</arg>
            <arg>--graphPath</arg><arg>${workingDir}/preparedInfo</arg>
            <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
        </spark>
        <ok to="join_link"/>
        <error to="Kill"/>
    </action>

    <join name="join_link" to="dump_funder_results"/>

    <action name="dump_funder_results">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump funder results</name>
            <class>eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkDumpFunderResults</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${workingDir}/result</arg>
            <arg>--outputPath</arg><arg>${outputPath}</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>

    <end name="End"/>

</workflow-app>

@ -0,0 +1,970 @@

package eu.dnetlib.dhp.oa.graph.dump;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;

import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.dump.oaf.Instance;
import eu.dnetlib.dhp.schema.dump.oaf.OpenAccessRoute;
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
import eu.dnetlib.dhp.schema.dump.oaf.graph.GraphResult;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Software;

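/**
 * Tests for the result dump: each test reads a result record from the test resources,
 * runs DumpProducts on a local Spark session and verifies the fields of the produced
 * GraphResult (or CommunityResult) records.
 */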
//@Disabled
public class DumpJobTest {

	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

	private static SparkSession spark;

	private static Path workingDir;

	private static final Logger log = LoggerFactory.getLogger(DumpJobTest.class);

	private static final CommunityMap map = new CommunityMap();

	static {
		map.put("egi", "EGI Federation");
		map.put("fet-fp7", "FET FP7");
		map.put("fet-h2020", "FET H2020");
		map.put("clarin", "CLARIN");
		map.put("fam", "Fisheries and Aquaculture Management");
		map.put("ni", "Neuroinformatics");
		map.put("mes", "European Marine Scinece");
		map.put("instruct", "Instruct-Eric");
		map.put("rda", "Research Data Alliance");
		map.put("elixir-gr", "ELIXIR GR");
		map.put("aginfra", "Agricultural and Food Sciences");
		map.put("dariah", "DARIAH EU");
		map.put("risis", "RISI");
		map.put("ee", "SDSN - Greece");
		map.put("oa-pg", "EC Post-Grant Open Access Pilot");
		map.put("beopen", "Transport Research");
		map.put("euromarine", "Euromarine");
		map.put("ifremer", "Ifremer");
		map.put("dh-ch", "Digital Humanities and Cultural Heritage");
		map.put("science-innovation-policy", "Science and Innovation Policy Studies");
		map.put("covid-19", "COVID-19");
		map.put("enrmaps", "Energy Research");
		map.put("epos", "EPOS");
	}

	List<String> communityMap = Arrays
		.asList(
			"<community id=\"egi\" label=\"EGI Federation\"/>",
			"<community id=\"fet-fp7\" label=\"FET FP7\"/>",
			"<community id=\"fet-h2020\" label=\"FET H2020\"/>",
			"<community id=\"clarin\" label=\"CLARIN\"/>",
			"<community id=\"rda\" label=\"Research Data Alliance\"/>",
			"<community id=\"ee\" label=\"SDSN - Greece\"/>",
			"<community id=\"dh-ch\" label=\"Digital Humanities and Cultural Heritage\"/>",
			"<community id=\"fam\" label=\"Fisheries and Aquaculture Management\"/>",
			"<community id=\"ni\" label=\"Neuroinformatics\"/>",
			"<community id=\"mes\" label=\"European Marine Science\"/>",
			"<community id=\"instruct\" label=\"Instruct-ERIC\"/>",
			"<community id=\"elixir-gr\" label=\"ELIXIR GR\"/>",
			"<community id=\"aginfra\" label=\"Agricultural and Food Sciences\"/>",
			"<community id=\"dariah\" label=\"DARIAH EU\"/>",
			"<community id=\"risis\" label=\"RISIS\"/>",
			"<community id=\"epos\" label=\"EPOS\"/>",
			"<community id=\"beopen\" label=\"Transport Research\"/>",
			"<community id=\"euromarine\" label=\"EuroMarine\"/>",
			"<community id=\"ifremer\" label=\"Ifremer\"/>",
			"<community id=\"oa-pg\" label=\"EC Post-Grant Open Access Pilot\"/>",
			"<community id=\"science-innovation-policy\" label=\"Science and Innovation Policy Studies\"/>",
			"<community id=\"covid-19\" label=\"COVID-19\"/>",
			"<community id=\"enermaps\" label=\"Energy Research\"/>");

	private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
		+
		" where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
		" return " +
		"<community> " +
		"{$x//CONFIGURATION/context/@id}" +
		"{$x//CONFIGURATION/context/@label}" +
		"</community>";

	@BeforeAll
	public static void beforeAll() throws IOException {
		workingDir = Files.createTempDirectory(DumpJobTest.class.getSimpleName());
		log.info("using work dir {}", workingDir);

		SparkConf conf = new SparkConf();
		conf.setAppName(DumpJobTest.class.getSimpleName());

		conf.setMaster("local[*]");
		conf.set("spark.driver.host", "localhost");
		conf.set("hive.metastore.local", "true");
		conf.set("spark.ui.enabled", "false");
		conf.set("spark.sql.warehouse.dir", workingDir.toString());
		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

		spark = SparkSession
			.builder()
			.appName(DumpJobTest.class.getSimpleName())
			.config(conf)
			.getOrCreate();
	}

	@AfterAll
	public static void afterAll() throws IOException {
		FileUtils.deleteDirectory(workingDir.toFile());
		spark.stop();
	}

	@Test
	public void testMap() {
		System.out.println(new Gson().toJson(map));
	}

	@Test
	public void testPublicationDump() {
		final String sourcePath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance")
			.getPath();

		final String communityMapPath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
			.getPath();

		DumpProducts dump = new DumpProducts();
		dump
			.run(
				// false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
				false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
				GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType());

		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

		JavaRDD<GraphResult> tmp = sc
			.textFile(workingDir.toString() + "/result")
			.map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class));

		org.apache.spark.sql.Dataset<GraphResult> verificationDataset = spark
			.createDataset(tmp.rdd(), Encoders.bean(GraphResult.class));

		Assertions.assertEquals(1, verificationDataset.count());

		GraphResult gr = verificationDataset.first();

		Assertions.assertEquals(6, gr.getAuthor().size());
		Assertions
			.assertTrue(
				gr.getAuthor().stream()
					.anyMatch(
						a -> a.getFullname().equals("Nikolaidou,Charitini") &&
							a.getName().equals("Charitini") && a.getSurname().equals("Nikolaidou")
							&& a.getRank() == 1 && a.getPid() == null));

		Assertions
			.assertTrue(
				gr.getAuthor().stream()
					.anyMatch(
						a -> a.getFullname().equals("Votsi,Nefta") &&
							a.getName().equals("Nefta") && a.getSurname().equals("Votsi")
							&& a.getRank() == 2 && a.getPid().getId().getScheme().equals(ModelConstants.ORCID)
							&& a.getPid().getId().getValue().equals("0000-0001-6651-1178")
							&& a.getPid().getProvenance() != null));

		Assertions
			.assertTrue(
				gr.getAuthor().stream()
					.anyMatch(
						a -> a.getFullname().equals("Sgardelis,Steanos") &&
							a.getName().equals("Steanos") && a.getSurname().equals("Sgardelis")
							&& a.getRank() == 3 && a.getPid().getId().getScheme().equals(ModelConstants.ORCID_PENDING)
							&& a.getPid().getId().getValue().equals("0000-0001-6651-1178")
							&& a.getPid().getProvenance() != null));

		Assertions
			.assertTrue(
				gr.getAuthor().stream()
					.anyMatch(
						a -> a.getFullname().equals("Halley,John") &&
							a.getName().equals("John") && a.getSurname().equals("Halley")
							&& a.getRank() == 4 && a.getPid() == null));

		Assertions
			.assertTrue(
				gr.getAuthor().stream()
					.anyMatch(
						a -> a.getFullname().equals("Pantis,John") &&
							a.getName().equals("John") && a.getSurname().equals("Pantis")
							&& a.getRank() == 5 && a.getPid().getId().getScheme().equals(ModelConstants.ORCID)
							&& a.getPid().getId().getValue().equals("0000-0001-6651-1178")
							&& a.getPid().getProvenance() != null));

		Assertions
			.assertTrue(
				gr.getAuthor().stream()
					.anyMatch(
						a -> a.getFullname().equals("Tsiafouli,Maria") &&
							a.getName().equals("Maria") && a.getSurname().equals("Tsiafouli")
							&& a.getRank() == 6 && a.getPid().getId().getScheme().equals(ModelConstants.ORCID_PENDING)
							&& a.getPid().getId().getValue().equals("0000-0001-6651-1178")
							&& a.getPid().getProvenance() != null));

		Assertions.assertEquals("publication", gr.getType());

		Assertions.assertEquals("eng", gr.getLanguage().getCode());
		Assertions.assertEquals("English", gr.getLanguage().getLabel());

		Assertions.assertEquals(1, gr.getCountry().size());
		Assertions.assertEquals("IT", gr.getCountry().get(0).getCode());
		Assertions.assertEquals("Italy", gr.getCountry().get(0).getLabel());
		Assertions.assertTrue(gr.getCountry().get(0).getProvenance() == null);

		Assertions.assertEquals(12, gr.getSubjects().size());
		Assertions
			.assertTrue(
				gr.getSubjects().stream()
					.anyMatch(
						s -> s.getSubject().getValue().equals("Ecosystem Services hotspots")
							&& s.getSubject().getScheme().equals("ACM") && s.getProvenance() != null &&
							s.getProvenance().getProvenance().equals("sysimport:crosswalk:repository")));
		Assertions
			.assertTrue(
				gr.getSubjects().stream()
					.anyMatch(
						s -> s.getSubject().getValue().equals("Natura 2000")
							&& s.getSubject().getScheme().equals("") && s.getProvenance() != null &&
							s.getProvenance().getProvenance().equals("sysimport:crosswalk:repository")));

		Assertions
			.assertEquals(
				"Ecosystem Service capacity is higher in areas of multiple designation types",
				gr.getMaintitle());

		Assertions.assertEquals(null, gr.getSubtitle());

		Assertions.assertEquals(1, gr.getDescription().size());

		Assertions
			.assertTrue(
				gr.getDescription().get(0)
					.startsWith("The implementation of the Ecosystem Service (ES) concept into practice"));
		Assertions
			.assertTrue(
				gr.getDescription().get(0)
					.endsWith(
						"start complying with new standards and demands for nature conservation and environmental management."));

		Assertions.assertEquals("2017-01-01", gr.getPublicationdate());

		Assertions.assertEquals("Pensoft Publishers", gr.getPublisher());

		Assertions.assertEquals(null, gr.getEmbargoenddate());

		Assertions.assertEquals(1, gr.getSource().size());
		Assertions.assertEquals("One Ecosystem 2: e13718", gr.getSource().get(0));

		Assertions.assertEquals(1, gr.getFormat().size());
		Assertions.assertEquals("text/html", gr.getFormat().get(0));

		Assertions.assertEquals(0, gr.getContributor().size());

		Assertions.assertEquals(0, gr.getCoverage().size());

		Assertions.assertEquals(ModelConstants.ACCESS_RIGHT_OPEN, gr.getBestaccessright().getLabel());
		Assertions
			.assertEquals(
				Constants.accessRightsCoarMap.get(ModelConstants.ACCESS_RIGHT_OPEN), gr.getBestaccessright().getCode());

		Assertions.assertEquals("One Ecosystem", gr.getContainer().getName());
		Assertions.assertEquals("2367-8194", gr.getContainer().getIssnOnline());
		Assertions.assertEquals("", gr.getContainer().getIssnPrinted());
		Assertions.assertEquals("", gr.getContainer().getIssnLinking());

		Assertions.assertTrue(null == gr.getDocumentationUrl() || gr.getDocumentationUrl().size() == 0);

		Assertions.assertTrue(null == gr.getCodeRepositoryUrl());

		Assertions.assertEquals(null, gr.getProgrammingLanguage());

		Assertions.assertTrue(null == gr.getContactperson() || gr.getContactperson().size() == 0);

		Assertions.assertTrue(null == gr.getContactgroup() || gr.getContactgroup().size() == 0);

		Assertions.assertTrue(null == gr.getTool() || gr.getTool().size() == 0);

		Assertions.assertEquals(null, gr.getSize());

		Assertions.assertEquals(null, gr.getVersion());

		Assertions.assertTrue(null == gr.getGeolocation() || gr.getGeolocation().size() == 0);

		Assertions.assertEquals("50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2", gr.getId());

		Assertions.assertEquals(1, gr.getOriginalId().size());
		Assertions
			.assertTrue(
				gr.getOriginalId().contains("10.3897/oneeco.2.e13718"));

		Assertions.assertEquals(1, gr.getPid().size());
		Assertions
			.assertTrue(
				gr.getPid().get(0).getScheme().equals("doi")
					&& gr.getPid().get(0).getValue().equals("10.1016/j.triboint.2014.05.004"));

		Assertions.assertEquals("2020-03-23T00:20:51.392Z", gr.getDateofcollection());

		Assertions.assertEquals(1, gr.getInstance().size());

		Instance instance = gr.getInstance().get(0);
		Assertions.assertEquals(0, instance.getPid().size());
		Assertions.assertEquals(1, instance.getAlternateIdentifier().size());
		Assertions
			.assertTrue(
				instance.getAlternateIdentifier().get(0).getScheme().equals("doi")
					&& instance.getAlternateIdentifier().get(0).getValue().equals("10.3897/oneeco.2.e13718"));
		Assertions.assertEquals(null, instance.getLicense());
		Assertions
			.assertTrue(
				instance.getAccessright().getCode()
					.equals(Constants.accessRightsCoarMap.get(ModelConstants.ACCESS_RIGHT_OPEN)));
		Assertions.assertTrue(instance.getAccessright().getLabel().equals(ModelConstants.ACCESS_RIGHT_OPEN));
		Assertions.assertTrue(instance.getAccessright().getOpenAccessRoute().equals(OpenAccessRoute.green));
		Assertions.assertTrue(instance.getType().equals("Article"));
		Assertions.assertEquals(2, instance.getUrl().size());
		Assertions
			.assertTrue(
				instance.getUrl().contains("https://doi.org/10.3897/oneeco.2.e13718")
					&& instance.getUrl().contains("https://oneecosystem.pensoft.net/article/13718/"));
		Assertions.assertEquals("2017-01-01", instance.getPublicationdate());
		Assertions.assertEquals(null, instance.getArticleprocessingcharge());
		Assertions.assertEquals("peerReviewed", instance.getRefereed());
	}

@Test
|
||||||
|
public void testDatasetDump() {
|
||||||
|
final String sourcePath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_extendedinstance")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
final String communityMapPath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
DumpProducts dump = new DumpProducts();
|
||||||
|
dump
|
||||||
|
.run(
|
||||||
|
false, sourcePath, workingDir.toString() + "/result",
|
||||||
|
communityMapPath, Dataset.class,
|
||||||
|
GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType());
|
||||||
|
|
||||||
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
|
JavaRDD<GraphResult> tmp = sc
|
||||||
|
.textFile(workingDir.toString() + "/result")
|
||||||
|
.map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class));
|
||||||
|
|
||||||
|
org.apache.spark.sql.Dataset<GraphResult> verificationDataset = spark
|
||||||
|
.createDataset(tmp.rdd(), Encoders.bean(GraphResult.class));
|
||||||
|
|
||||||
|
Assertions.assertEquals(1, verificationDataset.count());
|
||||||
|
|
||||||
|
Assertions.assertEquals(1, verificationDataset.filter("type = 'dataset'").count());
|
||||||
|
|
||||||
|
// the common fields in the result have been already checked. Now checking only
|
||||||
|
// community specific fields
|
||||||
|
|
||||||
|
GraphResult gr = verificationDataset.first();
|
||||||
|
|
||||||
|
Assertions.assertEquals(2, gr.getGeolocation().size());
|
||||||
|
Assertions.assertEquals(2, gr.getGeolocation().stream().filter(gl -> gl.getBox().equals("")).count());
|
||||||
|
Assertions.assertEquals(1, gr.getGeolocation().stream().filter(gl -> gl.getPlace().equals("")).count());
|
||||||
|
Assertions.assertEquals(1, gr.getGeolocation().stream().filter(gl -> gl.getPoint().equals("")).count());
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
1,
|
||||||
|
gr
|
||||||
|
.getGeolocation()
|
||||||
|
.stream()
|
||||||
|
.filter(gl -> gl.getPlace().equals("18 York St, Ottawa, ON K1N 5S6; Ottawa; Ontario; Canada"))
|
||||||
|
.count());
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
1, gr.getGeolocation().stream().filter(gl -> gl.getPoint().equals("45.427242 -75.693904")).count());
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
1,
|
||||||
|
gr
|
||||||
|
.getGeolocation()
|
||||||
|
.stream()
|
||||||
|
.filter(gl -> gl.getPoint().equals("") && !gl.getPlace().equals(""))
|
||||||
|
.count());
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
1,
|
||||||
|
gr
|
||||||
|
.getGeolocation()
|
||||||
|
.stream()
|
||||||
|
.filter(gl -> !gl.getPoint().equals("") && gl.getPlace().equals(""))
|
||||||
|
.count());
|
||||||
|
|
||||||
|
Assertions.assertEquals("1024Gb", gr.getSize());
|
||||||
|
|
||||||
|
Assertions.assertEquals("1.01", gr.getVersion());
|
||||||
|
|
||||||
|
Assertions.assertEquals(null, gr.getContainer());
|
||||||
|
Assertions.assertEquals(null, gr.getCodeRepositoryUrl());
|
||||||
|
Assertions.assertEquals(null, gr.getProgrammingLanguage());
|
||||||
|
Assertions.assertEquals(null, gr.getDocumentationUrl());
|
||||||
|
Assertions.assertEquals(null, gr.getContactperson());
|
||||||
|
Assertions.assertEquals(null, gr.getContactgroup());
|
||||||
|
Assertions.assertEquals(null, gr.getTool());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSoftwareDump() {
|
||||||
|
final String sourcePath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/software_extendedinstance")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
final String communityMapPath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
DumpProducts dump = new DumpProducts();
|
||||||
|
dump
|
||||||
|
.run(
|
||||||
|
false, sourcePath, workingDir.toString() + "/result",
|
||||||
|
communityMapPath, Software.class,
|
||||||
|
GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType());
|
||||||
|
|
||||||
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
|
JavaRDD<GraphResult> tmp = sc
|
||||||
|
.textFile(workingDir.toString() + "/result")
|
||||||
|
.map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class));
|
||||||
|
|
||||||
|
org.apache.spark.sql.Dataset<GraphResult> verificationDataset = spark
|
||||||
|
.createDataset(tmp.rdd(), Encoders.bean(GraphResult.class));
|
||||||
|
|
||||||
|
Assertions.assertEquals(1, verificationDataset.count());
|
||||||
|
|
||||||
|
Assertions.assertEquals(1, verificationDataset.filter("type = 'software'").count());
|
||||||
|
|
||||||
|
GraphResult gr = verificationDataset.first();
|
||||||
|
|
||||||
|
Assertions.assertEquals(2, gr.getDocumentationUrl().size());
|
||||||
|
Assertions.assertTrue(gr.getDocumentationUrl().contains("doc_url_1"));
|
||||||
|
Assertions.assertTrue(gr.getDocumentationUrl().contains("doc_url_2"));
|
||||||
|
|
||||||
|
Assertions.assertEquals("code_repo", gr.getCodeRepositoryUrl());
|
||||||
|
|
||||||
|
Assertions.assertEquals("perl", gr.getProgrammingLanguage());
|
||||||
|
|
||||||
|
Assertions.assertEquals(null, gr.getContainer());
|
||||||
|
Assertions.assertEquals(null, gr.getContactperson());
|
||||||
|
Assertions.assertEquals(null, gr.getContactgroup());
|
||||||
|
Assertions.assertEquals(null, gr.getTool());
|
||||||
|
Assertions.assertEquals(null, gr.getGeolocation());
|
||||||
|
Assertions.assertEquals(null, gr.getSize());
|
||||||
|
Assertions.assertEquals(null, gr.getVersion());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testOrpDump() {
|
||||||
|
final String sourcePath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/orp_extendedinstance")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
final String communityMapPath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
DumpProducts dump = new DumpProducts();
|
||||||
|
dump
|
||||||
|
.run(
|
||||||
|
false, sourcePath, workingDir.toString() + "/result",
|
||||||
|
communityMapPath, OtherResearchProduct.class,
|
||||||
|
GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType());
|
||||||
|
|
||||||
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
|
JavaRDD<GraphResult> tmp = sc
|
||||||
|
.textFile(workingDir.toString() + "/result")
|
||||||
|
.map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class));
|
||||||
|
|
||||||
|
org.apache.spark.sql.Dataset<GraphResult> verificationDataset = spark
|
||||||
|
.createDataset(tmp.rdd(), Encoders.bean(GraphResult.class));
|
||||||
|
|
||||||
|
Assertions.assertEquals(1, verificationDataset.count());
|
||||||
|
|
||||||
|
Assertions.assertEquals(1, verificationDataset.filter("type = 'other'").count());
|
||||||
|
|
||||||
|
GraphResult gr = verificationDataset.first();
|
||||||
|
|
||||||
|
Assertions.assertEquals(2, gr.getContactperson().size());
|
||||||
|
Assertions.assertTrue(gr.getContactperson().contains(("contact_person1")));
|
||||||
|
Assertions.assertTrue(gr.getContactperson().contains(("contact_person2")));
|
||||||
|
|
||||||
|
Assertions.assertEquals(1, gr.getContactgroup().size());
|
||||||
|
Assertions.assertTrue(gr.getContactgroup().contains(("contact_group")));
|
||||||
|
|
||||||
|
Assertions.assertEquals(2, gr.getTool().size());
|
||||||
|
Assertions.assertTrue(gr.getTool().contains("tool1"));
|
||||||
|
Assertions.assertTrue(gr.getTool().contains("tool2"));
|
||||||
|
|
||||||
|
Assertions.assertEquals(null, gr.getContainer());
|
||||||
|
Assertions.assertEquals(null, gr.getDocumentationUrl());
|
||||||
|
Assertions.assertEquals(null, gr.getCodeRepositoryUrl());
|
||||||
|
Assertions.assertEquals(null, gr.getProgrammingLanguage());
|
||||||
|
Assertions.assertEquals(null, gr.getGeolocation());
|
||||||
|
Assertions.assertEquals(null, gr.getSize());
|
||||||
|
Assertions.assertEquals(null, gr.getVersion());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPublicationDumpCommunity() throws JsonProcessingException {
|
||||||
|
|
||||||
|
final String sourcePath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
final String communityMapPath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
DumpProducts dump = new DumpProducts();
|
||||||
|
dump
|
||||||
|
.run(
|
||||||
|
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
|
||||||
|
CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType());
|
||||||
|
|
||||||
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
|
JavaRDD<CommunityResult> tmp = sc
|
||||||
|
.textFile(workingDir.toString() + "/result")
|
||||||
|
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
|
||||||
|
|
||||||
|
org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
|
||||||
|
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
|
||||||
|
|
||||||
|
Assertions.assertEquals(1, verificationDataset.count());
|
||||||
|
|
||||||
|
Assertions.assertEquals(1, verificationDataset.filter("type = 'publication'").count());
|
||||||
|
|
||||||
|
// the common fields in the result have been already checked. Now checking only
|
||||||
|
// community specific fields
|
||||||
|
|
||||||
|
CommunityResult cr = verificationDataset.first();
|
||||||
|
|
||||||
|
Assertions.assertEquals(1, cr.getContext().size());
|
||||||
|
Assertions.assertEquals("dh-ch", cr.getContext().get(0).getCode());
|
||||||
|
Assertions.assertEquals("Digital Humanities and Cultural Heritage", cr.getContext().get(0).getLabel());
|
||||||
|
Assertions.assertEquals(1, cr.getContext().get(0).getProvenance().size());
|
||||||
|
Assertions.assertEquals("Inferred by OpenAIRE", cr.getContext().get(0).getProvenance().get(0).getProvenance());
|
||||||
|
Assertions.assertEquals("0.9", cr.getContext().get(0).getProvenance().get(0).getTrust());
|
||||||
|
|
||||||
|
Assertions.assertEquals(1, cr.getCollectedfrom().size());
|
||||||
|
Assertions
|
||||||
|
.assertEquals("10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db", cr.getCollectedfrom().get(0).getKey());
|
||||||
|
Assertions.assertEquals("Pensoft", cr.getCollectedfrom().get(0).getValue());
|
||||||
|
|
||||||
|
Assertions.assertEquals(1, cr.getInstance().size());
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db",
|
||||||
|
cr.getInstance().get(0).getCollectedfrom().getKey());
|
||||||
|
Assertions.assertEquals("Pensoft", cr.getInstance().get(0).getCollectedfrom().getValue());
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd", cr.getInstance().get(0).getHostedby().getKey());
|
||||||
|
Assertions.assertEquals("One Ecosystem", cr.getInstance().get(0).getHostedby().getValue());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDataset() {
|
||||||
|
|
||||||
|
final String sourcePath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset.json")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
final String communityMapPath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
DumpProducts dump = new DumpProducts();
|
||||||
|
dump
|
||||||
|
.run(
|
||||||
|
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class,
|
||||||
|
CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType());
|
||||||
|
|
||||||
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
|
JavaRDD<CommunityResult> tmp = sc
|
||||||
|
.textFile(workingDir.toString() + "/result")
|
||||||
|
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
|
||||||
|
|
||||||
|
org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
|
||||||
|
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
|
||||||
|
|
||||||
|
Assertions.assertEquals(90, verificationDataset.count());
|
||||||
|
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
verificationDataset.filter("bestAccessright.code = 'c_abf2'").count() == verificationDataset
|
||||||
|
.filter("bestAccessright.code = 'c_abf2' and bestAccessright.label = 'OPEN'")
|
||||||
|
.count());
|
||||||
|
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
verificationDataset.filter("bestAccessright.code = 'c_16ec'").count() == verificationDataset
|
||||||
|
.filter("bestAccessright.code = 'c_16ec' and bestAccessright.label = 'RESTRICTED'")
|
||||||
|
.count());
|
||||||
|
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
verificationDataset.filter("bestAccessright.code = 'c_14cb'").count() == verificationDataset
|
||||||
|
.filter("bestAccessright.code = 'c_14cb' and bestAccessright.label = 'CLOSED'")
|
||||||
|
.count());
|
||||||
|
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
verificationDataset.filter("bestAccessright.code = 'c_f1cf'").count() == verificationDataset
|
||||||
|
.filter("bestAccessright.code = 'c_f1cf' and bestAccessright.label = 'EMBARGO'")
|
||||||
|
.count());
|
||||||
|
|
||||||
|
Assertions.assertTrue(verificationDataset.filter("size(context) > 0").count() == 90);
|
||||||
|
|
||||||
|
Assertions.assertTrue(verificationDataset.filter("type = 'dataset'").count() == 90);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDataset2All() {
|
||||||
|
|
||||||
|
final String sourcePath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_cleaned")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
final String communityMapPath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
DumpProducts dump = new DumpProducts();
|
||||||
|
dump
|
||||||
|
.run(
|
||||||
|
// false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class,
|
||||||
|
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class,
|
||||||
|
GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType());
|
||||||
|
|
||||||
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
|
JavaRDD<GraphResult> tmp = sc
|
||||||
|
.textFile(workingDir.toString() + "/result")
|
||||||
|
.map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class));
|
||||||
|
|
||||||
|
org.apache.spark.sql.Dataset<GraphResult> verificationDataset = spark
|
||||||
|
.createDataset(tmp.rdd(), Encoders.bean(GraphResult.class));
|
||||||
|
|
||||||
|
Assertions.assertEquals(5, verificationDataset.count());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDataset2Communities() {
|
||||||
|
|
||||||
|
final String sourcePath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_cleaned")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
final String communityMapPath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
DumpProducts dump = new DumpProducts();
|
||||||
|
dump
|
||||||
|
.run(
|
||||||
|
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class,
|
||||||
|
CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType());
|
||||||
|
|
||||||
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
|
JavaRDD<CommunityResult> tmp = sc
|
||||||
|
.textFile(workingDir.toString() + "/result")
|
||||||
|
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
|
||||||
|
|
||||||
|
org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
|
||||||
|
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
|
||||||
|
|
||||||
|
Assertions.assertEquals(0, verificationDataset.count());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPublication() {
|
||||||
|
|
||||||
|
final String sourcePath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication.json")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
final String communityMapPath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
DumpProducts dump = new DumpProducts();
|
||||||
|
dump
|
||||||
|
.run(
|
||||||
|
// false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
|
||||||
|
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
|
||||||
|
CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType());
|
||||||
|
|
||||||
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
|
JavaRDD<CommunityResult> tmp = sc
|
||||||
|
.textFile(workingDir.toString() + "/result")
|
||||||
|
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
|
||||||
|
|
||||||
|
org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
|
||||||
|
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
|
||||||
|
|
||||||
|
Assertions.assertEquals(74, verificationDataset.count());
|
||||||
|
verificationDataset.show(false);
|
||||||
|
|
||||||
|
Assertions.assertEquals(74, verificationDataset.filter("type = 'publication'").count());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSoftware() {
|
||||||
|
|
||||||
|
final String sourcePath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/software.json")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
final String communityMapPath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
DumpProducts dump = new DumpProducts();
|
||||||
|
dump
|
||||||
|
.run(
|
||||||
|
// false, sourcePath, workingDir.toString() + "/result", communityMapPath, Software.class,
|
||||||
|
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Software.class,
|
||||||
|
CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType());
|
||||||
|
|
||||||
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
|
JavaRDD<CommunityResult> tmp = sc
|
||||||
|
.textFile(workingDir.toString() + "/result")
|
||||||
|
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
|
||||||
|
|
||||||
|
org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
|
||||||
|
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
|
||||||
|
|
||||||
|
Assertions.assertEquals(6, verificationDataset.count());
|
||||||
|
|
||||||
|
Assertions.assertEquals(6, verificationDataset.filter("type = 'software'").count());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testORP() {
|
||||||
|
|
||||||
|
final String sourcePath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/orp.json")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
final String communityMapPath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
DumpProducts dump = new DumpProducts();
|
||||||
|
dump
|
||||||
|
.run(
|
||||||
|
// false, sourcePath, workingDir.toString() + "/result", communityMapPath, OtherResearchProduct.class,
|
||||||
|
false, sourcePath, workingDir.toString() + "/result", communityMapPath, OtherResearchProduct.class,
|
||||||
|
CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType());
|
||||||
|
|
||||||
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
|
JavaRDD<CommunityResult> tmp = sc
|
||||||
|
.textFile(workingDir.toString() + "/result")
|
||||||
|
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
|
||||||
|
|
||||||
|
org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
|
||||||
|
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
|
||||||
|
|
||||||
|
Assertions.assertEquals(3, verificationDataset.count());
|
||||||
|
|
||||||
|
Assertions.assertEquals(3, verificationDataset.filter("type = 'other'").count());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRecord() {
|
||||||
|
final String sourcePath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/singelRecord_pub.json")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
final String communityMapPath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
DumpProducts dump = new DumpProducts();
|
||||||
|
dump
|
||||||
|
.run(
|
||||||
|
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
|
||||||
|
CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType());
|
||||||
|
|
||||||
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
|
JavaRDD<CommunityResult> tmp = sc
|
||||||
|
.textFile(workingDir.toString() + "/result")
|
||||||
|
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
|
||||||
|
|
||||||
|
org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
|
||||||
|
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
|
||||||
|
|
||||||
|
Assertions.assertEquals(2, verificationDataset.count());
|
||||||
|
verificationDataset.show(false);
|
||||||
|
|
||||||
|
Assertions.assertEquals(2, verificationDataset.filter("type = 'publication'").count());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testArticlePCA() {
|
||||||
|
final String sourcePath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_pca")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
final String communityMapPath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
DumpProducts dump = new DumpProducts();
|
||||||
|
dump
|
||||||
|
.run(
|
||||||
|
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
|
||||||
|
GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType());
|
||||||
|
|
||||||
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
|
JavaRDD<GraphResult> tmp = sc
|
||||||
|
.textFile(workingDir.toString() + "/result")
|
||||||
|
.map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class));
|
||||||
|
|
||||||
|
org.apache.spark.sql.Dataset<GraphResult> verificationDataset = spark
|
||||||
|
.createDataset(tmp.rdd(), Encoders.bean(GraphResult.class));
|
||||||
|
|
||||||
|
Assertions.assertEquals(23, verificationDataset.count());
|
||||||
|
|
||||||
|
Assertions.assertEquals(23, verificationDataset.filter("type = 'publication'").count());
|
||||||
|
|
||||||
|
verificationDataset.createOrReplaceTempView("check");
|
||||||
|
|
||||||
|
org.apache.spark.sql.Dataset<Row> temp = spark
|
||||||
|
.sql(
|
||||||
|
"select id " +
|
||||||
|
"from check " +
|
||||||
|
"lateral view explode (instance) i as inst " +
|
||||||
|
"where inst.articleprocessingcharge is not null");
|
||||||
|
|
||||||
|
Assertions.assertTrue(temp.count() == 2);
|
||||||
|
|
||||||
|
Assertions.assertTrue(temp.filter("id = '50|datacite____::05c611fdfc93d7a2a703d1324e28104a'").count() == 1);
|
||||||
|
|
||||||
|
Assertions.assertTrue(temp.filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'").count() == 1);
|
||||||
|
|
||||||
|
temp = spark
|
||||||
|
.sql(
|
||||||
|
"select id, inst.articleprocessingcharge.amount, inst.articleprocessingcharge.currency " +
|
||||||
|
"from check " +
|
||||||
|
"lateral view explode (instance) i as inst " +
|
||||||
|
"where inst.articleprocessingcharge is not null");
|
||||||
|
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
"3131.64",
|
||||||
|
temp
|
||||||
|
.filter("id = '50|datacite____::05c611fdfc93d7a2a703d1324e28104a'")
|
||||||
|
.collectAsList()
|
||||||
|
.get(0)
|
||||||
|
.getString(1));
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
"EUR",
|
||||||
|
temp
|
||||||
|
.filter("id = '50|datacite____::05c611fdfc93d7a2a703d1324e28104a'")
|
||||||
|
.collectAsList()
|
||||||
|
.get(0)
|
||||||
|
.getString(2));
|
||||||
|
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
"2578.35",
|
||||||
|
temp
|
||||||
|
.filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'")
|
||||||
|
.collectAsList()
|
||||||
|
.get(0)
|
||||||
|
.getString(1));
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
"EUR",
|
||||||
|
temp
|
||||||
|
.filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'")
|
||||||
|
.collectAsList()
|
||||||
|
.get(0)
|
||||||
|
.getString(2));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@@ -0,0 +1,27 @@

package eu.dnetlib.dhp.oa.graph.dump;

import org.junit.jupiter.api.Test;

import com.fasterxml.jackson.databind.JsonNode;
import com.github.victools.jsonschema.generator.*;

import eu.dnetlib.dhp.schema.dump.oaf.graph.GraphResult;

//@Disabled
class GenerateJsonSchema {

	@Test
	void generateSchema() {
		SchemaGeneratorConfigBuilder configBuilder = new SchemaGeneratorConfigBuilder(SchemaVersion.DRAFT_7,
			OptionPreset.PLAIN_JSON)
				.with(Option.SCHEMA_VERSION_INDICATOR)
				.without(Option.NONPUBLIC_NONSTATIC_FIELDS_WITHOUT_GETTERS);
		configBuilder.forFields().withDescriptionResolver(field -> "Description of " + field.getDeclaredName());
		SchemaGeneratorConfig config = configBuilder.build();
		SchemaGenerator generator = new SchemaGenerator(config);
		JsonNode jsonSchema = generator.generateSchema(GraphResult.class);

		System.out.println(jsonSchema.toString());
	}
}
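
Note: generateSchema() only prints the generated schema to standard output. A minimal sketch of how the same JsonNode could be persisted instead, reusing the Jackson API already on the classpath (the output path below is hypothetical, not part of the committed code):

	// Sketch (assumption): run after jsonSchema has been built; the enclosing method would declare throws IOException.
	new com.fasterxml.jackson.databind.ObjectMapper()
		.writerWithDefaultPrettyPrinter()
		.writeValue(new java.io.File("/tmp/graph_result_schema.json"), jsonSchema);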
@@ -0,0 +1,62 @@

package eu.dnetlib.dhp.oa.graph.dump;

import java.io.IOException;
import java.nio.file.Files;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;

@Disabled
public class MakeTarTest {

	private static String workingDir;

	@BeforeAll
	public static void beforeAll() throws IOException {
		workingDir = Files
			.createTempDirectory(MakeTarTest.class.getSimpleName())
			.toString();
	}

	@Test
	void testTar() throws IOException {
		LocalFileSystem fs = FileSystem.getLocal(new Configuration());

		fs
			.copyFromLocalFile(
				false,
				new Path(getClass().getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/ni").getPath()),
				new Path(workingDir + "/zenodo/ni/part-00000"));

		fs
			.copyFromLocalFile(
				false,
				new Path(getClass().getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/egi").getPath()),
				new Path(workingDir + "/zenodo/ni/part-00001"));

		fs
			.copyFromLocalFile(
				false,
				new Path(getClass().getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/dh-ch").getPath()),
				new Path(workingDir + "/zenodo/dh-ch/part-00000"));

		fs
			.copyFromLocalFile(
				false,
				new Path(getClass().getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/science-innovation-policy").getPath()),
				new Path(workingDir + "/zenodo/ni/part-00002"));

		String inputPath = workingDir + "/zenodo/";

		MakeTar.makeTArArchive(fs, inputPath, "/tmp/out", 0);

	}
}
@@ -0,0 +1,347 @@

package eu.dnetlib.dhp.oa.graph.dump;

import static org.junit.jupiter.api.Assertions.assertEquals;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.oa.graph.dump.community.ResultProject;
import eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject;

public class PrepareResultProjectJobTest {

	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

	private static SparkSession spark;

	private static Path workingDir;

	private static final Logger log = LoggerFactory
		.getLogger(PrepareResultProjectJobTest.class);

	@BeforeAll
	public static void beforeAll() throws IOException {
		workingDir = Files
			.createTempDirectory(PrepareResultProjectJobTest.class.getSimpleName());
		log.info("using work dir {}", workingDir);

		SparkConf conf = new SparkConf();
		conf.setAppName(PrepareResultProjectJobTest.class.getSimpleName());

		conf.setMaster("local[*]");
		conf.set("spark.driver.host", "localhost");
		conf.set("hive.metastore.local", "true");
		conf.set("spark.ui.enabled", "false");
		conf.set("spark.sql.warehouse.dir", workingDir.toString());
		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

		spark = SparkSession
			.builder()
			.appName(PrepareResultProjectJobTest.class.getSimpleName())
			.config(conf)
			.getOrCreate();
	}

	@AfterAll
	public static void afterAll() throws IOException {
		FileUtils.deleteDirectory(workingDir.toFile());
		spark.stop();
	}

	@Test
	void testNoMatch() throws Exception {

		final String sourcePath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultProject/no_match")
			.getPath();

		SparkPrepareResultProject.main(new String[] {
			"-isSparkSessionManaged", Boolean.FALSE.toString(),
			"-outputPath", workingDir.toString() + "/preparedInfo",
			"-sourcePath", sourcePath
		});

		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

		JavaRDD<ResultProject> tmp = sc
			.textFile(workingDir.toString() + "/preparedInfo")
			.map(item -> OBJECT_MAPPER.readValue(item, ResultProject.class));

		Dataset<ResultProject> verificationDataset = spark
			.createDataset(tmp.rdd(), Encoders.bean(ResultProject.class));

		assertEquals(0, verificationDataset.count());

	}

	@Test
	void testMatchOne() throws Exception {

		final String sourcePath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultProject/match_one")
			.getPath();

		SparkPrepareResultProject.main(new String[] {
			"-isSparkSessionManaged", Boolean.FALSE.toString(),
			"-outputPath", workingDir.toString() + "/preparedInfo",
			"-sourcePath", sourcePath
		});

		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

		JavaRDD<ResultProject> tmp = sc
			.textFile(workingDir.toString() + "/preparedInfo")
			.map(item -> OBJECT_MAPPER.readValue(item, ResultProject.class));

		Dataset<ResultProject> verificationDataset = spark
			.createDataset(tmp.rdd(), Encoders.bean(ResultProject.class));

		assertEquals(1, verificationDataset.count());

		assertEquals(
			1,
			verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count());

		verificationDataset.createOrReplaceTempView("table");

		Dataset<Row> check = spark
			.sql(
				"Select projList.provenance.provenance " +
					"from table " +
					"lateral view explode (projectsList) pl as projList");

		assertEquals(1, check.filter("provenance = 'sysimport:crosswalk:entityregistry'").count());

		verificationDataset.show(false);

	}

	@Test
	void testMatch() throws Exception {

		final String sourcePath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultProject/match")
			.getPath();

		SparkPrepareResultProject.main(new String[] {
			"-isSparkSessionManaged", Boolean.FALSE.toString(),
			"-outputPath", workingDir.toString() + "/preparedInfo",
			"-sourcePath", sourcePath
		});

		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

		JavaRDD<ResultProject> tmp = sc
			.textFile(workingDir.toString() + "/preparedInfo")
			.map(item -> OBJECT_MAPPER.readValue(item, ResultProject.class));

		Dataset<ResultProject> verificationDataset = spark
			.createDataset(tmp.rdd(), Encoders.bean(ResultProject.class));

		assertEquals(2, verificationDataset.count());

		assertEquals(
			1,
			verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count());
		assertEquals(
			1,
			verificationDataset.filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count());

		verificationDataset.createOrReplaceTempView("dataset");

		String query = "select resultId, MyT.id project , MyT.title title, MyT.acronym acronym , MyT.provenance.provenance provenance "
			+ "from dataset "
			+ "lateral view explode(projectsList) p as MyT ";

		Dataset<Row> resultExplodedProvenance = spark.sql(query);
		assertEquals(3, resultExplodedProvenance.count());
		assertEquals(
			2,
			resultExplodedProvenance
				.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'")
				.count());

		assertEquals(
			1,
			resultExplodedProvenance
				.filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'")
				.count());

		assertEquals(
			2,
			resultExplodedProvenance
				.filter("project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6'")
				.count());

		assertEquals(
			1,
			resultExplodedProvenance
				.filter(
					"project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'")
				.count());

		assertEquals(
			1,
			resultExplodedProvenance
				.filter(
					"project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' and resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'")
				.count());

		assertEquals(
			1,
			resultExplodedProvenance
				.filter("project = '40|aka_________::03376222b28a3aebf2730ac514818d04'")
				.count());

		assertEquals(
			1,
			resultExplodedProvenance
				.filter(
					"project = '40|aka_________::03376222b28a3aebf2730ac514818d04' and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'")
				.count());

		assertEquals(
			3, resultExplodedProvenance.filter("provenance = 'sysimport:crosswalk:entityregistry'").count());

	}

	@Test
	public void testMatchValidated() throws Exception {

		final String sourcePath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultProject/match_validatedRels")
			.getPath();

		SparkPrepareResultProject.main(new String[] {
			"-isSparkSessionManaged", Boolean.FALSE.toString(),
			"-outputPath", workingDir.toString() + "/preparedInfo",
			"-sourcePath", sourcePath
		});

		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

		JavaRDD<ResultProject> tmp = sc
			.textFile(workingDir.toString() + "/preparedInfo")
			.map(item -> OBJECT_MAPPER.readValue(item, ResultProject.class));

		Dataset<ResultProject> verificationDataset = spark
			.createDataset(tmp.rdd(), Encoders.bean(ResultProject.class));

		assertEquals(2, verificationDataset.count());

		assertEquals(
			1,
			verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count());
		assertEquals(
			1,
			verificationDataset.filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count());

		verificationDataset.createOrReplaceTempView("dataset");

		String query = "select resultId, MyT.id project , MyT.title title, MyT.acronym acronym , MyT.provenance.provenance provenance, "
			+
			"MyT.validated.validatedByFunder, MyT.validated.validationDate "
			+ "from dataset "
			+ "lateral view explode(projectsList) p as MyT ";

		Dataset<Row> resultExplodedProvenance = spark.sql(query);
		assertEquals(3, resultExplodedProvenance.count());
		assertEquals(3, resultExplodedProvenance.filter("validatedByFunder = true").count());
		assertEquals(
			2,
			resultExplodedProvenance
				.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'")
				.count());

		assertEquals(
			1,
			resultExplodedProvenance
				.filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'")
				.count());

		assertEquals(
			2,
			resultExplodedProvenance
				.filter("project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6'")
				.count());

		assertEquals(
			1,
			resultExplodedProvenance
				.filter(
					"project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' " +
						"and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb' " +
						"and validatedByFunder = true " +
						"and validationDate = '2021-08-06'")
				.count());

		assertEquals(
			1,
			resultExplodedProvenance
				.filter(
					"project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' " +
						"and resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80' " +
						"and validatedByFunder = true and validationDate = '2021-08-04'")
				.count());

		assertEquals(
			1,
			resultExplodedProvenance
				.filter("project = '40|aka_________::03376222b28a3aebf2730ac514818d04'")
				.count());

		assertEquals(
			1,
			resultExplodedProvenance
				.filter(
					"project = '40|aka_________::03376222b28a3aebf2730ac514818d04' " +
						"and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb' " +
						"and validatedByFunder = true and validationDate = '2021-08-05'")
				.count());

		assertEquals(
			3, resultExplodedProvenance.filter("provenance = 'sysimport:crosswalk:entityregistry'").count());

	}

	@Test
	void testMatchx() throws Exception {

		final String sourcePath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/match")
			.getPath();

		SparkPrepareResultProject.main(new String[] {
			"-isSparkSessionManaged", Boolean.FALSE.toString(),
			"-outputPath", workingDir.toString() + "/preparedInfo",
			"-sourcePath", sourcePath
		});

		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

		JavaRDD<ResultProject> tmp = sc
			.textFile(workingDir.toString() + "/preparedInfo")
			.map(item -> OBJECT_MAPPER.readValue(item, ResultProject.class));

		tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r)));
	}

}
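
The checks in testMatch and testMatchValidated go through a temp view and Spark SQL's lateral view explode; the same verification could be expressed directly with the Dataset API. A minimal sketch, assuming the verificationDataset built in testMatch (not part of the committed code):

	// Sketch (assumption): equivalent of the "lateral view explode(projectsList)" query via the Dataset API.
	// Requires: import static org.apache.spark.sql.functions.col; import static org.apache.spark.sql.functions.explode;
	Dataset<Row> exploded = verificationDataset
		.select(col("resultId"), explode(col("projectsList")).as("MyT"))
		.select(col("resultId"), col("MyT.id").as("project"), col("MyT.provenance.provenance").as("provenance"));
	assertEquals(3, exploded.count());
	assertEquals(3, exploded.filter("provenance = 'sysimport:crosswalk:entityregistry'").count());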
@@ -0,0 +1,117 @@

package eu.dnetlib.dhp.oa.graph.dump;

import static org.mockito.Mockito.lenient;

import java.util.Arrays;
import java.util.List;
import java.util.Map;

import org.dom4j.DocumentException;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import org.xml.sax.SAXException;

import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;

@ExtendWith(MockitoExtension.class)
class QueryInformationSystemTest {

	private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
		+
		" where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
		" and ($x//context/param[./@name = 'status']/text() = 'all') "
		+
		" return " +
		"<community> " +
		"{$x//CONFIGURATION/context/@id}" +
		"{$x//CONFIGURATION/context/@label}" +
		"</community>";

	List<String> communityMap = Arrays
		.asList(
			"<community id=\"egi\" label=\"EGI Federation\"/>",
			"<community id=\"fet-fp7\" label=\"FET FP7\"/>",
			"<community id=\"fet-h2020\" label=\"FET H2020\"/>",
			"<community id=\"clarin\" label=\"CLARIN\"/>",
			"<community id=\"rda\" label=\"Research Data Alliance\"/>",
			"<community id=\"ee\" label=\"SDSN - Greece\"/>",
			"<community id=\"dh-ch\" label=\"Digital Humanities and Cultural Heritage\"/>",
			"<community id=\"fam\" label=\"Fisheries and Aquaculture Management\"/>",
			"<community id=\"ni\" label=\"Neuroinformatics\"/>",
			"<community id=\"mes\" label=\"European Marine Science\"/>",
			"<community id=\"instruct\" label=\"Instruct-ERIC\"/>",
			"<community id=\"elixir-gr\" label=\"ELIXIR GR\"/>",
			"<community id=\"aginfra\" label=\"Agricultural and Food Sciences\"/>",
			"<community id=\"dariah\" label=\"DARIAH EU\"/>",
			"<community id=\"risis\" label=\"RISIS\"/>",
			"<community id=\"epos\" label=\"EPOS\"/>",
			"<community id=\"beopen\" label=\"Transport Research\"/>",
			"<community id=\"euromarine\" label=\"EuroMarine\"/>",
			"<community id=\"ifremer\" label=\"Ifremer\"/>",
			"<community id=\"oa-pg\" label=\"EC Post-Grant Open Access Pilot\"/>",
			"<community id=\"science-innovation-policy\" label=\"Science and Innovation Policy Studies\"/>",
			"<community id=\"covid-19\" label=\"COVID-19\"/>",
			"<community id=\"enermaps\" label=\"Energy Research\"/>");

	@Mock
	private ISLookUpService isLookUpService;

	private QueryInformationSystem queryInformationSystem;

	private Map<String, String> map;

	@BeforeEach
	public void setUp() throws ISLookUpException, DocumentException, SAXException {
		lenient().when(isLookUpService.quickSearchProfile(XQUERY)).thenReturn(communityMap);
		queryInformationSystem = new QueryInformationSystem();
		queryInformationSystem.setIsLookUp(isLookUpService);
		map = queryInformationSystem.getCommunityMap(false, null);
	}

	@Test
	void testSize() throws ISLookUpException {

		Assertions.assertEquals(23, map.size());
	}

	@Test
	void testContent() {
		Assertions.assertTrue(map.containsKey("egi") && map.get("egi").equals("EGI Federation"));

		Assertions.assertTrue(map.containsKey("fet-fp7") && map.get("fet-fp7").equals("FET FP7"));
		Assertions.assertTrue(map.containsKey("fet-h2020") && map.get("fet-h2020").equals("FET H2020"));
		Assertions.assertTrue(map.containsKey("clarin") && map.get("clarin").equals("CLARIN"));
		Assertions.assertTrue(map.containsKey("rda") && map.get("rda").equals("Research Data Alliance"));
		Assertions.assertTrue(map.containsKey("ee") && map.get("ee").equals("SDSN - Greece"));
		Assertions
			.assertTrue(
				map.containsKey("dh-ch") && map.get("dh-ch").equals("Digital Humanities and Cultural Heritage"));
		Assertions.assertTrue(map.containsKey("fam") && map.get("fam").equals("Fisheries and Aquaculture Management"));
		Assertions.assertTrue(map.containsKey("ni") && map.get("ni").equals("Neuroinformatics"));
		Assertions.assertTrue(map.containsKey("mes") && map.get("mes").equals("European Marine Science"));
		Assertions.assertTrue(map.containsKey("instruct") && map.get("instruct").equals("Instruct-ERIC"));
		Assertions.assertTrue(map.containsKey("elixir-gr") && map.get("elixir-gr").equals("ELIXIR GR"));
		Assertions
			.assertTrue(map.containsKey("aginfra") && map.get("aginfra").equals("Agricultural and Food Sciences"));
		Assertions.assertTrue(map.containsKey("dariah") && map.get("dariah").equals("DARIAH EU"));
		Assertions.assertTrue(map.containsKey("risis") && map.get("risis").equals("RISIS"));
		Assertions.assertTrue(map.containsKey("epos") && map.get("epos").equals("EPOS"));
		Assertions.assertTrue(map.containsKey("beopen") && map.get("beopen").equals("Transport Research"));
		Assertions.assertTrue(map.containsKey("euromarine") && map.get("euromarine").equals("EuroMarine"));
		Assertions.assertTrue(map.containsKey("ifremer") && map.get("ifremer").equals("Ifremer"));
		Assertions.assertTrue(map.containsKey("oa-pg") && map.get("oa-pg").equals("EC Post-Grant Open Access Pilot"));
		Assertions
			.assertTrue(
				map.containsKey("science-innovation-policy")
					&& map.get("science-innovation-policy").equals("Science and Innovation Policy Studies"));
		Assertions.assertTrue(map.containsKey("covid-19") && map.get("covid-19").equals("COVID-19"));
		Assertions.assertTrue(map.containsKey("enermaps") && map.get("enermaps").equals("Energy Research"));
	}

}
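
The mocked profiles above are single <community/> elements, and the test expects one map entry per element; each entry can be derived with dom4j (already a declared dependency), presumably along these lines. A sketch only, not the actual QueryInformationSystem implementation:

	// Sketch (assumption): illustrative parsing of one mocked profile; DocumentHelper.parseText throws DocumentException.
	org.dom4j.Element community = org.dom4j.DocumentHelper
		.parseText("<community id=\"egi\" label=\"EGI Federation\"/>")
		.getRootElement();
	map.put(community.attributeValue("id"), community.attributeValue("label")); // yields "egi" -> "EGI Federation"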
@@ -0,0 +1,143 @@

package eu.dnetlib.dhp.oa.graph.dump;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.oa.graph.dump.community.CommunitySplit;
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;

public class SplitForCommunityTest {

	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

	private static SparkSession spark;

	private static Path workingDir;

	private static final Logger log = LoggerFactory.getLogger(SplitForCommunityTest.class);

	@BeforeAll
	public static void beforeAll() throws IOException {
		workingDir = Files.createTempDirectory(SplitForCommunityTest.class.getSimpleName());
		log.info("using work dir {}", workingDir);

		SparkConf conf = new SparkConf();
		conf.setAppName(SplitForCommunityTest.class.getSimpleName());

		conf.setMaster("local[*]");
		conf.set("spark.driver.host", "localhost");
		conf.set("hive.metastore.local", "true");
		conf.set("spark.ui.enabled", "false");
		conf.set("spark.sql.warehouse.dir", workingDir.toString());
		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

		spark = SparkSession
			.builder()
			.appName(SplitForCommunityTest.class.getSimpleName())
			.config(conf)
			.getOrCreate();
	}

	@AfterAll
	public static void afterAll() throws IOException {
		FileUtils.deleteDirectory(workingDir.toFile());
		spark.stop();
	}

	@Test
	void testCommunitySplit() {

		final String sourcePath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/splitForCommunity")
			.getPath();

		final String communityMapPath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
			.getPath();

		CommunitySplit split = new CommunitySplit();

		split.run(false, sourcePath, workingDir.toString() + "/split", communityMapPath);

		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

		JavaRDD<CommunityResult> tmp = sc
			.textFile(workingDir.toString() + "/split/Digital_Humanities_and_Cultural_Heritage")
			.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));

		org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
			.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));

		Assertions.assertEquals(19, verificationDataset.count());

		Assertions
			.assertEquals(
				1, verificationDataset.filter("id = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count());

		tmp = sc
			.textFile(workingDir.toString() + "/split/EGI_Federation")
			.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));

		verificationDataset = spark
			.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));

		Assertions.assertEquals(1, verificationDataset.count());

		Assertions
			.assertEquals(
				1, verificationDataset.filter("id = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count());

		tmp = sc
			.textFile(workingDir.toString() + "/split/Neuroinformatics")
			.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));

		verificationDataset = spark
			.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));

		Assertions.assertEquals(5, verificationDataset.count());

		Assertions
			.assertEquals(
				1, verificationDataset.filter("id = '50|datacite____::6b1e3a2fa60ed8c27317a66d6357f795'").count());

		tmp = sc
			.textFile(workingDir.toString() + "/split/Science_and_Innovation_Policy_Studies")
			.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));

		verificationDataset = spark
			.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));

		Assertions.assertEquals(4, verificationDataset.count());

		Assertions
			.assertEquals(
				1, verificationDataset.filter("id = '50|dedup_wf_001::0347b1cd516fc59e41ba92e0d74e4e9f'").count());
		Assertions
			.assertEquals(
				1, verificationDataset.filter("id = '50|dedup_wf_001::1432beb6171baa5da8a85a7f99545d69'").count());
		Assertions
			.assertEquals(
				1, verificationDataset.filter("id = '50|dedup_wf_001::1c8bd19e633976e314b88ce5c3f92d69'").count());
		Assertions
			.assertEquals(
				1, verificationDataset.filter("id = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count());

	}
}
@@ -0,0 +1,232 @@

package eu.dnetlib.dhp.oa.graph.dump;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.stream.Collectors;

import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo;
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
import eu.dnetlib.dhp.schema.dump.oaf.community.Project;

public class UpdateProjectInfoTest {

    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    private static SparkSession spark;

    private static Path workingDir;

    private static final Logger log = LoggerFactory.getLogger(UpdateProjectInfoTest.class);

    private static final HashMap<String, String> map = new HashMap<>();

    @BeforeAll
    public static void beforeAll() throws IOException {
        workingDir = Files
            .createTempDirectory(UpdateProjectInfoTest.class.getSimpleName());
        log.info("using work dir {}", workingDir);

        SparkConf conf = new SparkConf();
        conf.setAppName(UpdateProjectInfoTest.class.getSimpleName());

        conf.setMaster("local[*]");
        conf.set("spark.driver.host", "localhost");
        conf.set("hive.metastore.local", "true");
        conf.set("spark.ui.enabled", "false");
        conf.set("spark.sql.warehouse.dir", workingDir.toString());
        conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

        spark = SparkSession
            .builder()
            .appName(UpdateProjectInfoTest.class.getSimpleName())
            .config(conf)
            .getOrCreate();
    }

    @AfterAll
    public static void afterAll() throws IOException {
        FileUtils.deleteDirectory(workingDir.toFile());
        spark.stop();
    }

    @Test
    void test1() throws Exception {

        final String sourcePath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo")
            .getPath();

        SparkUpdateProjectInfo.main(new String[] {
            "-isSparkSessionManaged", Boolean.FALSE.toString(),
            "-preparedInfoPath", sourcePath + "/preparedInfo",
            "-outputPath", workingDir.toString() + "/result",
            "-sourcePath", sourcePath + "/software.json"
        });

        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

        JavaRDD<CommunityResult> tmp = sc
            .textFile(workingDir.toString() + "/result")
            .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));

        org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
            .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));

        verificationDataset.show(false);

        Assertions.assertEquals(6, verificationDataset.count());
        verificationDataset.createOrReplaceTempView("dataset");

        String query = "select id, MyT.code code, MyT.title title, MyT.funder.name funderName, MyT.funder.shortName funderShortName, "
            +
            "MyT.funder.jurisdiction funderJurisdiction, MyT.funder.fundingStream fundingStream "
            + "from dataset " +
            "lateral view explode(projects) p as MyT ";

        org.apache.spark.sql.Dataset<Row> resultExplodedProvenance = spark.sql(query);

        Assertions.assertEquals(3, resultExplodedProvenance.count());
        resultExplodedProvenance.show(false);

        Assertions
            .assertEquals(
                2,
                resultExplodedProvenance.filter("id = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count());

        Assertions
            .assertEquals(
                1,
                resultExplodedProvenance
                    .filter("id = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb' and code = '123455'")
                    .count());

        Assertions
            .assertEquals(
                1,
                resultExplodedProvenance
                    .filter("id = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb' and code = '119027'")
                    .count());

        Assertions
            .assertEquals(
                1,
                resultExplodedProvenance
                    .filter("id = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80' and code = '123455'")
                    .count());

        resultExplodedProvenance.show(false);
    }

    @Test
    public void testValidatedRelation() throws Exception {
        final String sourcePath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo")
            .getPath();

        SparkUpdateProjectInfo.main(new String[] {
            "-isSparkSessionManaged", Boolean.FALSE.toString(),
            "-preparedInfoPath", sourcePath + "/preparedInfoValidated",
            "-outputPath", workingDir.toString() + "/result",
            "-sourcePath", sourcePath + "/publication_extendedmodel"
        });

        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

        JavaRDD<CommunityResult> tmp = sc
            .textFile(workingDir.toString() + "/result")
            .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));

        org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
            .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));

        verificationDataset.show(false);

        Assertions.assertEquals(2, verificationDataset.count());
        verificationDataset.createOrReplaceTempView("dataset");

        String query = "select id, MyT.code code, MyT.title title, MyT.funder.name funderName, MyT.funder.shortName funderShortName, "
            +
            "MyT.funder.jurisdiction funderJurisdiction, MyT.funder.fundingStream fundingStream, MyT.validated "
            + "from dataset " +
            "lateral view explode(projects) p as MyT ";

        org.apache.spark.sql.Dataset<Row> resultExplodedProvenance = spark.sql(query);

        Assertions.assertEquals(2, resultExplodedProvenance.count());
        resultExplodedProvenance.show(false);

        Assertions
            .assertEquals(
                2,
                resultExplodedProvenance.filter("id = '50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2'").count());

        Assertions
            .assertEquals(
                1,
                resultExplodedProvenance
                    .filter("id = '50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2' and code = '123455'")
                    .count());

        Assertions
            .assertEquals(
                1,
                resultExplodedProvenance
                    .filter("id = '50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2' and code = '119027'")
                    .count());

        Project project = verificationDataset
            .map(
                (MapFunction<CommunityResult, Project>) cr -> cr
                    .getProjects()
                    .stream()
                    .filter(p -> p.getValidated() != null)
                    .collect(Collectors.toList())
                    .get(0),
                Encoders.bean(Project.class))
            .first();

        Assertions.assertTrue(project.getFunder().getName().equals("Academy of Finland"));
        Assertions.assertTrue(project.getFunder().getShortName().equals("AKA"));
        Assertions.assertTrue(project.getFunder().getJurisdiction().equals("FI"));
        Assertions.assertTrue(project.getFunder().getFundingStream() == null);
        Assertions.assertTrue(project.getValidated().getValidationDate().equals("2021-08-06"));

        project = verificationDataset
            .map(
                (MapFunction<CommunityResult, Project>) cr -> cr
                    .getProjects()
                    .stream()
                    .filter(p -> p.getValidated() == null)
                    .collect(Collectors.toList())
                    .get(0),
                Encoders.bean(Project.class))
            .first();

        Assertions.assertTrue(project.getFunder().getName().equals("European Commission"));
        Assertions.assertTrue(project.getFunder().getShortName().equals("EC"));
        Assertions.assertTrue(project.getFunder().getJurisdiction().equals("EU"));
        Assertions.assertTrue(project.getFunder().getFundingStream().equals("H2020"));

    }

}
@@ -0,0 +1,153 @@

package eu.dnetlib.dhp.oa.graph.dump;

import java.io.IOException;
import java.nio.file.Files;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;

import com.google.gson.Gson;

import eu.dnetlib.dhp.common.api.MissingConceptDoiException;
import eu.dnetlib.dhp.common.api.ZenodoAPIClient;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;

@Disabled
public class ZenodoUploadTest {

    private static String workingDir;

    private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions";
    private final String ACCESS_TOKEN = "";

    @BeforeAll
    public static void beforeAll() throws IOException {
        workingDir = Files
            .createTempDirectory(UpdateProjectInfoTest.class.getSimpleName())
            .toString();
    }

    @Test
    void testNewDeposition() throws IOException {
        CommunityMap communityMap = new CommunityMap();
        communityMap.put("ni", "Neuroinformatics");
        communityMap.put("dh-ch", "Digital Humanities and Cultural Heritage");
        LocalFileSystem fs = FileSystem.getLocal(new Configuration());

        fs
            .copyFromLocalFile(
                false, new Path(getClass()
                    .getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/ni")
                    .getPath()),
                new Path(workingDir + "/zenodo/ni/ni"));
        fs
            .copyFromLocalFile(
                false, new Path(getClass()
                    .getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/dh-ch")
                    .getPath()),
                new Path(workingDir + "/zenodo/dh-ch/dh-ch"));

        ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
            ACCESS_TOKEN);
        client.newDeposition();

        // the second boolean parameter here sets the recursion to true
        RemoteIterator<LocatedFileStatus> fileStatusListIterator = fs
            .listFiles(
                new Path(workingDir + "/zenodo"), true);
        while (fileStatusListIterator.hasNext()) {
            LocatedFileStatus fileStatus = fileStatusListIterator.next();

            String p_string = fileStatus.getPath().toString();

            int index = p_string.lastIndexOf("/");
            String community = p_string.substring(0, index);
            community = community.substring(community.lastIndexOf("/") + 1);
            String community_name = communityMap.get(community).replace(" ", "_");
            // fs.copyToLocalFile(fileStatus.getPath(), new Path("/tmp/" + community_name));
            System.out.println(community);

            // File f = new File("/tmp/" + community_name);
            FSDataInputStream inputStream = fs.open(fileStatus.getPath());
            System.out.println(client.uploadIS(inputStream, community_name, fileStatus.getLen()));

        }

String metadata = "{\"metadata\":{\"access_right\":\"open\",\"communities\":[{\"identifier\":\"openaire-research-graph\"}],\"creators\":[{\"affiliation\":\"CNR - ISTI\",\"name\":\"Manghi, Paolo\",\"orcid\":\"0000-0001-7291-3210\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"Atzori, Claudio\",\"orcid\":\"0000-0001-9613-6639\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"Bardi, Alessia\",\"orcid\":\"0000-0002-1112-1292\"},{\"affiliation\":\"ISTI - CNR\",\"name\":\"Baglioni, Miriam\",\"orcid\":\"0000-0002-2273-9004\"},{\"affiliation\":\"University of Bielefeld\",\"name\":\"Shirrwagen, Jochen\"},{\"affiliation\":\"Athena Research and Innovation Centre\",\"name\":\"Dimitropoulos, Harry\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"La Bruzzo, Sandro\",\"orcid\":\"0000-0003-2855-1245\"},{\"affiliation\":\"Athena Research and Innovation Centre\",\"name\":\"Foufoulas, Ioannis\"},{\"affiliation\":\"University of Bielefeld\",\"name\":\"Löhden, Aenne\"},{\"affiliation\":\"University of Bielefeld\",\"name\":\"Bäcker, Amelie\",\"orcid\":\"0000-0001-6015-2063\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"Mannocci, Andrea\",\"orcid\":\"0000-0002-5193-7851\"},{\"affiliation\":\"University of Warsaw\",\"name\":\"Horst, Marek\"},{\"affiliation\":\"University of Bielefeld\",\"name\":\"Czerniak, Andreas\",\"orcid\":\"0000-0003-3883-4169\"},{\"affiliation\":\"Athena Research and Innovation Centre\",\"name\":\"Kiatropoulou, Katerina\"},{\"affiliation\":\"Athena Research and Innovation Centre\",\"name\":\"Kokogiannaki, Argiro\",\"orcid\":\"0000-0002-3880-0244\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"De Bonis, Michele\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"Artini, Michele\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"Ottonello, Enrico\"},{\"affiliation\":\"Athena Research and Innovation Centre\",\"name\":\"Lempesis, Antonis\"},{\"affiliation\":\"CERN\",\"name\":\"Ioannidis, Alexandros\"},{\"affiliation\":\"University of Bielefeld\",\"name\":\"Summan, Friedrich\"}],\"description\":\"\\u003cp\\u003eThis dataset contains dumps of the OpenAIRE Research Graph containing metadata records relevant for the research communities and initiatives collaborating with OpenAIRE\\u003c/p\\u003e. \\u003cp\\u003eEach dataset is a zip containing a file with one json per line. Each json is compliant to the schema available at XXXX\\u003c/p\\u003e Note that the file that is offered is not a typical json file: each line contains a separate, self-contained json object. For more information please see http://jsonlines.org\",\"grants\":[{\"id\":\"777541\"},{\"id\":\"824091\"},{\"id\":\"824323\"}],\"keywords\":[\"Open Science\",\"Scholarly Communication\",\"Information Science\"],\"language\":\"eng\",\"license\":\"CC-BY-4.0\",\"title\":\"OpenAIRE Research Graph: Dumps for research communities and initiatives.\",\"upload_type\":\"dataset\",\"version\":\"1.0\"}}";

        System.out.println(client.sendMretadata(metadata));

        System.out.println(client.publish());

    }

    @Test
    void testNewVersion() throws IOException, MissingConceptDoiException {

        ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
            ACCESS_TOKEN);

        client.newVersion("656628");

        CommunityMap communityMap = new CommunityMap();
        communityMap.put("ni", "Neuroinformatics");
        communityMap.put("dh-ch", "Digital Humanities and Cultural Heritage");
        LocalFileSystem fs = FileSystem.getLocal(new Configuration());

        fs
            .copyFromLocalFile(
                false, new Path(getClass()
                    .getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/ni")
                    .getPath()),
                new Path(workingDir + "/zenodo/ni/ni"));
        fs
            .copyFromLocalFile(
                false, new Path(getClass()
                    .getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/dh-ch")
                    .getPath()),
                new Path(workingDir + "/zenodo/dh-ch/dh-ch"));

        RemoteIterator<LocatedFileStatus> fileStatusListIterator = fs
            .listFiles(
                new Path(workingDir + "/zenodo"), true);
        while (fileStatusListIterator.hasNext()) {
            LocatedFileStatus fileStatus = fileStatusListIterator.next();

            String p_string = fileStatus.getPath().toString();

            int index = p_string.lastIndexOf("/");
            String community = p_string.substring(0, index);
            community = community.substring(community.lastIndexOf("/") + 1);
            String community_name = communityMap.get(community).replace(" ", "_");
            // fs.copyToLocalFile(fileStatus.getPath(), new Path("/tmp/" + community_name));
            System.out.println(community);

            // File f = new File("/tmp/" + community_name);
            FSDataInputStream inputStream = fs.open(fileStatus.getPath());
            System.out.println(client.uploadIS(inputStream, community_name, fileStatus.getLen()));

        }

        System.out.println(client.publish());

    }

    @Test
    void readCommunityMap() throws IOException {
        LocalFileSystem fs = FileSystem.getLocal(new Configuration());
        System.out
            .println(
                new Gson()
                    .toJson(
                        Utils
                            .readCommunityMap(
                                fs, getClass()
                                    .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
                                    .getPath())));
    }

}
@@ -0,0 +1,173 @@

package eu.dnetlib.dhp.oa.graph.dump.complete;

import static org.mockito.Mockito.lenient;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.function.Consumer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.junit.jupiter.api.*;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;

import com.google.gson.Gson;

import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchCommunity;
import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchInitiative;
import eu.dnetlib.dhp.utils.DHPUtils;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;

@ExtendWith(MockitoExtension.class)
public class CreateEntityTest {

    private static final String XQUERY_ENTITY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
        +
        "where $x//context[./@type='community' or ./@type = 'ri'] and $x//context/param[./@name = 'status']/text() = 'all' return "
        +
        "concat(data($x//context/@id) , '@@', $x//context/param[./@name =\"name\"]/text(), '@@', " +
        "$x//context/param[./@name=\"description\"]/text(), '@@', $x//context/param[./@name = \"subject\"]/text(), '@@', "
        +
        "$x//context/param[./@name = \"zenodoCommunity\"]/text(), '@@', $x//context/@type)";

    List<String> communityMap = Arrays
        .asList(
            "clarin@@Common Language Resources and Technology Infrastructure@@CLARIN@@@@oac_clarin@@ri",
"ee@@Sustainable Development Solutions Network - Greece@@The UN Sustainable Development Solutions Network (SDSN) has been operating since 2012 under the auspices of the UN Secretary-General. "
+
"SDSN mobilizes global scientific and technological expertise to promote practical solutions for sustainable development, including the implementation of the Sustainable Development Goals (SDGs) and the Paris Climate Agreement. The Greek hub of SDSN has been included in the SDSN network in 2017 and is co-hosted by ICRE8: International Center for Research on the Environment and the Economy and the Political Economy of Sustainable Development Lab.@@SDG13 - Climate action,SDG8 - Decent work and economic growth,SDG15 - "
+
"Life on land,SDG2 - Zero hunger,SDG17 - Partnerships for the ´goals,SDG10 - Reduced inequalities,SDG5 - Gender equality,SDG12 - Responsible consumption and production,SDG14 - Life below water,SDG6 - Clean water and sanitation,SDG11 - Sustainable cities and communities,SDG1 - No poverty,SDG3 - Good health and well being,SDG7 - Affordable and clean energy,SDG4 - Quality education,SDG9 - Industry innovation and infrastructure,SDG16 - Peace justice and strong institutions@@oac_sdsn-greece@@community",
"dh-ch@@Digital Humanities and Cultural Heritage@@This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.@@modern art,monuments,europeana data model,sites,field walking,frescoes,LIDO metadata schema,art history,excavation,Arts and Humanities General,cities,coins,temples,numismatics,lithics,roads,environmental archaeology,digital cultural heritage,archaeological reports,history,CRMba,churches,cultural heritage,archaeological stratigraphy,religious art,buidings,digital humanities,survey,archaeological sites,linguistic studies,bioarchaeology,architectural orders,palaeoanthropology,fine arts,europeana,CIDOC CRM,decorations,classic art,stratigraphy,digital archaeology,intangible cultural heritage,walls,humanities,chapels,CRMtex,Language and Literature,paintings,archaeology,fair data,mosaics,burials,architecture,medieval art,castles,CARARE metadata schema,statues,natural language processing,inscriptions,CRMsci,vaults,contemporary art,Arts and Humanities,CRMarchaeo,pottery,site,architectural,vessels@@oac_dh-ch@@community",
"fam@@Fisheries and Aquaculture Management@@Conservation of marine resources for sustainable development. The Fisheries and Aquaculture community focus on resources (document, data, codes..) which have been produced in the framework of projects (H2020, FP7, ..) related to the domain of fisheries and aquaculture.@@Stock Assessment,pelagic,Acoustic,Fish farming,Fisheries,Fishermen,maximum sustainable yield,trawler,Fishing vessel,Fisherman,Fishing gear,mackerel,RFMO,Fish Aggregating Device,Bycatch,Fishery,common fisheries policy,Fishing fleet,Aquaculture@@fisheries@@community",
"ni@@Neuroinformatics@@The neuroinformatics dashboard gathers research outputs from the 'neuroinformatics' community at large including the fields of: neuroscience, neuroinformatics, brain imaging databases and standards, brain imaging techniques, neuroimaging methods including statistics and machine learning. The dashboard covers a wide range of imaging methods including (but not limited to): MRI, TEP, EEG, MEG, and studies involving human participants as well as animal studies.@@brain mapping,brain imaging,electroencephalography,arterial spin labelling,brain fingerprinting,brain,neuroimaging,Multimodal Brain Image Analysis,fMRI,neuroinformatics,fetal brain,brain ultrasonic imaging,topographic brain mapping,diffusion tensor imaging,computerized knowledge assessment,connectome mapping,brain magnetic resonance imaging,brain abnormalities@@oac_ni@@community",
"mes@@European Marine Science@@This community was initially defined to include a very broad range of topics, with the intention to generate a number of more focused and sustainable dashboards for research communities and initiatives. As outlined in the logo of this community, we intend to setup a community dashboard for EuroMarine (a consortium of 56 research and academic organisations) and monitoring dashboards for marine research initiatives, including infrastructures (e.g. EMBRC & EMSO), advisory boards (e.g. Marine Boards & ICES), and transnational funding bodies (e.g. JPI-Oceans and Tara Foundation).@@marine,ocean,fish,aqua,sea@@oac_mes@@community",
"instruct@@Instruct-ERIC@@Instruct-ERIC is the European Research Infrastructure for Structural Biology@@@@oac_instruct@@community",
"elixir-gr@@The Greek National Node of the ESFRI European RI ELIXIR@@ELIXIR-GR enhances the potential of the Greek bioinformatics community to offer open, easily accessible and state -of- the- art services to the Greek and the international academic community and other stakeholders, such as industry and the health sector. More importantly, by providing these services, the infrastructure facilitates discoveries in the field of the life-sciences, having strong spill over effects in promoting innovation in sectors such as discovery of new drug targets and development of novel therapeutic agents, development of innovative diagnostics, personalized medicine, and development of innovative biotechnological products and processes.@@@@oaa_elixir-gr@@ri",
"aginfra@@Agricultural and Food Sciences@@The scope of this community is to provide access to publications, research data, projects and software that are related to agricultural and food sciences@@animal production and health,fisheries and aquaculture,food safety and human nutrition,information management,food technology,agri-food education and extension,natural resources and environment,food system,engineering technology and Research,agriculture,food safety risk assessment,food security,farming practices and systems,plant production and protection,agri-food economics and policy,Agri-food,food distribution,forestry@@oac_aginfra@@community",
"dariah@@DARIAH EU@@The Digital Research Infrastructure for the Arts and Humanities (DARIAH) aims to enhance and support digitally-enabled research and teaching across the arts and humanities. It develops, maintains and operates an infrastructure in support of ICT-based research practices and sustains researchers in using them to build, analyse and interpret digital resources. DARIAH was established as a European Research Infrastructure Consortium (ERIC) in August 2014. Currently, DARIAH has 18 Members and several cooperating partners in eight non-member countries. Here you will find a growing collection of DARIAH-affiliated research outputs and other documents. @@@@dariah@@ri",
"epos@@European Plate Observing System@@EPOS, the European Plate Observing System, is a long-term plan to facilitate integrated use of data, data products, and facilities from distributed research infrastructures for solid Earth science in Europe.@@@@@@ri",
"covid-19@@Corona Virus Disease@@This portal provides access to publications, research data, projects and software that may be relevant to the Corona Virus Disease (COVID-19). The OpenAIRE COVID-19 Gateway aggregates COVID-19 related records, links them and provides a single access point for discovery and navigation. We tag content from the OpenAIRE Research Graph (10,000+ data sources) and additional sources. All COVID-19 related research results are linked to people, organizations and projects, providing a contextualized navigation.@@COVID19,SARS-CoV,HCoV-19,mesh:C000657245,MERS-CoV,Síndrome Respiratorio Agudo Severo,mesh:COVID-19,COVID2019,COVID-19,SARS-CoV-2,2019 novel coronavirus,severe acute respiratory syndrome coronavirus 2,Orthocoronavirinae,Coronaviridae,mesh:D045169,coronavirus,SARS,coronaviruses,coronavirus disease-19,sars cov 2,Middle East Respiratory Syndrome,Severe acute respiratory syndrome coronavirus 2,Severe Acute Respiratory Syndrome,coronavirus disease 2019,2019-nCoV@@covid-19@@community");

    @Mock
    private ISLookUpService isLookUpService;

    private QueryInformationSystem queryInformationSystem;

    private static String workingDir;

    @BeforeEach
    public void setUp() throws ISLookUpException {
        lenient().when(isLookUpService.quickSearchProfile(XQUERY_ENTITY)).thenReturn(communityMap);
        queryInformationSystem = new QueryInformationSystem();
        queryInformationSystem.setIsLookUp(isLookUpService);
    }

    @BeforeAll
    public static void beforeAll() throws IOException {
        workingDir = Files
            .createTempDirectory(CreateEntityTest.class.getSimpleName())
            .toString();
    }

    @Test
    void test1() throws ISLookUpException, IOException {
        List<ContextInfo> cInfoList = new ArrayList<>();
        final Consumer<ContextInfo> consumer = ci -> cInfoList.add(ci);
        queryInformationSystem.getContextInformation(consumer);

        List<ResearchInitiative> riList = new ArrayList<>();
        cInfoList.forEach(cInfo -> riList.add(Process.getEntity(cInfo)));

        Assertions.assertEquals(12, riList.size());

        riList.stream().forEach(c -> {
            switch (c.getAcronym()) {
                case "mes":
                    Assertions
                        .assertTrue(c.getType().equals(eu.dnetlib.dhp.oa.graph.dump.Constants.RESEARCH_COMMUNITY));
                    Assertions.assertTrue(((ResearchCommunity) c).getSubject().size() == 5);
                    Assertions.assertTrue(((ResearchCommunity) c).getSubject().contains("marine"));
                    Assertions.assertTrue(((ResearchCommunity) c).getSubject().contains("ocean"));
                    Assertions.assertTrue(((ResearchCommunity) c).getSubject().contains("fish"));
                    Assertions.assertTrue(((ResearchCommunity) c).getSubject().contains("aqua"));
                    Assertions.assertTrue(((ResearchCommunity) c).getSubject().contains("sea"));
                    Assertions
                        .assertTrue(
                            c
                                .getId()
                                .equals(
                                    String
                                        .format(
                                            "%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX,
                                            DHPUtils.md5(c.getAcronym()))));
                    Assertions.assertTrue(c.getZenodo_community().equals("https://zenodo.org/communities/oac_mes"));
                    Assertions.assertTrue("mes".equals(c.getAcronym()));
                    break;
                case "clarin":
                    Assertions
                        .assertTrue(c.getType().equals(eu.dnetlib.dhp.oa.graph.dump.Constants.RESEARCH_INFRASTRUCTURE));
                    Assertions
                        .assertTrue(
                            c
                                .getId()
                                .equals(
                                    String
                                        .format(
                                            "%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX,
                                            DHPUtils.md5(c.getAcronym()))));
                    Assertions.assertTrue(c.getZenodo_community().equals("https://zenodo.org/communities/oac_clarin"));
                    Assertions.assertTrue("clarin".equals(c.getAcronym()));
                    break;
            }
            // TODO add check for all the others Entities

        });

        riList.forEach(c -> System.out.println(new Gson().toJson(c)));
    }

    @Test
    @Disabled
    void test2() throws IOException, ISLookUpException {
        LocalFileSystem fs = FileSystem.getLocal(new Configuration());

        Path hdfsWritePath = new Path(workingDir + "/prova");
        FSDataOutputStream fsDataOutputStream = null;
        if (fs.exists(hdfsWritePath)) {
            fsDataOutputStream = fs.append(hdfsWritePath);
        } else {
            fsDataOutputStream = fs.create(hdfsWritePath);
        }
        CompressionCodecFactory factory = new CompressionCodecFactory(fs.getConf());
        CompressionCodec codec = factory.getCodecByClassName("org.apache.hadoop.io.compress.GzipCodec");

        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(codec.createOutputStream(fsDataOutputStream),
            StandardCharsets.UTF_8));

        List<ContextInfo> cInfoList = new ArrayList<>();
        final Consumer<ContextInfo> consumer = ci -> cInfoList.add(ci);
        queryInformationSystem.getContextInformation(consumer);

        for (ContextInfo cInfo : cInfoList) {
            writer.write(new Gson().toJson(Process.getEntity(cInfo)));
        }
        writer.close();

    }
}
@@ -0,0 +1,723 @@

package eu.dnetlib.dhp.oa.graph.dump.complete;

import java.util.*;
import java.util.function.Consumer;
import java.util.stream.Collectors;

import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import com.google.gson.Gson;

import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.dump.oaf.graph.Relation;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.utils.DHPUtils;

class CreateRelationTest {

    List<String> communityContext = Arrays
        .asList(
"<context id=\"clarin\" label=\"CLARIN\" type=\"ri\">\n" +
|
||||||
|
" <param name=\"status\">all</param>\n" +
|
||||||
|
" <param name=\"description\">CLARIN</param>\n" +
|
||||||
|
" <param name=\"logourl\">https://www.clarin.eu/sites/default/files/clarin-frontpage-logo.jpg</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"name\">Common Language Resources and Technology Infrastructure</param>\n" +
|
||||||
|
" <param name=\"manager\">maria@clarin.eu,dieter@clarin.eu,f.m.g.dejong@uu.nl,paolo.manghi@isti.cnr.it</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"subject\"/>\n" +
|
||||||
|
" <param name=\"suggestedAcknowledgement\">(Part of) the work reported here was made possible by using the CLARIN infrastructure.</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"suggestedAcknowledgement\">The work reported here has received funding through <CLARIN national consortium member, e.g. CLARIN.SI>, <XYZ> project, grant no. <XYZ>.</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"suggestedAcknowledgement\">The work reported here has received funding (through CLARIN ERIC) from the European Union’s Horizon 2020 research and innovation programme under grant agreement No <0-9> for project <XYZ>.\n"
|
||||||
|
+
|
||||||
|
" (E.g. No 676529 for project CLARIN-PLUS.)</param>\n" +
|
||||||
|
" <param name=\"zenodoCommunity\">oac_clarin</param>\n" +
|
||||||
|
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
|
||||||
|
" <category claim=\"true\" id=\"clarin::projects\" label=\"CLARIN Projects\">\n" +
|
||||||
|
" <concept claim=\"false\" id=\"clarin::projects::1\" label=\"CLARIN-PLUS\">\n" +
|
||||||
|
" <param name=\"projectfullname\">CLARIN-PLUS</param>\n" +
|
||||||
|
" <param name=\"suggestedAcknowledgement\"/>\n" +
|
||||||
|
" <param name=\"rule\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">676529</param>\n" +
|
||||||
|
" <param name=\"url\">http://www.clarin.eu</param>\n" +
|
||||||
|
" <param name=\"funder\">EC</param>\n" +
|
||||||
|
" <param name=\"funding\">H2020-INFRADEV-1-2015-1</param>\n" +
|
||||||
|
" <param name=\"acronym\">CLARIN+</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"clarin::projects::2\" label=\"CLARIN\">\n" +
|
||||||
|
" <param name=\"projectfullname\">Common Language Resources and Technology Infrastructure</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"acronym\">CLARIN</param>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">212230</param>\n" +
|
||||||
|
" <param name=\"funder\">EC</param>\n" +
|
||||||
|
" <param name=\"openaireId\">corda_______::ef782b2d85676aa3e5a907427feb18c4</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" </category>\n" +
|
||||||
|
" <category claim=\"false\" id=\"clarin::contentproviders\" label=\"CLARIN Content providers\">" +
|
||||||
|
"<!--<concept claim=\"true\" id=\"clarin::contentproviders::1\" label=\"Zotero\">\n" +
|
||||||
|
" <param name=\"openaireId\">opendoar____::d96409bf894217686ba124d7356686c9</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"name\">Public Knowledge Project EPrint Archive</param>\n" +
|
||||||
|
" <param name=\"officialname\">Public Knowledge Project EPrint Archive</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept> -->\n" +
|
||||||
|
" <concept claim=\"false\" id=\"clarin::contentproviders::2\" label=\"\">\n" +
|
||||||
|
" <param name=\"name\">LINDAT/CLARIN repository</param>\n" +
|
||||||
|
" <param name=\"officialname\">LINDAT/CLARIN repository</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" </category>\n" +
|
||||||
|
" <category claim=\"true\" id=\"clarin::subcommunity\" label=\"CLARIN communities\">\n" +
|
||||||
|
" <concept claim=\"true\" id=\"clarin::subcommunity::1\" label=\"CLARIN-D\">\n" +
|
||||||
|
" <param name=\"fullname\">CLARIN-D</param>\n" +
|
||||||
|
" <param name=\"homepageUrl\">https://www.clarin-d.de/en/</param>\n" +
|
||||||
|
" <param name=\"suggestedAcknowledgement\"/>\n" +
|
||||||
|
" <param name=\"example\">http://www.lrec-conf.org/proceedings/lrec2018/pdf/504.pdf</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"nation\">Germany</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" </category>\n" +
|
||||||
|
" <category claim=\"false\" id=\"clarin::zenodocommunities\" label=\"CLARIN Zenodo Communities\"/>\n"
|
||||||
|
+
|
||||||
|
" <category claim=\"false\" id=\"clarin::organizations\" label=\"CLARIN Organizations\"/>\n" +
|
||||||
|
"</context>",
|
||||||
|
"<context id=\"dh-ch\" label=\"Digital Humanities and Cultural Heritage\" type=\"community\">\n" +
|
||||||
|
" <param name=\"status\">all</param>\n" +
|
||||||
|
" <param name=\"description\">This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"logourl\">http://sanmamante.org/DH_CH_logo.png</param>\n" +
|
||||||
|
" <param name=\"name\">Digital Humanities and Cultural Heritage</param>\n" +
|
||||||
|
" <param name=\"manager\">ileniagalluccio87@gmail.com,achille.felicetti@gmail.com,paolo.manghi@isti.cnr.it,tim.evans@york.ac.uk</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"subject\">modern art,monuments,europeana data model,sites,field walking,frescoes,LIDO metadata schema,art history,excavation,Arts and Humanities General,cities,coins,temples,numismatics,lithics,roads,environmental archaeology,digital cultural heritage,archaeological reports,history,CRMba,churches,cultural heritage,archaeological stratigraphy,religious art,buidings,digital humanities,survey,archaeological sites,linguistic studies,bioarchaeology,architectural orders,palaeoanthropology,fine arts,europeana,CIDOC CRM,decorations,classic art,stratigraphy,digital archaeology,intangible cultural heritage,walls,humanities,chapels,CRMtex,Language and Literature,paintings,archaeology,fair data,mosaics,burials,architecture,medieval art,castles,CARARE metadata schema,statues,natural language processing,inscriptions,CRMsci,vaults,contemporary art,Arts and Humanities,CRMarchaeo,pottery,site,architectural,vessels</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"suggestedAcknowledgement\">The present work has been partially supported by the PARTHENOS project, funded by the European Commission (Grant Agreement No. 654119) under the HORIZON 2020 - INFRADEV-4-2014/2015 call</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"zenodoCommunity\">oac_dh-ch</param>\n" +
|
||||||
|
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
|
||||||
|
" <category claim=\"false\" id=\"dh-ch::projects\" label=\"DH-CH Projects\">\n" +
|
||||||
|
" <concept claim=\"false\" id=\"dh-ch::projects::1\" label=\"Pooling Activities, Resources and Tools for Heritage E-research Networking, Optimization and Synergies\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"projectfullname\">Pooling Activities, Resources and Tools for Heritage E-research Networking, Optimization and Synergies</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"suggestedAcknowledgement\">The present work has been partially supported by the PARTHENOS project, funded by the European Commission (Grant Agreement No. 654119) under the HORIZON 2020 - INFRADEV-4-2014/2015 call</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"rule\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">654119</param>\n" +
|
||||||
|
" <param name=\"url\">http://www.parthenos-project.eu</param>\n" +
|
||||||
|
" <param name=\"funder\">EC</param>\n" +
|
||||||
|
" <param name=\"acronym\">PARTHENOS</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" </category>\n" +
|
||||||
|
" <category claim=\"false\" id=\"dh-ch::contentproviders\" label=\"DH-CH Content providers\">\n" +
|
||||||
|
" <concept claim=\"false\" id=\"dh-ch::contentproviders::2\" label=\"The UK's largest collection of digital research data in the social sciences and humanities\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"openaireId\">re3data_____::9ebe127e5f3a0bf401875690f3bb6b81</param>\n" +
|
||||||
|
" <param name=\"name\">The UK's largest collection of digital research data in the social sciences and humanities</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"officialname\">UK Data Archive</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"dh-ch::contentproviders::3\" label=\"Journal of Data Mining and Digital Humanities\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"openaireId\">doajarticles::c6cd4b532e12868c1d760a8d7cda6815</param>\n" +
|
||||||
|
" <param name=\"name\">Journal of Data Mining and Digital Humanities</param>\n" +
|
||||||
|
" <param name=\"officialname\">Journal of Data Mining and Digital Humanities</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"dh-ch::contentproviders::6\" label=\"Frontiers in Digital Humanities\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"openaireId\">doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b</param>\n" +
|
||||||
|
" <param name=\"name\">Frontiers in Digital Humanities</param>\n" +
|
||||||
|
" <param name=\"officialname\">Frontiers in Digital Humanities</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"dh-ch::contentproviders::7\" label=\"Il Capitale Culturale: Studies on the Value of Cultural Heritage\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"openaireId\">doajarticles::6eb31d13b12bc06bbac06aef63cf33c9</param>\n" +
|
||||||
|
" <param name=\"name\">Il Capitale Culturale: Studies on the Value of Cultural Heritage</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"officialname\">Il Capitale Culturale: Studies on the Value of Cultural Heritage</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"dh-ch::contentproviders::8\" label=\"Conservation Science in Cultural Heritage\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"openaireId\">doajarticles::0da84e9dfdc8419576169e027baa8028</param>\n" +
|
||||||
|
" <param name=\"name\">Conservation Science in Cultural Heritage</param>\n" +
|
||||||
|
" <param name=\"officialname\">Conservation Science in Cultural Heritage</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"dh-ch::contentproviders::9\" label=\"Electronic Archiving System\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"openaireId\">re3data_____::84e123776089ce3c7a33db98d9cd15a8</param>\n" +
|
||||||
|
" <param name=\"name\">Electronic Archiving System</param>\n" +
|
||||||
|
" <param name=\"officialname\">EASY</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"dh-ch::contentproviders::10\" label=\"DANS-KB Harvester\">\n" +
|
||||||
|
" <param name=\"openaireId\">openaire____::c5502a43e76feab55dd00cf50f519125</param>\n" +
|
||||||
|
" <param name=\"name\">DANS-KB Harvester</param>\n" +
|
||||||
|
" <param name=\"officialname\">Gemeenschappelijke Harvester DANS-KB</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"dh-ch::contentproviders::11\" label=\"ads\">\n" +
|
||||||
|
" <param name=\"openaireId\">re3data_____::a48f09c562b247a9919acfe195549b47</param>\n" +
|
||||||
|
" <param name=\"name\">ads</param>\n" +
|
||||||
|
" <param name=\"officialname\">Archaeology Data Service</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"dh-ch::contentproviders::12\" label=\"\">\n" +
|
||||||
|
" <param name=\"openaireId\">opendoar____::97275a23ca44226c9964043c8462be96</param>\n" +
|
||||||
|
" <param name=\"name\">KNAW Repository</param>\n" +
|
||||||
|
" <param name=\"officialname\">KNAW Repository</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"dh-ch::contentproviders::13\" label=\"Internet Archaeology\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"openaireId\">doajarticles::2899208a99aa7d142646e0a80bfeef05</param>\n" +
|
||||||
|
" <param name=\"name\">Internet Archaeology</param>\n" +
|
||||||
|
" <param name=\"officialname\">Internet Archaeology</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" <param name=\"selcriteria\"/>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" </category>\n" +
|
||||||
|
"</context>\n",
|
||||||
|
"<context id=\"ni\" label=\"Neuroinformatics\" type=\"community\">\n" +
|
||||||
|
" <param name=\"status\">all</param>\n" +
|
||||||
|
" <param name=\"description\">The neuroinformatics dashboard gathers research outputs from the 'neuroinformatics' community at large including the fields of: neuroscience, neuroinformatics, brain imaging databases and standards, brain imaging techniques, neuroimaging methods including statistics and machine learning. The dashboard covers a wide range of imaging methods including (but not limited to): MRI, TEP, EEG, MEG, and studies involving human participants as well as animal studies.</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"logourl\">https://docs.google.com/drawings/u/0/d/10e191xGoGf4uaRluMqbt_7cCj6LSCs2a29im4CmWjqU/export/png</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"name\">Neuroinformatics</param>\n" +
|
||||||
|
" <param name=\"manager\">sorina.pop@creatis.insa-lyon.fr,camille.maumet@inria.fr,christian.barillot@irisa.fr,xavier.rolland@irisa.fr,axel.bonnet@creatis.insa-lyon.fr,paolo.manghi@isti.cnr.it</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"subject\">brain mapping,brain imaging,electroencephalography,arterial spin labelling,brain fingerprinting,brain,neuroimaging,Multimodal Brain Image Analysis,fMRI,neuroinformatics,fetal brain,brain ultrasonic imaging,topographic brain mapping,diffusion tensor imaging,computerized knowledge assessment,connectome mapping,brain magnetic resonance imaging,brain abnormalities</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"suggestedAcknowledgement\"/>\n" +
|
||||||
|
" <param name=\"zenodoCommunity\">oac_ni</param>\n" +
|
||||||
|
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
|
||||||
|
" <category claim=\"false\" id=\"ni::projects\" label=\"NI Content providers\"/>\n" +
|
||||||
|
" <category claim=\"false\" id=\"ni::contentproviders\" label=\"NI Content providers\">\n" +
|
||||||
|
" <concept claim=\"false\" id=\"ni::contentproviders::1\" label=\"OpenNeuro\">\n" +
|
||||||
|
" <param name=\"openaireId\">re3data_____::5b9bf9171d92df854cf3c520692e9122</param>\n" +
|
||||||
|
" <param name=\"name\">Formerly:OpenFMRI</param>\n" +
|
||||||
|
" <param name=\"officialname\">OpenNeuro</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"ni::contentproviders::2\" label=\"RIO\">\n" +
|
||||||
|
" <param name=\"openaireId\">doajarticles::c7d3de67dc77af72f6747157441252ec</param>\n" +
|
||||||
|
" <param name=\"name\">Research Ideas and Outcomes</param>\n" +
|
||||||
|
" <param name=\"officialname\">Research Ideas and Outcomes</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"ni::contentproviders::3\" label=\"NITRC\">\n" +
|
||||||
|
" <param name=\"openaireId\">re3data_____::8515794670370f49c1d176c399c714f5</param>\n" +
|
||||||
|
" <param name=\"name\">Neuroimaging Informatics Tools and Resources Clearinghouse</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"officialname\">NITRC</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"ni::contentproviders::4\" label=\"FRONTIERSNI\">\n" +
|
||||||
|
" <param name=\"openaireId\">doajarticles::d640648c84b10d425f96f11c3de468f3</param>\n" +
|
||||||
|
" <param name=\"name\">Frontiers in Neuroinformatics</param>\n" +
|
||||||
|
" <param name=\"officialname\">Frontiers in Neuroinformatics</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"ni::contentproviders::5\" label=\"NeuroImage: Clinical\">\n" +
|
||||||
|
" <param name=\"openaireId\">doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a</param>\n" +
|
||||||
|
" <param name=\"name\">NeuroImage: Clinical</param>\n" +
|
||||||
|
" <param name=\"officialname\">NeuroImage: Clinical</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"ni::contentproviders::6\" label=\"NeuroVault\">\n" +
|
||||||
|
" <param name=\"openaireId\">rest________::fb1a3d4523c95e63496e3bc7ba36244b</param>\n" +
|
||||||
|
" <param name=\"name\">NeuroVault</param>\n" +
|
||||||
|
" <param name=\"officialname\">NeuroVault</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" </category>\n" +
|
||||||
|
"</context>\n",
|
||||||
|
"<context id=\"instruct\" label=\"Instruct-ERIC\" type=\"ri\">\n" +
|
||||||
|
" <param name=\"status\">all</param>\n" +
|
||||||
|
" <param name=\"description\">Instruct-ERIC is the European Research Infrastructure for Structural Biology</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"logourl\">https://instruct-eric.eu/templates/instructeric/images/logos/instruct-eric-logo-noline.png</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"name\">Instruct-ERIC</param>\n" +
|
||||||
|
" <param name=\"manager\">claudia@instruct-eric.eu,carazo@cnb.csic.es,echrysina@eie.gr,susan@instruct-eric.eu,naomi@instruct-eric.eu,natalie@instruct-eric.eu,pmarie@igbmc.fr,darren.hart@ibs.fr,claudia@strubi.ox.ac.uk,paolo.manghi@isti.cnr.it</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"subject\"/>\n" +
|
||||||
|
" <param name=\"suggestedAcknowledgement\">The authors acknowledge the support and the use of resources of Instruct-ERIC.</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"suggestedAcknowledgement\">The authors acknowledge the support and the use of resources of Instruct (PID # or APPID #), a Landmark ESFRI project</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"zenodoCommunity\">oac_instruct</param>\n" +
|
||||||
|
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
|
||||||
|
" <category claim=\"false\" id=\"instruct::projects\" label=\"Instruct-ERIC Projects\">\n" +
|
||||||
|
" <concept claim=\"false\" id=\"instruct::projects::1\" label=\"Authentication and Authorisation For Research and Collaboration\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"projectfullname\">Authentication and Authorisation For Research and Collaboration</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"rule\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">730941</param>\n" +
|
||||||
|
" <param name=\"url\"/>\n" +
|
||||||
|
" <param name=\"funding\">H2020-EINFRA-2016-1</param>\n" +
|
||||||
|
" <param name=\"acronym\">AARC2</param>\n" +
|
||||||
|
" <param name=\"funder\">EC</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"instruct::projects::2\" label=\"Building data bridges between biological and medical infrastructures in Europe\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"projectfullname\">Building data bridges between biological and medical infrastructures in Europe</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"rule\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">284209</param>\n" +
|
||||||
|
" <param name=\"url\"/>\n" +
|
||||||
|
" <param name=\"funding\">FP7-INFRASTRUCTURES-2011-1</param>\n" +
|
||||||
|
" <param name=\"funder\">EC</param>\n" +
|
||||||
|
" <param name=\"acronym\">BioMedBridges</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"instruct::projects::3\" label=\"Transnational access and enhancement of integrated Biological Structure determination at synchrotron X-ray radiation facilities\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"projectfullname\">Transnational access and enhancement of integrated Biological Structure determination at synchrotron X-ray radiation facilities</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"rule\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">283570</param>\n" +
|
||||||
|
" <param name=\"url\"/>\n" +
|
||||||
|
" <param name=\"funding\">FP7-INFRASTRUCTURES-2011-1</param>\n" +
|
||||||
|
" <param name=\"funder\">EC</param>\n" +
|
||||||
|
" <param name=\"acronym\">BioStruct-X</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"instruct::projects::4\" label=\"Coordinated Research Infrastructures Building Enduring Life-science services\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"projectfullname\">Coordinated Research Infrastructures Building Enduring Life-science services</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"rule\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">654248</param>\n" +
|
||||||
|
" <param name=\"url\"/>\n" +
|
||||||
|
" <param name=\"funding\">H2020-INFRADEV-1-2014-1</param>\n" +
|
||||||
|
" <param name=\"funder\">EC</param>\n" +
|
||||||
|
" <param name=\"acronym\">CORBEL</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"instruct::projects::5\" label=\"Infrastructure for NMR, EM and X-rays for translational research\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"projectfullname\">Infrastructure for NMR, EM and X-rays for translational research</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"rule\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">653706</param>\n" +
|
||||||
|
" <param name=\"url\"/>\n" +
|
||||||
|
" <param name=\"funding\">H2020-INFRAIA-2014-2015</param>\n" +
|
||||||
|
" <param name=\"funder\">EC</param>\n" +
|
||||||
|
" <param name=\"acronym\">iNEXT</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"instruct::projects::6\" label=\"Integrated Structural Biology Infrastructure\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"projectfullname\">Integrated Structural Biology Infrastructure</param>\n" +
|
||||||
|
" <param name=\"rule\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">211252</param>\n" +
|
||||||
|
" <param name=\"url\"/>\n" +
|
||||||
|
" <param name=\"funding\">FP7-INFRASTRUCTURES-2007-1</param>\n" +
|
||||||
|
" <param name=\"funder\">EC</param>\n" +
|
||||||
|
" <param name=\"acronym\">INSTRUCT</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"instruct::projects::7\" label=\"Releasing the full potential of Instruct to expand and consolidate infrastructure services for integrated structural life science research\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"projectfullname\">Releasing the full potential of Instruct to expand and consolidate infrastructure services for integrated structural life science research</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"rule\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">731005</param>\n" +
|
||||||
|
" <param name=\"url\"/>\n" +
|
||||||
|
" <param name=\"funding\">H2020-INFRADEV-2016-1</param>\n" +
|
||||||
|
" <param name=\"funder\">EC</param>\n" +
|
||||||
|
" <param name=\"acronym\">INSTRUCT-ULTRA</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"instruct::projects::8\" label=\"Opening Synchrotron Light for Experimental Science and Applications in the Middle East\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"projectfullname\">Opening Synchrotron Light for Experimental Science and Applications in the Middle East</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"rule\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">730943</param>\n" +
|
||||||
|
" <param name=\"url\"/>\n" +
|
||||||
|
" <param name=\"funding\">H2020-INFRASUPP-2016-1</param>\n" +
|
||||||
|
" <param name=\"funder\">EC</param>\n" +
|
||||||
|
" <param name=\"acronym\">OPEN SESAME</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"instruct::projects::9\" label=\"Infrastructure for Protein Production Platforms\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"projectfullname\">Infrastructure for Protein Production Platforms</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"rule\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">227764</param>\n" +
|
||||||
|
" <param name=\"url\"/>\n" +
|
||||||
|
" <param name=\"funding\">FP7-INFRASTRUCTURES-2008-1</param>\n" +
|
||||||
|
" <param name=\"funder\">EC</param>\n" +
|
||||||
|
" <param name=\"acronym\">PCUBE</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"instruct::projects::10\" label=\"European Vaccine Research and Development Infrastructure\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"projectfullname\">European Vaccine Research and Development Infrastructure</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"rule\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">730964</param>\n" +
|
||||||
|
" <param name=\"url\"/>\n" +
|
||||||
|
" <param name=\"funding\">H2020-INFRAIA-2016-1</param>\n" +
|
||||||
|
" <param name=\"funder\">EC</param>\n" +
|
||||||
|
" <param name=\"acronym\">TRAMSVAC2</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"instruct::projects::11\" label=\"World-wide E-infrastructure for structural biology\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"projectfullname\">World-wide E-infrastructure for structural biology</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"rule\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">675858</param>\n" +
|
||||||
|
" <param name=\"url\"/>\n" +
|
||||||
|
" <param name=\"funding\">EC | H2020 | RIA</param>\n" +
|
||||||
|
" <param name=\"funder\">EC</param>\n" +
|
||||||
|
" <param name=\"acronym\">West-Life</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"instruct::projects::12\" label=\"RI-VIS\">\n" +
|
||||||
|
" <param name=\"projectfullname\">Expanding research infrastructure visibility to strengthen strategic partnerships</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"acronym\">RI-VIS</param>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">824063</param>\n" +
|
||||||
|
" <param name=\"funder\">EC</param>\n" +
|
||||||
|
" <param name=\"openaireId\">corda__h2020::af93b591b76991d8437993a8f6fc6538</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" </category>\n" +
|
||||||
|
" <category claim=\"false\" id=\"instruct::contentproviders\" label=\"Instruct-ERIC Content providers\"/>\n"
|
||||||
|
+
|
||||||
|
" <category claim=\"false\" id=\"instruct::zenodocommunities\" label=\"Instruct-ERIC Zenodo Communities\">\n"
|
||||||
|
+
|
||||||
|
" <concept claim=\"false\" id=\"instruct::zenodocommunities::1\" label=\"Instruct\">\n" +
|
||||||
|
" <param name=\"zenodoid\">instruct</param>\n" +
|
||||||
|
" <param name=\"selcriteria\"/>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"instruct::zenodocommunities::2\" label=\"West-Life Virtual Research Environment for Structural Biology\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"zenodoid\">west-life</param>\n" +
|
||||||
|
" <param name=\"selcriteria\"/>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" </category>\n" +
|
||||||
|
" <category claim=\"false\" id=\"instruct::organizations\" label=\"Instruct-ERIC Organizations\">\n"
|
||||||
|
+
|
||||||
|
" <concept claim=\"false\" id=\"instruct::organizations::1\" label=\"FRISBI\">\n" +
|
||||||
|
" <param name=\"name\">FRISBI</param>\n" +
|
||||||
|
" <param name=\"logourl\">aHR0cDovL2ZyaXNiaS5ldS9zdGF0aWMvaW1hZ2VzL2xvZ29zL2xvZ28tZnJpc2JpLnBuZw==</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"websiteurl\">aHR0cDovL2ZyaXNiaS5ldS8=</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"instruct::organizations::2\" label=\"RI-VIS\">\n" +
|
||||||
|
" <param name=\"name\">RI-VIS</param>\n" +
|
||||||
|
" <param name=\"logourl\">aHR0cHM6Ly9yaS12aXMuZXUvbmV0d29yay9yaXZpcy90ZW1wbGF0ZXMvcml2aXMvaW1hZ2VzL1JJLVZJU0xvZ29GaW5hbC0wNi5wbmc=</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"websiteurl\">aHR0cHM6Ly9yaS12aXMuZXU=</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"instruct::organizations::3\" label=\"CIISB\">\n" +
|
||||||
|
" <param name=\"name\">CIISB</param>\n" +
|
||||||
|
" <param name=\"logourl\">aHR0cDovL2JpYy5jZWl0ZWMuY3ovZmlsZXMvMjkyLzEyNS5KUEc=</param>\n" +
|
||||||
|
" <param name=\"websiteurl\">aHR0cHM6Ly93d3cuY2lpc2Iub3Jn</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" </category>\n" +
|
||||||
|
"</context>\n",
|
||||||
|
"<context id=\"elixir-gr\" label=\"ELIXIR GR\" type=\"ri\">\n" +
|
||||||
|
" <param name=\"status\">all</param>\n" +
|
||||||
|
" <param name=\"description\">ELIXIR-GR enhances the potential of the Greek bioinformatics community to offer open, easily accessible and state -of- the- art services to the Greek and the international academic community and other stakeholders, such as industry and the health sector. More importantly, by providing these services, the infrastructure facilitates discoveries in the field of the life-sciences, having strong spill over effects in promoting innovation in sectors such as discovery of new drug targets and development of novel therapeutic agents, development of innovative diagnostics, personalized medicine, and development of innovative biotechnological products and processes.</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"logourl\">https://elixir-greece.org/sites/default/files/ELIXIR_GREECE_white_background.png</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"name\">The Greek National Node of the ESFRI European RI ELIXIR</param>\n" +
|
||||||
|
" <param name=\"manager\">vergoulis@imis.athena-innovation.gr,schatz@imis.athena-innovation.gr,paolo.manghi@isti.cnr.it</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"subject\"/>\n" +
|
||||||
|
" <param name=\"suggestedAcknowledgement\"/>\n" +
|
||||||
|
" <param name=\"zenodoCommunity\">oaa_elixir-gr</param>\n" +
|
||||||
|
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
|
||||||
|
" <category claim=\"false\" id=\"elixir-gr::projects\" label=\"ELIXIR GR Projects\">\n" +
|
||||||
|
" <concept claim=\"false\" id=\"ni::projects::12\" label=\"\">\n" +
|
||||||
|
" <param name=\"projectfullname\">BIO-INFORMATICS RESEARCH NETWORK COORDINATING CENTER (BIRN-CC)</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"acronym\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">1U24RR025736-01</param>\n" +
|
||||||
|
" <param name=\"funder\">NIH</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"ni::projects::13\" label=\"\">\n" +
|
||||||
|
" <param name=\"projectfullname\">COLLABORATIVE RESEARCH: The Cognitive Neuroscience of Category Learning</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"acronym\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">0223843</param>\n" +
|
||||||
|
" <param name=\"funder\">NSF</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"ni::projects::14\" label=\"\">\n" +
|
||||||
|
" <param name=\"projectfullname\">The Cognitive Atlas: Developing an Interdisciplinary Knowledge Base Through Socia</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"acronym\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">5R01MH082795-05</param>\n" +
|
||||||
|
" <param name=\"funder\">NIH</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"ni::projects::15\" label=\"\">\n" +
|
||||||
|
" <param name=\"projectfullname\">Fragmented early life environmental and emotional / cognitive vulnerabilities</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"acronym\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">1P50MH096889-01A1</param>\n" +
|
||||||
|
" <param name=\"funder\">NIH</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"ni::projects::16\" label=\"\">\n" +
|
||||||
|
" <param name=\"projectfullname\">Enhancement of the 1000 Functional Connectome Project</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"acronym\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">1R03MH096321-01A1</param>\n" +
|
||||||
|
" <param name=\"funder\">TUBITAK</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"ni::projects::17\" label=\"\">\n" +
|
||||||
|
" <param name=\"projectfullname\">CRCNS Data Sharing: An open data repository for cognitive neuroscience: The OpenfMRI Project</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"acronym\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">1131441</param>\n" +
|
||||||
|
" <param name=\"funder\">NSF</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"ni::projects::18\" label=\"\">\n" +
|
||||||
|
" <param name=\"projectfullname\">Enhancing Human Cortical Plasticity: Visual Psychophysics and fMRI</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"acronym\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">0121950</param>\n" +
|
||||||
|
" <param name=\"funder\">NSF</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"ni::projects::18\" label=\"\">\n" +
|
||||||
|
" <param name=\"projectfullname\">Transforming statistical methodology for neuroimaging meta-analysis.</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"acronym\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">100309</param>\n" +
|
||||||
|
" <param name=\"funder\">WT</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" </category>" +
|
||||||
|
|
||||||
|
" <category claim=\"false\" id=\"elixir-gr::contentproviders\" label=\"Elixir-GR Content providers\">\n"
|
||||||
|
+
|
||||||
|
" <concept claim=\"false\" id=\"elixir-gr::contentproviders::1\" label=\"bio.tools\">\n" +
|
||||||
|
" <param name=\"openaireId\">rest________::b8e502674c3c3499d5374e9b2ea6d8d5</param>\n" +
|
||||||
|
" <param name=\"name\">bio.tools</param>\n" +
|
||||||
|
" <param name=\"officialname\">bio.tools</param>\n" +
|
||||||
|
" <param name=\"enabled\">false</param>\n" +
|
||||||
|
" <param name=\"selcriteria\"/>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" </category>\n" +
|
||||||
|
" <category claim=\"false\" id=\"elixir-gr::zenodocommunities\" label=\"Elixir-GR Zenodo Communities\"/>\n"
|
||||||
|
+
|
||||||
|
" <category claim=\"false\" id=\"elixir-gr::organizations\" label=\"Elixir-GR Organizations\">\n" +
|
||||||
|
" <concept claim=\"false\" id=\"elixir-gr::organizations::1\" label=\"ATHENA RC\">\n" +
|
||||||
|
" <param name=\"name\">ATHENA RC</param>\n" +
|
||||||
|
" <param name=\"logourl\">aHR0cHM6Ly9lbGl4aXItZ3JlZWNlLm9yZy9zaXRlcy9kZWZhdWx0L2ZpbGVzL3N0eWxlcy90aHVtYm5haWwvcHVibGljL3BhcnRuZXJfbG9nb3MvYXRoZW5hX2xvZ28uanBnP2l0b2s9VXdGWFNpZng=</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"websiteurl\">aHR0cHM6Ly93d3cuYXRoZW5hLWlubm92YXRpb24uZ3IvZW4=</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" </category><!-- <category claim=\"false\" id=\"elixir-gr::resultorganizations\" label=\"Elixir-GR Results through organizations\"/> -->\n"
|
||||||
|
+
|
||||||
|
"</context>");

	// The serialized context profiles above are the fixture for the tests below: setUp() hands
	// them to QueryInformationSystem, so context/datasource and context/project relations can be
	// derived without querying a live information service.

	private QueryInformationSystem queryInformationSystem;

	private Map<String, String> map;

	@BeforeEach
	public void setUp() {

		queryInformationSystem = new QueryInformationSystem();
		queryInformationSystem.setContextRelationResult(communityContext);
	}

	@Test
	void test1() {
		List<ContextInfo> cInfoList = new ArrayList<>();
		final Consumer<ContextInfo> consumer = ci -> cInfoList.add(ci);

		queryInformationSystem
			.getContextRelation(consumer, "contentproviders", ModelSupport.getIdPrefix(Datasource.class));

		cInfoList.forEach(c -> System.out.println(new Gson().toJson(c)));

		List<Relation> rList = new ArrayList<>();

		cInfoList.forEach(cInfo -> Process.getRelation(cInfo).forEach(rList::add));

		Assertions.assertEquals(34, rList.size());

		Assertions
			.assertTrue(
				rList
					.stream()
					.map(r -> r.getSource().getId())
					.collect(Collectors.toSet())
					.contains(
						String.format("%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX, DHPUtils.md5("dh-ch"))));

		Assertions
			.assertEquals(
				10,
				rList
					.stream()
					.filter(
						r -> r
							.getSource()
							.getId()
							.equals(String.format("%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX, DHPUtils.md5("dh-ch"))))
					.collect(Collectors.toList())
					.size());

		Assertions
			.assertEquals(
				10,
				rList
					.stream()
					.filter(
						r -> r
							.getTarget()
							.getId()
							.equals(String.format("%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX, DHPUtils.md5("dh-ch"))))
					.collect(Collectors.toList())
					.size());

		Set<String> tmp = rList
			.stream()
			.filter(
				r -> r
					.getSource()
					.getId()
					.equals(String.format("%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX, DHPUtils.md5("dh-ch"))))
			.map(r -> r.getTarget().getId())
			.collect(Collectors.toSet());

		Assertions
			.assertTrue(
				tmp.contains("10|re3data_____::9ebe127e5f3a0bf401875690f3bb6b81") &&
					tmp.contains("10|doajarticles::c6cd4b532e12868c1d760a8d7cda6815") &&
					tmp.contains("10|doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b") &&
					tmp.contains("10|doajarticles::6eb31d13b12bc06bbac06aef63cf33c9") &&
					tmp.contains("10|doajarticles::0da84e9dfdc8419576169e027baa8028") &&
					tmp.contains("10|re3data_____::84e123776089ce3c7a33db98d9cd15a8") &&
					tmp.contains("10|openaire____::c5502a43e76feab55dd00cf50f519125") &&
					tmp.contains("10|re3data_____::a48f09c562b247a9919acfe195549b47") &&
					tmp.contains("10|opendoar____::97275a23ca44226c9964043c8462be96") &&
					tmp.contains("10|doajarticles::2899208a99aa7d142646e0a80bfeef05"));

	}

	@Test
	public void test2() {
		List<ContextInfo> cInfoList = new ArrayList<>();
		final Consumer<ContextInfo> consumer = ci -> cInfoList.add(ci);

		queryInformationSystem
			.getContextRelation(consumer, "projects", ModelSupport.getIdPrefix(Project.class));

		cInfoList.forEach(c -> System.out.println(new Gson().toJson(c)));

		List<Relation> rList = new ArrayList<>();

		cInfoList.forEach(cInfo -> Process.getRelation(cInfo).forEach(rList::add));

		Assertions.assertEquals(44, rList.size());

		Assertions
			.assertFalse(
				rList
					.stream()
					.map(r -> r.getSource().getId())
					.collect(Collectors.toSet())
					.contains(
						String.format("%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX, DHPUtils.md5("dh-ch"))));

		Assertions
			.assertEquals(
				2,
				rList
					.stream()
					.filter(
						r -> r
							.getSource()
							.getId()
							.equals(String.format("%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX, DHPUtils.md5("clarin"))))
					.collect(Collectors.toList())
					.size());

		Assertions
			.assertEquals(
				2,
				rList
					.stream()
					.filter(
						r -> r
							.getTarget()
							.getId()
							.equals(String.format("%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX, DHPUtils.md5("clarin"))))
					.collect(Collectors.toList())
					.size());

		Set<String> tmp = rList
			.stream()
			.filter(
				r -> r
					.getSource()
					.getId()
					.equals(String.format("%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX, DHPUtils.md5("clarin"))))
			.map(r -> r.getTarget().getId())
			.collect(Collectors.toSet());

		Assertions
			.assertTrue(
				tmp.contains("40|corda__h2020::b5a4eb56bf84bef2ebc193306b4d423f") &&
					tmp.contains("40|corda_______::ef782b2d85676aa3e5a907427feb18c4"));

		rList.forEach(rel -> {
			if (rel.getSource().getId().startsWith("40|")) {
				String proj = rel.getSource().getId().substring(3);
				Assertions.assertTrue(proj.substring(0, proj.indexOf("::")).length() == 12);
			}
		});

	}
}
@ -0,0 +1,157 @@
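// Local-mode Spark test: DumpGraphEntities dumps Organization, Project and Datasource records
// read from JSON test resources, and the expected record counts (15, 12 and 5) are verified.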

package eu.dnetlib.dhp.oa.graph.dump.complete;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;

import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Project;

public class DumpOrganizationProjectDatasourceTest {

	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

	private static SparkSession spark;

	private static Path workingDir;

	private static final Logger log = LoggerFactory
		.getLogger(DumpOrganizationProjectDatasourceTest.class);

	private static final HashMap<String, String> map = new HashMap<>();

	@BeforeAll
	public static void beforeAll() throws IOException {
		workingDir = Files
			.createTempDirectory(DumpOrganizationProjectDatasourceTest.class.getSimpleName());
		log.info("using work dir {}", workingDir);

		SparkConf conf = new SparkConf();
		conf.setAppName(DumpOrganizationProjectDatasourceTest.class.getSimpleName());

		conf.setMaster("local[*]");
		conf.set("spark.driver.host", "localhost");
		conf.set("hive.metastore.local", "true");
		conf.set("spark.ui.enabled", "false");
		conf.set("spark.sql.warehouse.dir", workingDir.toString());
		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

		spark = SparkSession
			.builder()
			.appName(DumpOrganizationProjectDatasourceTest.class.getSimpleName())
			.config(conf)
			.getOrCreate();
	}

	@AfterAll
	public static void afterAll() throws IOException {
		FileUtils.deleteDirectory(workingDir.toFile());
		spark.stop();
	}

	@Test
	public void dumpOrganizationTest() throws Exception {

		final String sourcePath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/complete/organization")
			.getPath();

		DumpGraphEntities dg = new DumpGraphEntities();

		dg.run(false, sourcePath, workingDir.toString() + "/dump", Organization.class, null);

		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

		JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.graph.Organization> tmp = sc
			.textFile(workingDir.toString() + "/dump")
			.map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.graph.Organization.class));

		org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.graph.Organization> verificationDataset = spark
			.createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.graph.Organization.class));

		Assertions.assertEquals(15, verificationDataset.count());

		verificationDataset
			.foreach(
				(ForeachFunction<eu.dnetlib.dhp.schema.dump.oaf.graph.Organization>) o -> System.out
					.println(OBJECT_MAPPER.writeValueAsString(o)));

	}

	@Test
	public void dumpProjectTest() throws NoAvailableEntityTypeException {

		final String sourcePath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/complete/project")
			.getPath();

		DumpGraphEntities dg = new DumpGraphEntities();

		dg.run(false, sourcePath, workingDir.toString() + "/dump", Project.class, null);

		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

		JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.graph.Project> tmp = sc
			.textFile(workingDir.toString() + "/dump")
			.map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.graph.Project.class));

		org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.graph.Project> verificationDataset = spark
			.createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.graph.Project.class));

		Assertions.assertEquals(12, verificationDataset.count());

		verificationDataset
			.foreach(
				(ForeachFunction<eu.dnetlib.dhp.schema.dump.oaf.graph.Project>) o -> System.out
					.println(OBJECT_MAPPER.writeValueAsString(o)));

	}

	@Test
	public void dumpDatasourceTest() throws NoAvailableEntityTypeException {
		final String sourcePath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/complete/datasource")
			.getPath();

		DumpGraphEntities dg = new DumpGraphEntities();

		dg.run(false, sourcePath, workingDir.toString() + "/dump", Datasource.class, null);

		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

		JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.graph.Datasource> tmp = sc
			.textFile(workingDir.toString() + "/dump")
			.map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.graph.Datasource.class));

		org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.graph.Datasource> verificationDataset = spark
			.createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.graph.Datasource.class));

		Assertions.assertEquals(5, verificationDataset.count());

		verificationDataset
			.foreach(
				(ForeachFunction<eu.dnetlib.dhp.schema.dump.oaf.graph.Datasource>) o -> System.out
					.println(OBJECT_MAPPER.writeValueAsString(o)));
	}

}
@ -0,0 +1,305 @@
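// Exercises SparkDumpRelationJob: relations are dumped together with their provenance and, when
// the -removeSet argument lists semicolon-separated relation names (tests 3 and 4), those
// relation types are excluded from the dump.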

package eu.dnetlib.dhp.oa.graph.dump.complete;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;

import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.schema.dump.oaf.graph.Relation;

public class DumpRelationTest {

	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

	private static SparkSession spark;

	private static Path workingDir;

	private static final Logger log = LoggerFactory
		.getLogger(DumpRelationTest.class);

	private static final HashMap<String, String> map = new HashMap<>();

	@BeforeAll
	public static void beforeAll() throws IOException {
		workingDir = Files
			.createTempDirectory(DumpRelationTest.class.getSimpleName());
		log.info("using work dir {}", workingDir);

		SparkConf conf = new SparkConf();
		conf.setAppName(DumpRelationTest.class.getSimpleName());

		conf.setMaster("local[*]");
		conf.set("spark.driver.host", "localhost");
		conf.set("hive.metastore.local", "true");
		conf.set("spark.ui.enabled", "false");
		conf.set("spark.sql.warehouse.dir", workingDir.toString());
		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

		spark = SparkSession
			.builder()
			.appName(DumpRelationTest.class.getSimpleName())
			.config(conf)
			.getOrCreate();
	}

	@AfterAll
	public static void afterAll() throws IOException {
		FileUtils.deleteDirectory(workingDir.toFile());
		spark.stop();
	}

	@Test
	public void test1() throws Exception {

		final String sourcePath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation")
			.getPath();

		SparkDumpRelationJob.main(new String[] {
			"-isSparkSessionManaged", Boolean.FALSE.toString(),
			"-outputPath", workingDir.toString() + "/relation",
			"-sourcePath", sourcePath
		});

		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

		JavaRDD<Relation> tmp = sc
			.textFile(workingDir.toString() + "/relation")
			.map(item -> OBJECT_MAPPER.readValue(item, Relation.class));

		Dataset<Relation> verificationDataset = spark
			.createDataset(tmp.rdd(), Encoders.bean(Relation.class));

		verificationDataset.createOrReplaceTempView("table");

		verificationDataset
			.foreach((ForeachFunction<Relation>) r -> System.out.println(new ObjectMapper().writeValueAsString(r)));

		Dataset<Row> check = spark
			.sql(
				"SELECT reltype.name, source.id source, source.type stype, target.id target,target.type ttype, provenance.provenance from table ");

		Assertions.assertEquals(22, check.filter("name = 'isProvidedBy'").count());
		Assertions
			.assertEquals(
				22, check
					.filter(
						"name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and provenance = 'Harvested'")
					.count());

		Assertions.assertEquals(7, check.filter("name = 'isParticipant'").count());
		Assertions
			.assertEquals(
				7, check
					.filter(
						"name = 'isParticipant' and stype = 'organization' and ttype = 'project' and provenance = 'Harvested'")
					.count());

		Assertions.assertEquals(1, check.filter("name = 'isAuthorInstitutionOf'").count());
		Assertions
			.assertEquals(
				1, check
					.filter(
						"name = 'isAuthorInstitutionOf' and stype = 'organization' and ttype = 'result' and provenance = 'Inferred by OpenAIRE'")
					.count());
	}

	@Test
	public void test2() throws Exception {

		final String sourcePath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation_validated")
			.getPath();

		SparkDumpRelationJob.main(new String[] {
			"-isSparkSessionManaged", Boolean.FALSE.toString(),
			"-outputPath", workingDir.toString() + "/relation",
			"-sourcePath", sourcePath
		});

		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

		JavaRDD<Relation> tmp = sc
			.textFile(workingDir.toString() + "/relation")
			.map(item -> OBJECT_MAPPER.readValue(item, Relation.class));

		Dataset<Relation> verificationDataset = spark
			.createDataset(tmp.rdd(), Encoders.bean(Relation.class));

		verificationDataset.createOrReplaceTempView("table");

		verificationDataset
			.foreach((ForeachFunction<Relation>) r -> System.out.println(new ObjectMapper().writeValueAsString(r)));

		Dataset<Row> check = spark
			.sql(
				"SELECT reltype.name, source.id source, source.type stype, target.id target,target.type ttype, provenance.provenance from table ");

		Assertions.assertEquals(20, check.filter("name = 'isProvidedBy'").count());
		Assertions
			.assertEquals(
				20, check
					.filter(
						"name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and provenance = 'Harvested'")
					.count());

		Assertions.assertEquals(7, check.filter("name = 'isParticipant'").count());
		Assertions
			.assertEquals(
				7, check
					.filter(
						"name = 'isParticipant' and stype = 'organization' and ttype = 'project' and provenance = 'Harvested'")
					.count());

		Assertions.assertEquals(1, check.filter("name = 'isAuthorInstitutionOf'").count());
		Assertions
			.assertEquals(
				1, check
					.filter(
						"name = 'isAuthorInstitutionOf' and stype = 'organization' and ttype = 'result' and provenance = 'Inferred by OpenAIRE'")
					.count());

		Assertions.assertEquals(2, check.filter("name = 'isProducedBy'").count());
		Assertions
			.assertEquals(
				2, check
					.filter(
						"name = 'isProducedBy' and stype = 'project' and ttype = 'result' and provenance = 'Harvested' and validated = true and validationDate = '2021-08-06'")
					.count());
	}

	@Test
	public void test3() throws Exception {
		final String sourcePath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation")
			.getPath();

		SparkDumpRelationJob.main(new String[] {
			"-isSparkSessionManaged", Boolean.FALSE.toString(),
			"-outputPath", workingDir.toString() + "/relation",
			"-sourcePath", sourcePath,
			"-removeSet", "isParticipant"
		});

		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

		JavaRDD<Relation> tmp = sc
			.textFile(workingDir.toString() + "/relation")
			.map(item -> OBJECT_MAPPER.readValue(item, Relation.class));

		Dataset<Relation> verificationDataset = spark
			.createDataset(tmp.rdd(), Encoders.bean(Relation.class));

		verificationDataset.createOrReplaceTempView("table");

		verificationDataset
			.foreach((ForeachFunction<Relation>) r -> System.out.println(new ObjectMapper().writeValueAsString(r)));

		Dataset<Row> check = spark
			.sql(
				"SELECT reltype.name, source.id source, source.type stype, target.id target,target.type ttype, provenance.provenance from table ");

		Assertions.assertEquals(22, check.filter("name = 'isProvidedBy'").count());
		Assertions
			.assertEquals(
				22, check
					.filter(
						"name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and provenance = 'Harvested'")
					.count());

		Assertions.assertEquals(0, check.filter("name = 'isParticipant'").count());

		Assertions.assertEquals(1, check.filter("name = 'isAuthorInstitutionOf'").count());
		Assertions
			.assertEquals(
				1, check
					.filter(
						"name = 'isAuthorInstitutionOf' and stype = 'organization' and ttype = 'result' and provenance = 'Inferred by OpenAIRE'")
					.count());
	}

	@Test
	public void test4() throws Exception {
		final String sourcePath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation")
			.getPath();

		SparkDumpRelationJob.main(new String[] {
			"-isSparkSessionManaged", Boolean.FALSE.toString(),
			"-outputPath", workingDir.toString() + "/relation",
			"-sourcePath", sourcePath,
			"-removeSet", "isParticipant;isAuthorInstitutionOf"
		});

		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

		JavaRDD<Relation> tmp = sc
			.textFile(workingDir.toString() + "/relation")
			.map(item -> OBJECT_MAPPER.readValue(item, Relation.class));

		Dataset<Relation> verificationDataset = spark
			.createDataset(tmp.rdd(), Encoders.bean(Relation.class));

		verificationDataset.createOrReplaceTempView("table");

		verificationDataset
			.foreach((ForeachFunction<Relation>) r -> System.out.println(new ObjectMapper().writeValueAsString(r)));

		Dataset<Row> check = spark
			.sql(
				"SELECT reltype.name, source.id source, source.type stype, target.id target,target.type ttype, provenance.provenance from table ");

		Assertions.assertEquals(22, check.filter("name = 'isProvidedBy'").count());
		Assertions
			.assertEquals(
				22, check
					.filter(
						"name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and provenance = 'Harvested'")
					.count());

		Assertions.assertEquals(0, check.filter("name = 'isParticipant'").count());

		Assertions.assertEquals(0, check.filter("name = 'isAuthorInstitutionOf'").count());

	}

}
@ -0,0 +1,175 @@
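// Checks that Extractor derives context relations from a single publication dump record:
// nine relations per result identifier in the fixture, in both directions, typed
// IsRelatedTo/relationship between a context node and a result.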

package eu.dnetlib.dhp.oa.graph.dump.complete;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.schema.dump.oaf.graph.Relation;

public class ExtractRelationFromEntityTest {

	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

	private static SparkSession spark;

	private static Path workingDir;

	private static final Logger log = LoggerFactory
		.getLogger(ExtractRelationFromEntityTest.class);

	@BeforeAll
	public static void beforeAll() throws IOException {
		workingDir = Files
			.createTempDirectory(ExtractRelationFromEntityTest.class.getSimpleName());
		log.info("using work dir {}", workingDir);

		SparkConf conf = new SparkConf();
		conf.setAppName(ExtractRelationFromEntityTest.class.getSimpleName());

		conf.setMaster("local[*]");
		conf.set("spark.driver.host", "localhost");
		conf.set("hive.metastore.local", "true");
		conf.set("spark.ui.enabled", "false");
		conf.set("spark.sql.warehouse.dir", workingDir.toString());
		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

		spark = SparkSession
			.builder()
			.appName(ExtractRelationFromEntityTest.class.getSimpleName())
			.config(conf)
			.getOrCreate();
	}

	@AfterAll
	public static void afterAll() throws IOException {
		FileUtils.deleteDirectory(workingDir.toFile());
		spark.stop();
	}

	@Test
	void test1() {

		final String sourcePath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/singelRecord_pub.json")
			.getPath();

		final String communityMapPath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
			.getPath();

		Extractor ex = new Extractor();
		ex
			.run(
				false, sourcePath, workingDir.toString() + "/relation",
				eu.dnetlib.dhp.schema.oaf.Publication.class, communityMapPath);

		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

		JavaRDD<Relation> tmp = sc
			.textFile(workingDir.toString() + "/relation")
			.map(item -> OBJECT_MAPPER.readValue(item, Relation.class));

		org.apache.spark.sql.Dataset<Relation> verificationDataset = spark
			.createDataset(tmp.rdd(), Encoders.bean(Relation.class));

		Assertions
			.assertEquals(
				9,
				verificationDataset.filter("source.id = '50|dedup_wf_001::15270b996fa8fd2fb5723daeab3685c3'").count());

		Assertions
			.assertEquals(
				9,
				verificationDataset.filter("source.id = '50|dedup_wf_001::15270b996fa8fd2fb5723daxab3685c3'").count());

		Assertions
			.assertEquals(
				"IsRelatedTo", verificationDataset
					.filter((FilterFunction<Relation>) row -> row.getSource().getId().startsWith("00"))
					.collectAsList()
					.get(0).getReltype().getName());

		Assertions
			.assertEquals(
				"relationship", verificationDataset
					.filter((FilterFunction<Relation>) row -> row.getSource().getId().startsWith("00"))
					.collectAsList()
					.get(0).getReltype().getType());

		Assertions
			.assertEquals(
				"context", verificationDataset
					.filter((FilterFunction<Relation>) row -> row.getSource().getId().startsWith("00"))
					.collectAsList()
					.get(0).getSource().getType());

		Assertions
			.assertEquals(
				"result", verificationDataset
					.filter((FilterFunction<Relation>) row -> row.getSource().getId().startsWith("00"))
					.collectAsList()
					.get(0).getTarget().getType());

		Assertions
			.assertEquals(
				"IsRelatedTo", verificationDataset
					.filter((FilterFunction<Relation>) row -> row.getTarget().getId().startsWith("00"))
					.collectAsList()
					.get(0).getReltype().getName());

		Assertions
			.assertEquals(
				"relationship", verificationDataset
					.filter((FilterFunction<Relation>) row -> row.getTarget().getId().startsWith("00"))
					.collectAsList()
					.get(0).getReltype().getType());

		Assertions
			.assertEquals(
				"context", verificationDataset
					.filter((FilterFunction<Relation>) row -> row.getTarget().getId().startsWith("00"))
					.collectAsList()
					.get(0).getTarget().getType());

		Assertions
			.assertEquals(
				"result", verificationDataset
					.filter((FilterFunction<Relation>) row -> row.getTarget().getId().startsWith("00"))
					.collectAsList()
					.get(0).getSource().getType());
	}

}
@ -0,0 +1,70 @@
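// Parses <fundingtree> XML into the dump Funder model, verifying that nested funding levels
// are flattened into a single funding stream id and description.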

package eu.dnetlib.dhp.oa.graph.dump.complete;

import org.dom4j.DocumentException;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.xml.sax.SAXException;

import eu.dnetlib.dhp.schema.dump.oaf.graph.Funder;

class FunderParsingTest {

	@Test
	void testFunderTwoLevels() throws DocumentException {

		String funding_Stream = "<fundingtree><funder><id>nsf_________::NSF</id><shortname>NSF</shortname><name>National Science "
			+ "Foundation</name><jurisdiction>US</jurisdiction></funder><funding_level_1><id>nsf_________::NSF::CISE/OAD::CISE/CCF</id><description>Division "
			+ "of Computing and Communication Foundations</description><name>Division of Computing and Communication "
			+ "Foundations</name><parent><funding_level_0><id>nsf_________::NSF::CISE/OAD</id><description>Directorate for "
			+ "Computer &amp; Information Science &amp; Engineering</description><name>Directorate for Computer &amp; "
			+ "Information Science &amp; Engineering</name><parent/><class>nsf:fundingStream</class></funding_level_0></parent></funding_level_1></fundingtree>";

		Funder f = DumpGraphEntities.getFunder(funding_Stream);

		Assertions.assertEquals("NSF", f.getShortName());
		Assertions.assertEquals("National Science Foundation", f.getName());
		Assertions.assertEquals("US", f.getJurisdiction());

		Assertions.assertEquals("NSF::CISE/OAD::CISE/CCF", f.getFunding_stream().getId());
		Assertions
			.assertEquals(
				"Directorate for Computer & Information Science & Engineering - Division of Computing and Communication Foundations",
				f.getFunding_stream().getDescription());

	}

	@Test
	void testFunderThreeLevels() throws DocumentException, SAXException {
		String funding_stream = "<fundingtree><funder><id>ec__________::EC</id>" +
			"<shortname>EC</shortname>" +
			"<name>European Commission</name>" +
			"<jurisdiction>EU</jurisdiction>" +
			"</funder><funding_level_2>" +
			"<id>ec__________::EC::H2020::ERC::ERC-COG</id>" +
			"<description>Consolidator Grant</description>" +
			"<name>ERC-COG</name><class>ec:h2020toas</class>" +
			"<parent><funding_level_1><id>ec__________::EC::H2020::ERC</id>" +
			"<description>European Research Council</description>" +
			"<name>ERC</name><class>ec:h2020fundings</class><parent>" +
			"<funding_level_0><id>ec__________::EC::H2020</id><name>H2020</name>" +
			"<description>Horizon 2020 Framework Programme</description><parent/>" +
			"<class>ec:h2020fundings</class></funding_level_0></parent></funding_level_1></parent></funding_level_2></fundingtree>";

		Funder f = DumpGraphEntities.getFunder(funding_stream);

		Assertions.assertEquals("EC", f.getShortName());
		Assertions.assertEquals("European Commission", f.getName());
		Assertions.assertEquals("EU", f.getJurisdiction());

		Assertions.assertEquals("EC::H2020::ERC::ERC-COG", f.getFunding_stream().getId());
		Assertions
			.assertEquals(
				"Horizon 2020 Framework Programme - European Research Council - Consolidator Grant",
				f.getFunding_stream().getDescription());

	}
}
@ -0,0 +1,810 @@
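// Mockito-based test of QueryInformationSystem: an ISLookUpService mock supplies the XQuery
// results from the community/RI profiles listed below instead of a live information service
// (the lenient() static import suggests the stubbing is set up in a shared fixture).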

package eu.dnetlib.dhp.oa.graph.dump.complete;

import static org.mockito.Mockito.lenient;

import java.util.*;
import java.util.function.Consumer;

import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;

import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;

@ExtendWith(MockitoExtension.class)
class QueryInformationSystemTest {

	private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " +
		" where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
		" and $x//context/param[./@name = 'status']/text() = 'all' " +
		" return " +
		"$x//context";

	private static final String XQUERY_ENTITY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " +
		"where $x//context[./@type='community' or ./@type = 'ri'] and $x//context/param[./@name = 'status']/text() = 'all' return " +
		"concat(data($x//context/@id) , '@@', $x//context/param[./@name =\"name\"]/text(), '@@', " +
		"$x//context/param[./@name=\"description\"]/text(), '@@', $x//context/param[./@name = \"subject\"]/text(), '@@', " +
		"$x//context/param[./@name = \"zenodoCommunity\"]/text(), '@@', $x//context/@type)";

||||||
|
List<String> communityMap = Arrays
|
||||||
|
.asList(
|
||||||
|
"clarin@@Common Language Resources and Technology Infrastructure@@CLARIN@@@@oac_clarin@@ri",
|
||||||
|
"ee@@Sustainable Development Solutions Network - Greece@@The UN Sustainable Development Solutions Network (SDSN) has been operating since 2012 under the auspices of the UN Secretary-General. "
|
||||||
|
+
|
||||||
|
"SDSN mobilizes global scientific and technological expertise to promote practical solutions for sustainable development, including the implementation of the Sustainable Development Goals (SDGs) and the Paris Climate Agreement. The Greek hub of SDSN has been included in the SDSN network in 2017 and is co-hosted by ICRE8: International Center for Research on the Environment and the Economy and the Political Economy of Sustainable Development Lab.@@SDG13 - Climate action,SDG8 - Decent work and economic growth,SDG15 - "
|
||||||
|
+
|
||||||
|
"Life on land,SDG2 - Zero hunger,SDG17 - Partnerships for the ´goals,SDG10 - Reduced inequalities,SDG5 - Gender equality,SDG12 - Responsible consumption and production,SDG14 - Life below water,SDG6 - Clean water and sanitation,SDG11 - Sustainable cities and communities,SDG1 - No poverty,SDG3 - Good health and well being,SDG7 - Affordable and clean energy,SDG4 - Quality education,SDG9 - Industry innovation and infrastructure,SDG16 - Peace justice and strong institutions@@oac_sdsn-greece@@community",
|
||||||
|
"dh-ch@@Digital Humanities and Cultural Heritage@@This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.@@modern art,monuments,europeana data model,sites,field walking,frescoes,LIDO metadata schema,art history,excavation,Arts and Humanities General,cities,coins,temples,numismatics,lithics,roads,environmental archaeology,digital cultural heritage,archaeological reports,history,CRMba,churches,cultural heritage,archaeological stratigraphy,religious art,buidings,digital humanities,survey,archaeological sites,linguistic studies,bioarchaeology,architectural orders,palaeoanthropology,fine arts,europeana,CIDOC CRM,decorations,classic art,stratigraphy,digital archaeology,intangible cultural heritage,walls,humanities,chapels,CRMtex,Language and Literature,paintings,archaeology,fair data,mosaics,burials,architecture,medieval art,castles,CARARE metadata schema,statues,natural language processing,inscriptions,CRMsci,vaults,contemporary art,Arts and Humanities,CRMarchaeo,pottery,site,architectural,vessels@@oac_dh-ch@@community",
|
||||||
|
"fam@@Fisheries and Aquaculture Management@@Conservation of marine resources for sustainable development. The Fisheries and Aquaculture community focus on resources (document, data, codes..) which have been produced in the framework of projects (H2020, FP7, ..) related to the domain of fisheries and aquaculture.@@Stock Assessment,pelagic,Acoustic,Fish farming,Fisheries,Fishermen,maximum sustainable yield,trawler,Fishing vessel,Fisherman,Fishing gear,mackerel,RFMO,Fish Aggregating Device,Bycatch,Fishery,common fisheries policy,Fishing fleet,Aquaculture@@fisheries@@community",
|
||||||
|
"ni@@Neuroinformatics@@The neuroinformatics dashboard gathers research outputs from the 'neuroinformatics' community at large including the fields of: neuroscience, neuroinformatics, brain imaging databases and standards, brain imaging techniques, neuroimaging methods including statistics and machine learning. The dashboard covers a wide range of imaging methods including (but not limited to): MRI, TEP, EEG, MEG, and studies involving human participants as well as animal studies.@@brain mapping,brain imaging,electroencephalography,arterial spin labelling,brain fingerprinting,brain,neuroimaging,Multimodal Brain Image Analysis,fMRI,neuroinformatics,fetal brain,brain ultrasonic imaging,topographic brain mapping,diffusion tensor imaging,computerized knowledge assessment,connectome mapping,brain magnetic resonance imaging,brain abnormalities@@oac_ni@@community",
|
||||||
|
"mes@@European Marine Science@@This community was initially defined to include a very broad range of topics, with the intention to generate a number of more focused and sustainable dashboards for research communities and initiatives. As outlined in the logo of this community, we intend to setup a community dashboard for EuroMarine (a consortium of 56 research and academic organisations) and monitoring dashboards for marine research initiatives, including infrastructures (e.g. EMBRC & EMSO), advisory boards (e.g. Marine Boards & ICES), and transnational funding bodies (e.g. JPI-Oceans and Tara Foundation).@@marine,ocean,fish,aqua,sea@@oac_mes@@community",
|
||||||
|
"instruct@@Instruct-ERIC@@Instruct-ERIC is the European Research Infrastructure for Structural Biology@@@@oac_instruct@@community",
|
||||||
|
"elixir-gr@@The Greek National Node of the ESFRI European RI ELIXIR@@ELIXIR-GR enhances the potential of the Greek bioinformatics community to offer open, easily accessible and state -of- the- art services to the Greek and the international academic community and other stakeholders, such as industry and the health sector. More importantly, by providing these services, the infrastructure facilitates discoveries in the field of the life-sciences, having strong spill over effects in promoting innovation in sectors such as discovery of new drug targets and development of novel therapeutic agents, development of innovative diagnostics, personalized medicine, and development of innovative biotechnological products and processes.@@@@oaa_elixir-gr@@ri",
|
||||||
|
"aginfra@@Agricultural and Food Sciences@@The scope of this community is to provide access to publications, research data, projects and software that are related to agricultural and food sciences@@animal production and health,fisheries and aquaculture,food safety and human nutrition,information management,food technology,agri-food education and extension,natural resources and environment,food system,engineering technology and Research,agriculture,food safety risk assessment,food security,farming practices and systems,plant production and protection,agri-food economics and policy,Agri-food,food distribution,forestry@@oac_aginfra@@community",
|
||||||
|
"dariah@@DARIAH EU@@The Digital Research Infrastructure for the Arts and Humanities (DARIAH) aims to enhance and support digitally-enabled research and teaching across the arts and humanities. It develops, maintains and operates an infrastructure in support of ICT-based research practices and sustains researchers in using them to build, analyse and interpret digital resources. DARIAH was established as a European Research Infrastructure Consortium (ERIC) in August 2014. Currently, DARIAH has 18 Members and several cooperating partners in eight non-member countries. Here you will find a growing collection of DARIAH-affiliated research outputs and other documents. @@@@dariah@@ri",
|
||||||
|
"epos@@European Plate Observing System@@EPOS, the European Plate Observing System, is a long-term plan to facilitate integrated use of data, data products, and facilities from distributed research infrastructures for solid Earth science in Europe.@@@@@@ri",
|
||||||
|
"covid-19@@Corona Virus Disease@@This portal provides access to publications, research data, projects and software that may be relevant to the Corona Virus Disease (COVID-19). The OpenAIRE COVID-19 Gateway aggregates COVID-19 related records, links them and provides a single access point for discovery and navigation. We tag content from the OpenAIRE Research Graph (10,000+ data sources) and additional sources. All COVID-19 related research results are linked to people, organizations and projects, providing a contextualized navigation.@@COVID19,SARS-CoV,HCoV-19,mesh:C000657245,MERS-CoV,Síndrome Respiratorio Agudo Severo,mesh:COVID-19,COVID2019,COVID-19,SARS-CoV-2,2019 novel coronavirus,severe acute respiratory syndrome coronavirus 2,Orthocoronavirinae,Coronaviridae,mesh:D045169,coronavirus,SARS,coronaviruses,coronavirus disease-19,sars cov 2,Middle East Respiratory Syndrome,Severe acute respiratory syndrome coronavirus 2,Severe Acute Respiratory Syndrome,coronavirus disease 2019,2019-nCoV@@covid-19@@community");
|
||||||
|
|
||||||
|
List<String> communityContext = Arrays
|
||||||
|
.asList(
|
||||||
|
"<context id=\"clarin\" label=\"CLARIN\" type=\"ri\">\n" +
|
||||||
|
" <param name=\"status\">all</param>\n" +
|
||||||
|
" <param name=\"description\">CLARIN</param>\n" +
|
||||||
|
" <param name=\"logourl\">https://www.clarin.eu/sites/default/files/clarin-frontpage-logo.jpg</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"name\">Common Language Resources and Technology Infrastructure</param>\n" +
|
||||||
|
" <param name=\"manager\">maria@clarin.eu,dieter@clarin.eu,f.m.g.dejong@uu.nl,paolo.manghi@isti.cnr.it</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"subject\"/>\n" +
|
||||||
|
" <param name=\"suggestedAcknowledgement\">(Part of) the work reported here was made possible by using the CLARIN infrastructure.</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"suggestedAcknowledgement\">The work reported here has received funding through <CLARIN national consortium member, e.g. CLARIN.SI>, <XYZ> project, grant no. <XYZ>.</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"suggestedAcknowledgement\">The work reported here has received funding (through CLARIN ERIC) from the European Union’s Horizon 2020 research and innovation programme under grant agreement No <0-9> for project <XYZ>.\n"
|
||||||
|
+
|
||||||
|
" (E.g. No 676529 for project CLARIN-PLUS.)</param>\n" +
|
||||||
|
" <param name=\"zenodoCommunity\">oac_clarin</param>\n" +
|
||||||
|
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
|
||||||
|
" <category claim=\"true\" id=\"clarin::projects\" label=\"CLARIN Projects\">\n" +
|
||||||
|
" <concept claim=\"false\" id=\"clarin::projects::1\" label=\"CLARIN-PLUS\">\n" +
|
||||||
|
" <param name=\"projectfullname\">CLARIN-PLUS</param>\n" +
|
||||||
|
" <param name=\"suggestedAcknowledgement\"/>\n" +
|
||||||
|
" <param name=\"rule\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">676529</param>\n" +
|
||||||
|
" <param name=\"url\">http://www.clarin.eu</param>\n" +
|
||||||
|
" <param name=\"funder\">EC</param>\n" +
|
||||||
|
" <param name=\"funding\">H2020-INFRADEV-1-2015-1</param>\n" +
|
||||||
|
" <param name=\"acronym\">CLARIN+</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"clarin::projects::2\" label=\"CLARIN\">\n" +
|
||||||
|
" <param name=\"projectfullname\">Common Language Resources and Technology Infrastructure</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"acronym\">CLARIN</param>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">212230</param>\n" +
|
||||||
|
" <param name=\"funder\">EC</param>\n" +
|
||||||
|
" <param name=\"openaireId\">corda_______::ef782b2d85676aa3e5a907427feb18c4</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" </category>\n" +
|
||||||
|
" <category claim=\"false\" id=\"clarin::contentproviders\" label=\"CLARIN Content providers\">" +
|
||||||
|
"<!--<concept claim=\"true\" id=\"clarin::contentproviders::1\" label=\"Zotero\">\n" +
|
||||||
|
" <param name=\"openaireId\">opendoar____::d96409bf894217686ba124d7356686c9</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"name\">Public Knowledge Project EPrint Archive</param>\n" +
|
||||||
|
" <param name=\"officialname\">Public Knowledge Project EPrint Archive</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept> -->\n" +
|
||||||
|
" <concept claim=\"false\" id=\"clarin::contentproviders::2\" label=\"\">\n" +
|
||||||
|
" <param name=\"name\">LINDAT/CLARIN repository</param>\n" +
|
||||||
|
" <param name=\"officialname\">LINDAT/CLARIN repository</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" </category>\n" +
|
||||||
|
" <category claim=\"true\" id=\"clarin::subcommunity\" label=\"CLARIN communities\">\n" +
|
||||||
|
" <concept claim=\"true\" id=\"clarin::subcommunity::1\" label=\"CLARIN-D\">\n" +
|
||||||
|
" <param name=\"fullname\">CLARIN-D</param>\n" +
|
||||||
|
" <param name=\"homepageUrl\">https://www.clarin-d.de/en/</param>\n" +
|
||||||
|
" <param name=\"suggestedAcknowledgement\"/>\n" +
|
||||||
|
" <param name=\"example\">http://www.lrec-conf.org/proceedings/lrec2018/pdf/504.pdf</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"nation\">Germany</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" </category>\n" +
|
||||||
|
" <category claim=\"false\" id=\"clarin::zenodocommunities\" label=\"CLARIN Zenodo Communities\"/>\n"
|
||||||
|
+
|
||||||
|
" <category claim=\"false\" id=\"clarin::organizations\" label=\"CLARIN Organizations\"/>\n" +
|
||||||
|
"</context>",
|
||||||
|
"<context id=\"dh-ch\" label=\"Digital Humanities and Cultural Heritage\" type=\"community\">\n" +
|
||||||
|
" <param name=\"status\">all</param>\n" +
|
||||||
|
" <param name=\"description\">This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"logourl\">http://sanmamante.org/DH_CH_logo.png</param>\n" +
|
||||||
|
" <param name=\"name\">Digital Humanities and Cultural Heritage</param>\n" +
|
||||||
|
" <param name=\"manager\">ileniagalluccio87@gmail.com,achille.felicetti@gmail.com,paolo.manghi@isti.cnr.it,tim.evans@york.ac.uk</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"subject\">modern art,monuments,europeana data model,sites,field walking,frescoes,LIDO metadata schema,art history,excavation,Arts and Humanities General,cities,coins,temples,numismatics,lithics,roads,environmental archaeology,digital cultural heritage,archaeological reports,history,CRMba,churches,cultural heritage,archaeological stratigraphy,religious art,buidings,digital humanities,survey,archaeological sites,linguistic studies,bioarchaeology,architectural orders,palaeoanthropology,fine arts,europeana,CIDOC CRM,decorations,classic art,stratigraphy,digital archaeology,intangible cultural heritage,walls,humanities,chapels,CRMtex,Language and Literature,paintings,archaeology,fair data,mosaics,burials,architecture,medieval art,castles,CARARE metadata schema,statues,natural language processing,inscriptions,CRMsci,vaults,contemporary art,Arts and Humanities,CRMarchaeo,pottery,site,architectural,vessels</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"suggestedAcknowledgement\">The present work has been partially supported by the PARTHENOS project, funded by the European Commission (Grant Agreement No. 654119) under the HORIZON 2020 - INFRADEV-4-2014/2015 call</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"zenodoCommunity\">oac_dh-ch</param>\n" +
|
||||||
|
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
|
||||||
|
" <category claim=\"false\" id=\"dh-ch::projects\" label=\"DH-CH Projects\">\n" +
|
||||||
|
" <concept claim=\"false\" id=\"dh-ch::projects::1\" label=\"Pooling Activities, Resources and Tools for Heritage E-research Networking, Optimization and Synergies\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"projectfullname\">Pooling Activities, Resources and Tools for Heritage E-research Networking, Optimization and Synergies</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"suggestedAcknowledgement\">The present work has been partially supported by the PARTHENOS project, funded by the European Commission (Grant Agreement No. 654119) under the HORIZON 2020 - INFRADEV-4-2014/2015 call</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"rule\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">654119</param>\n" +
|
||||||
|
" <param name=\"url\">http://www.parthenos-project.eu</param>\n" +
|
||||||
|
" <param name=\"funder\">EC</param>\n" +
|
||||||
|
" <param name=\"acronym\">PARTHENOS</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" </category>\n" +
|
||||||
|
" <category claim=\"false\" id=\"dh-ch::contentproviders\" label=\"DH-CH Content providers\">\n" +
|
||||||
|
" <concept claim=\"false\" id=\"dh-ch::contentproviders::2\" label=\"The UK's largest collection of digital research data in the social sciences and humanities\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"openaireId\">re3data_____::9ebe127e5f3a0bf401875690f3bb6b81</param>\n" +
|
||||||
|
" <param name=\"name\">The UK's largest collection of digital research data in the social sciences and humanities</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"officialname\">UK Data Archive</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"dh-ch::contentproviders::3\" label=\"Journal of Data Mining and Digital Humanities\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"openaireId\">doajarticles::c6cd4b532e12868c1d760a8d7cda6815</param>\n" +
|
||||||
|
" <param name=\"name\">Journal of Data Mining and Digital Humanities</param>\n" +
|
||||||
|
" <param name=\"officialname\">Journal of Data Mining and Digital Humanities</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"dh-ch::contentproviders::6\" label=\"Frontiers in Digital Humanities\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"openaireId\">doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b</param>\n" +
|
||||||
|
" <param name=\"name\">Frontiers in Digital Humanities</param>\n" +
|
||||||
|
" <param name=\"officialname\">Frontiers in Digital Humanities</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"dh-ch::contentproviders::7\" label=\"Il Capitale Culturale: Studies on the Value of Cultural Heritage\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"openaireId\">doajarticles::6eb31d13b12bc06bbac06aef63cf33c9</param>\n" +
|
||||||
|
" <param name=\"name\">Il Capitale Culturale: Studies on the Value of Cultural Heritage</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"officialname\">Il Capitale Culturale: Studies on the Value of Cultural Heritage</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"dh-ch::contentproviders::8\" label=\"Conservation Science in Cultural Heritage\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"openaireId\">doajarticles::0da84e9dfdc8419576169e027baa8028</param>\n" +
|
||||||
|
" <param name=\"name\">Conservation Science in Cultural Heritage</param>\n" +
|
||||||
|
" <param name=\"officialname\">Conservation Science in Cultural Heritage</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"dh-ch::contentproviders::9\" label=\"Electronic Archiving System\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"openaireId\">re3data_____::84e123776089ce3c7a33db98d9cd15a8</param>\n" +
|
||||||
|
" <param name=\"name\">Electronic Archiving System</param>\n" +
|
||||||
|
" <param name=\"officialname\">EASY</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"dh-ch::contentproviders::10\" label=\"DANS-KB Harvester\">\n" +
|
||||||
|
" <param name=\"openaireId\">openaire____::c5502a43e76feab55dd00cf50f519125</param>\n" +
|
||||||
|
" <param name=\"name\">DANS-KB Harvester</param>\n" +
|
||||||
|
" <param name=\"officialname\">Gemeenschappelijke Harvester DANS-KB</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"dh-ch::contentproviders::11\" label=\"ads\">\n" +
|
||||||
|
" <param name=\"openaireId\">re3data_____::a48f09c562b247a9919acfe195549b47</param>\n" +
|
||||||
|
" <param name=\"name\">ads</param>\n" +
|
||||||
|
" <param name=\"officialname\">Archaeology Data Service</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"dh-ch::contentproviders::12\" label=\"\">\n" +
|
||||||
|
" <param name=\"openaireId\">opendoar____::97275a23ca44226c9964043c8462be96</param>\n" +
|
||||||
|
" <param name=\"name\">KNAW Repository</param>\n" +
|
||||||
|
" <param name=\"officialname\">KNAW Repository</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"dh-ch::contentproviders::13\" label=\"Internet Archaeology\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"openaireId\">doajarticles::2899208a99aa7d142646e0a80bfeef05</param>\n" +
|
||||||
|
" <param name=\"name\">Internet Archaeology</param>\n" +
|
||||||
|
" <param name=\"officialname\">Internet Archaeology</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" <param name=\"selcriteria\"/>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" </category>\n" +
|
||||||
|
"</context>\n",
|
||||||
|
"<context id=\"ni\" label=\"Neuroinformatics\" type=\"community\">\n" +
|
||||||
|
" <param name=\"status\">all</param>\n" +
|
||||||
|
" <param name=\"description\">The neuroinformatics dashboard gathers research outputs from the 'neuroinformatics' community at large including the fields of: neuroscience, neuroinformatics, brain imaging databases and standards, brain imaging techniques, neuroimaging methods including statistics and machine learning. The dashboard covers a wide range of imaging methods including (but not limited to): MRI, TEP, EEG, MEG, and studies involving human participants as well as animal studies.</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"logourl\">https://docs.google.com/drawings/u/0/d/10e191xGoGf4uaRluMqbt_7cCj6LSCs2a29im4CmWjqU/export/png</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"name\">Neuroinformatics</param>\n" +
|
||||||
|
" <param name=\"manager\">sorina.pop@creatis.insa-lyon.fr,camille.maumet@inria.fr,christian.barillot@irisa.fr,xavier.rolland@irisa.fr,axel.bonnet@creatis.insa-lyon.fr,paolo.manghi@isti.cnr.it</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"subject\">brain mapping,brain imaging,electroencephalography,arterial spin labelling,brain fingerprinting,brain,neuroimaging,Multimodal Brain Image Analysis,fMRI,neuroinformatics,fetal brain,brain ultrasonic imaging,topographic brain mapping,diffusion tensor imaging,computerized knowledge assessment,connectome mapping,brain magnetic resonance imaging,brain abnormalities</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"suggestedAcknowledgement\"/>\n" +
|
||||||
|
" <param name=\"zenodoCommunity\">oac_ni</param>\n" +
|
||||||
|
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
|
||||||
|
" <category claim=\"false\" id=\"ni::contentproviders\" label=\"NI Content providers\">\n" +
|
||||||
|
" <concept claim=\"false\" id=\"ni::contentproviders::1\" label=\"OpenNeuro\">\n" +
|
||||||
|
" <param name=\"openaireId\">re3data_____::5b9bf9171d92df854cf3c520692e9122</param>\n" +
|
||||||
|
" <param name=\"name\">Formerly:OpenFMRI</param>\n" +
|
||||||
|
" <param name=\"officialname\">OpenNeuro</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"ni::contentproviders::2\" label=\"RIO\">\n" +
|
||||||
|
" <param name=\"openaireId\">doajarticles::c7d3de67dc77af72f6747157441252ec</param>\n" +
|
||||||
|
" <param name=\"name\">Research Ideas and Outcomes</param>\n" +
|
||||||
|
" <param name=\"officialname\">Research Ideas and Outcomes</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"ni::contentproviders::3\" label=\"NITRC\">\n" +
|
||||||
|
" <param name=\"openaireId\">re3data_____::8515794670370f49c1d176c399c714f5</param>\n" +
|
||||||
|
" <param name=\"name\">Neuroimaging Informatics Tools and Resources Clearinghouse</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"officialname\">NITRC</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"ni::contentproviders::4\" label=\"FRONTIERSNI\">\n" +
|
||||||
|
" <param name=\"openaireId\">doajarticles::d640648c84b10d425f96f11c3de468f3</param>\n" +
|
||||||
|
" <param name=\"name\">Frontiers in Neuroinformatics</param>\n" +
|
||||||
|
" <param name=\"officialname\">Frontiers in Neuroinformatics</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"ni::contentproviders::5\" label=\"NeuroImage: Clinical\">\n" +
|
||||||
|
" <param name=\"openaireId\">doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a</param>\n" +
|
||||||
|
" <param name=\"name\">NeuroImage: Clinical</param>\n" +
|
||||||
|
" <param name=\"officialname\">NeuroImage: Clinical</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"ni::contentproviders::6\" label=\"NeuroVault\">\n" +
|
||||||
|
" <param name=\"openaireId\">rest________::fb1a3d4523c95e63496e3bc7ba36244b</param>\n" +
|
||||||
|
" <param name=\"name\">NeuroVault</param>\n" +
|
||||||
|
" <param name=\"officialname\">NeuroVault</param>\n" +
|
||||||
|
" <param name=\"enabled\">true</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" </category>\n" +
|
||||||
|
"</context>\n",
|
||||||
|
"<context id=\"instruct\" label=\"Instruct-ERIC\" type=\"ri\">\n" +
|
||||||
|
" <param name=\"status\">all</param>\n" +
|
||||||
|
" <param name=\"description\">Instruct-ERIC is the European Research Infrastructure for Structural Biology</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"logourl\">https://instruct-eric.eu/templates/instructeric/images/logos/instruct-eric-logo-noline.png</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"name\">Instruct-ERIC</param>\n" +
|
||||||
|
" <param name=\"manager\">claudia@instruct-eric.eu,carazo@cnb.csic.es,echrysina@eie.gr,susan@instruct-eric.eu,naomi@instruct-eric.eu,natalie@instruct-eric.eu,pmarie@igbmc.fr,darren.hart@ibs.fr,claudia@strubi.ox.ac.uk,paolo.manghi@isti.cnr.it</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"subject\"/>\n" +
|
||||||
|
" <param name=\"suggestedAcknowledgement\">The authors acknowledge the support and the use of resources of Instruct-ERIC.</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"suggestedAcknowledgement\">The authors acknowledge the support and the use of resources of Instruct (PID # or APPID #), a Landmark ESFRI project</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"zenodoCommunity\">oac_instruct</param>\n" +
|
||||||
|
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
|
||||||
|
" <category claim=\"false\" id=\"instruct::projects\" label=\"Instruct-ERIC Projects\">\n" +
|
||||||
|
" <concept claim=\"false\" id=\"instruct::projects::1\" label=\"Authentication and Authorisation For Research and Collaboration\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"projectfullname\">Authentication and Authorisation For Research and Collaboration</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"rule\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">730941</param>\n" +
|
||||||
|
" <param name=\"url\"/>\n" +
|
||||||
|
" <param name=\"funding\">H2020-EINFRA-2016-1</param>\n" +
|
||||||
|
" <param name=\"acronym\">AARC2</param>\n" +
|
||||||
|
" <param name=\"funder\">EC</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"instruct::projects::2\" label=\"Building data bridges between biological and medical infrastructures in Europe\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"projectfullname\">Building data bridges between biological and medical infrastructures in Europe</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"rule\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">284209</param>\n" +
|
||||||
|
" <param name=\"url\"/>\n" +
|
||||||
|
" <param name=\"funding\">FP7-INFRASTRUCTURES-2011-1</param>\n" +
|
||||||
|
" <param name=\"funder\">EC</param>\n" +
|
||||||
|
" <param name=\"acronym\">BioMedBridges</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"instruct::projects::3\" label=\"Transnational access and enhancement of integrated Biological Structure determination at synchrotron X-ray radiation facilities\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"projectfullname\">Transnational access and enhancement of integrated Biological Structure determination at synchrotron X-ray radiation facilities</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"rule\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">283570</param>\n" +
|
||||||
|
" <param name=\"url\"/>\n" +
|
||||||
|
" <param name=\"funding\">FP7-INFRASTRUCTURES-2011-1</param>\n" +
|
||||||
|
" <param name=\"funder\">EC</param>\n" +
|
||||||
|
" <param name=\"acronym\">BioStruct-X</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"instruct::projects::4\" label=\"Coordinated Research Infrastructures Building Enduring Life-science services\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"projectfullname\">Coordinated Research Infrastructures Building Enduring Life-science services</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"rule\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">654248</param>\n" +
|
||||||
|
" <param name=\"url\"/>\n" +
|
||||||
|
" <param name=\"funding\">H2020-INFRADEV-1-2014-1</param>\n" +
|
||||||
|
" <param name=\"funder\">EC</param>\n" +
|
||||||
|
" <param name=\"acronym\">CORBEL</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"instruct::projects::5\" label=\"Infrastructure for NMR, EM and X-rays for translational research\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"projectfullname\">Infrastructure for NMR, EM and X-rays for translational research</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"rule\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">653706</param>\n" +
|
||||||
|
" <param name=\"url\"/>\n" +
|
||||||
|
" <param name=\"funding\">H2020-INFRAIA-2014-2015</param>\n" +
|
||||||
|
" <param name=\"funder\">EC</param>\n" +
|
||||||
|
" <param name=\"acronym\">iNEXT</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"instruct::projects::6\" label=\"Integrated Structural Biology Infrastructure\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"projectfullname\">Integrated Structural Biology Infrastructure</param>\n" +
|
||||||
|
" <param name=\"rule\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">211252</param>\n" +
|
||||||
|
" <param name=\"url\"/>\n" +
|
||||||
|
" <param name=\"funding\">FP7-INFRASTRUCTURES-2007-1</param>\n" +
|
||||||
|
" <param name=\"funder\">EC</param>\n" +
|
||||||
|
" <param name=\"acronym\">INSTRUCT</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"instruct::projects::7\" label=\"Releasing the full potential of Instruct to expand and consolidate infrastructure services for integrated structural life science research\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"projectfullname\">Releasing the full potential of Instruct to expand and consolidate infrastructure services for integrated structural life science research</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"rule\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">731005</param>\n" +
|
||||||
|
" <param name=\"url\"/>\n" +
|
||||||
|
" <param name=\"funding\">H2020-INFRADEV-2016-1</param>\n" +
|
||||||
|
" <param name=\"funder\">EC</param>\n" +
|
||||||
|
" <param name=\"acronym\">INSTRUCT-ULTRA</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"instruct::projects::8\" label=\"Opening Synchrotron Light for Experimental Science and Applications in the Middle East\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"projectfullname\">Opening Synchrotron Light for Experimental Science and Applications in the Middle East</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"rule\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">730943</param>\n" +
|
||||||
|
" <param name=\"url\"/>\n" +
|
||||||
|
" <param name=\"funding\">H2020-INFRASUPP-2016-1</param>\n" +
|
||||||
|
" <param name=\"funder\">EC</param>\n" +
|
||||||
|
" <param name=\"acronym\">OPEN SESAME</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"instruct::projects::9\" label=\"Infrastructure for Protein Production Platforms\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"projectfullname\">Infrastructure for Protein Production Platforms</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"rule\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">227764</param>\n" +
|
||||||
|
" <param name=\"url\"/>\n" +
|
||||||
|
" <param name=\"funding\">FP7-INFRASTRUCTURES-2008-1</param>\n" +
|
||||||
|
" <param name=\"funder\">EC</param>\n" +
|
||||||
|
" <param name=\"acronym\">PCUBE</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"instruct::projects::10\" label=\"European Vaccine Research and Development Infrastructure\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"projectfullname\">European Vaccine Research and Development Infrastructure</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"rule\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">730964</param>\n" +
|
||||||
|
" <param name=\"url\"/>\n" +
|
||||||
|
" <param name=\"funding\">H2020-INFRAIA-2016-1</param>\n" +
|
||||||
|
" <param name=\"funder\">EC</param>\n" +
|
||||||
|
" <param name=\"acronym\">TRAMSVAC2</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"instruct::projects::11\" label=\"World-wide E-infrastructure for structural biology\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"projectfullname\">World-wide E-infrastructure for structural biology</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"rule\"/>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">675858</param>\n" +
|
||||||
|
" <param name=\"url\"/>\n" +
|
||||||
|
" <param name=\"funding\">H2020-EINFRA-2015-1</param>\n" +
|
||||||
|
" <param name=\"funder\">EC</param>\n" +
|
||||||
|
" <param name=\"acronym\">West-Life</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"instruct::projects::12\" label=\"RI-VIS\">\n" +
|
||||||
|
" <param name=\"projectfullname\">Expanding research infrastructure visibility to strengthen strategic partnerships</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"acronym\">RI-VIS</param>\n" +
|
||||||
|
" <param name=\"CD_PROJECT_NUMBER\">824063</param>\n" +
|
||||||
|
" <param name=\"funder\">EC</param>\n" +
|
||||||
|
" <param name=\"openaireId\">corda__h2020::af93b591b76991d8437993a8f6fc6538</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" </category>\n" +
|
||||||
|
" <category claim=\"false\" id=\"instruct::contentproviders\" label=\"Instruct-ERIC Content providers\"/>\n"
|
||||||
|
+
|
||||||
|
" <category claim=\"false\" id=\"instruct::zenodocommunities\" label=\"Instruct-ERIC Zenodo Communities\">\n"
|
||||||
|
+
|
||||||
|
" <concept claim=\"false\" id=\"instruct::zenodocommunities::1\" label=\"Instruct\">\n" +
|
||||||
|
" <param name=\"zenodoid\">instruct</param>\n" +
|
||||||
|
" <param name=\"selcriteria\"/>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"instruct::zenodocommunities::2\" label=\"West-Life Virtual Research Environment for Structural Biology\">\n"
|
||||||
|
+
|
||||||
|
" <param name=\"zenodoid\">west-life</param>\n" +
|
||||||
|
" <param name=\"selcriteria\"/>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" </category>\n" +
|
||||||
|
" <category claim=\"false\" id=\"instruct::organizations\" label=\"Instruct-ERIC Organizations\">\n"
|
||||||
|
+
|
||||||
|
" <concept claim=\"false\" id=\"instruct::organizations::1\" label=\"FRISBI\">\n" +
|
||||||
|
" <param name=\"name\">FRISBI</param>\n" +
|
||||||
|
" <param name=\"logourl\">aHR0cDovL2ZyaXNiaS5ldS9zdGF0aWMvaW1hZ2VzL2xvZ29zL2xvZ28tZnJpc2JpLnBuZw==</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"websiteurl\">aHR0cDovL2ZyaXNiaS5ldS8=</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"instruct::organizations::2\" label=\"RI-VIS\">\n" +
|
||||||
|
" <param name=\"name\">RI-VIS</param>\n" +
|
||||||
|
" <param name=\"logourl\">aHR0cHM6Ly9yaS12aXMuZXUvbmV0d29yay9yaXZpcy90ZW1wbGF0ZXMvcml2aXMvaW1hZ2VzL1JJLVZJU0xvZ29GaW5hbC0wNi5wbmc=</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"websiteurl\">aHR0cHM6Ly9yaS12aXMuZXU=</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" <concept claim=\"false\" id=\"instruct::organizations::3\" label=\"CIISB\">\n" +
|
||||||
|
" <param name=\"name\">CIISB</param>\n" +
|
||||||
|
" <param name=\"logourl\">aHR0cDovL2JpYy5jZWl0ZWMuY3ovZmlsZXMvMjkyLzEyNS5KUEc=</param>\n" +
|
||||||
|
" <param name=\"websiteurl\">aHR0cHM6Ly93d3cuY2lpc2Iub3Jn</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" </category>\n" +
|
||||||
|
"</context>\n",
|
||||||
|
"<context id=\"elixir-gr\" label=\"ELIXIR GR\" type=\"ri\">\n" +
|
||||||
|
" <param name=\"status\">all</param>\n" +
|
||||||
|
" <param name=\"description\">ELIXIR-GR enhances the potential of the Greek bioinformatics community to offer open, easily accessible and state -of- the- art services to the Greek and the international academic community and other stakeholders, such as industry and the health sector. More importantly, by providing these services, the infrastructure facilitates discoveries in the field of the life-sciences, having strong spill over effects in promoting innovation in sectors such as discovery of new drug targets and development of novel therapeutic agents, development of innovative diagnostics, personalized medicine, and development of innovative biotechnological products and processes.</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"logourl\">https://elixir-greece.org/sites/default/files/ELIXIR_GREECE_white_background.png</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"name\">The Greek National Node of the ESFRI European RI ELIXIR</param>\n" +
|
||||||
|
" <param name=\"manager\">vergoulis@imis.athena-innovation.gr,schatz@imis.athena-innovation.gr,paolo.manghi@isti.cnr.it</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"subject\"/>\n" +
|
||||||
|
" <param name=\"suggestedAcknowledgement\"/>\n" +
|
||||||
|
" <param name=\"zenodoCommunity\">oaa_elixir-gr</param>\n" +
|
||||||
|
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
|
||||||
|
" <category claim=\"false\" id=\"elixir-gr::projects\" label=\"ELIXIR GR Projects\"/>\n" +
|
||||||
|
" <category claim=\"false\" id=\"elixir-gr::contentproviders\" label=\"Elixir-GR Content providers\">\n"
|
||||||
|
+
|
||||||
|
" <concept claim=\"false\" id=\"elixir-gr::contentproviders::1\" label=\"bio.tools\">\n" +
|
||||||
|
" <param name=\"openaireId\">rest________::b8e502674c3c3499d5374e9b2ea6d8d5</param>\n" +
|
||||||
|
" <param name=\"name\">bio.tools</param>\n" +
|
||||||
|
" <param name=\"officialname\">bio.tools</param>\n" +
|
||||||
|
" <param name=\"enabled\">false</param>\n" +
|
||||||
|
" <param name=\"selcriteria\"/>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" </category>\n" +
|
||||||
|
" <category claim=\"false\" id=\"elixir-gr::zenodocommunities\" label=\"Elixir-GR Zenodo Communities\"/>\n"
|
||||||
|
+
|
||||||
|
" <category claim=\"false\" id=\"elixir-gr::organizations\" label=\"Elixir-GR Organizations\">\n" +
|
||||||
|
" <concept claim=\"false\" id=\"elixir-gr::organizations::1\" label=\"ATHENA RC\">\n" +
|
||||||
|
" <param name=\"name\">ATHENA RC</param>\n" +
|
||||||
|
" <param name=\"logourl\">aHR0cHM6Ly9lbGl4aXItZ3JlZWNlLm9yZy9zaXRlcy9kZWZhdWx0L2ZpbGVzL3N0eWxlcy90aHVtYm5haWwvcHVibGljL3BhcnRuZXJfbG9nb3MvYXRoZW5hX2xvZ28uanBnP2l0b2s9VXdGWFNpZng=</param>\n"
|
||||||
|
+
|
||||||
|
" <param name=\"websiteurl\">aHR0cHM6Ly93d3cuYXRoZW5hLWlubm92YXRpb24uZ3IvZW4=</param>\n" +
|
||||||
|
" </concept>\n" +
|
||||||
|
" </category><!-- <category claim=\"false\" id=\"elixir-gr::resultorganizations\" label=\"Elixir-GR Results through organizations\"/> -->\n"
|
||||||
|
+
|
||||||
|
"</context>");

@Mock
private ISLookUpService isLookUpService;

private QueryInformationSystem queryInformationSystem;

private Map<String, String> map;

@BeforeEach
public void setUp() throws ISLookUpException {
lenient().when(isLookUpService.quickSearchProfile(XQUERY_ENTITY)).thenReturn(communityMap);
lenient().when(isLookUpService.quickSearchProfile(XQUERY)).thenReturn(communityContext);
queryInformationSystem = new QueryInformationSystem();
queryInformationSystem.setIsLookUp(isLookUpService);
}

@Test
void testSizeEntity() throws ISLookUpException {

List<ContextInfo> cInfoList = new ArrayList<>();
final Consumer<ContextInfo> consumer = ci -> cInfoList.add(ci);
queryInformationSystem.getContextInformation(consumer);

Assertions.assertEquals(12, cInfoList.size());
}

@Test
void testSizeRelation() throws ISLookUpException {

List<ContextInfo> cInfoList = new ArrayList<>();
final Consumer<ContextInfo> consumer = ci -> cInfoList.add(ci);
queryInformationSystem.execContextRelationQuery();
queryInformationSystem
.getContextRelation(consumer, "contentproviders", ModelSupport.entityIdPrefix.get("datasource"));

Assertions.assertEquals(5, cInfoList.size());
}

@Test
void testContentRelation() throws ISLookUpException {

List<ContextInfo> cInfoList = new ArrayList<>();
final Consumer<ContextInfo> consumer = ci -> cInfoList.add(ci);
queryInformationSystem.execContextRelationQuery();
queryInformationSystem
.getContextRelation(consumer, "contentproviders", ModelSupport.entityIdPrefix.get("datasource"));

cInfoList.forEach(contextInfo -> {
switch (contextInfo.getId()) {
case "elixir-gr":
Assertions.assertEquals(1, contextInfo.getDatasourceList().size());
Assertions
.assertEquals(
"10|rest________::b8e502674c3c3499d5374e9b2ea6d8d5",
contextInfo.getDatasourceList().get(0));
break;
case "instruct":
Assertions.assertEquals(0, contextInfo.getDatasourceList().size());
break;
case "ni":
Assertions.assertEquals(6, contextInfo.getDatasourceList().size());
Assertions
.assertTrue(
contextInfo
.getDatasourceList()
.contains("10|rest________::fb1a3d4523c95e63496e3bc7ba36244b"));
break;
case "dh-ch":
Assertions.assertEquals(10, contextInfo.getDatasourceList().size());
break;
case "clarin":
Assertions.assertEquals(0, contextInfo.getDatasourceList().size());
break;
}
});
}
|
||||||
|
|
||||||
|
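// testContentEntity checks, for each context id, that the name, description, subjects,
// Zenodo community and type parsed from the fixture profiles match the expected values.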
@Test
|
||||||
|
void testContentEntity() throws ISLookUpException {
|
||||||
|
|
||||||
|
List<ContextInfo> cInfoList = new ArrayList<>();
|
||||||
|
final Consumer<ContextInfo> consumer = ci -> cInfoList.add(ci);
|
||||||
|
queryInformationSystem.getContextInformation(consumer);
|
||||||
|
|
||||||
|
cInfoList.forEach(context -> {
|
||||||
|
switch (context.getId()) {
|
||||||
|
case "clarin":// clarin@@Common Language Resources and Technology Infrastructure@@CLARIN@@@@oac_clarin",
|
||||||
|
Assertions
|
||||||
|
.assertEquals("Common Language Resources and Technology Infrastructure", context.getName());
|
||||||
|
Assertions.assertEquals("CLARIN", context.getDescription());
|
||||||
|
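// assertTrue over Optional.ofNullable(getSubject()).map(value -> false).orElse(true) is an
// idiom for "the subject list is expected to be null here" (the profile declares no subjects)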
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
Optional
|
||||||
|
.ofNullable(context.getSubject())
|
||||||
|
.map(value -> false)
|
||||||
|
.orElse(true));
|
||||||
|
Assertions.assertEquals("oac_clarin", context.getZenodocommunity());
|
||||||
|
Assertions.assertEquals("ri", context.getType());
|
||||||
|
break;
|
||||||
|
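// "ee" is the Sustainable Development Solutions Network - Greece community; the 17 subjects
// checked here are the SDG entries of the oac_sdsn-greece fixture string at the top of this file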
case "ee":
|
||||||
|
Assertions.assertEquals("Sustainable Development Solutions Network - Greece", context.getName());
|
||||||
|
Assertions.assertTrue(context.getDescription().length() > 0);
|
||||||
|
Assertions
|
||||||
|
.assertFalse(
|
||||||
|
Optional
|
||||||
|
.ofNullable(context.getSubject())
|
||||||
|
.map(value -> false)
|
||||||
|
.orElse(true));
|
||||||
|
Assertions.assertEquals(17, context.getSubject().size());
|
||||||
|
Assertions.assertEquals("oac_sdsn-greece", context.getZenodocommunity());
|
||||||
|
Assertions.assertEquals("community", context.getType());
|
||||||
|
break;
|
||||||
|
case "dh-ch":
|
||||||
|
Assertions.assertEquals("Digital Humanities and Cultural Heritage", context.getName());
|
||||||
|
Assertions.assertTrue(context.getDescription().length() > 0);
|
||||||
|
Assertions
|
||||||
|
.assertFalse(
|
||||||
|
Optional
|
||||||
|
.ofNullable(context.getSubject())
|
||||||
|
.map(value -> false)
|
||||||
|
.orElse(true));
|
||||||
|
Assertions.assertEquals(67, context.getSubject().size());
|
||||||
|
Assertions.assertEquals("oac_dh-ch", context.getZenodocommunity());
|
||||||
|
Assertions.assertEquals("community", context.getType());
|
||||||
|
break;
|
||||||
|
case "fam":
|
||||||
|
Assertions.assertEquals("Fisheries and Aquaculture Management", context.getName());
|
||||||
|
Assertions.assertTrue(context.getDescription().length() > 0);
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
context
|
||||||
|
.getDescription()
|
||||||
|
.startsWith("Conservation of marine resources for sustainable development"));
|
||||||
|
Assertions
|
||||||
|
.assertFalse(
|
||||||
|
Optional
|
||||||
|
.ofNullable(context.getSubject())
|
||||||
|
.map(value -> false)
|
||||||
|
.orElse(true));
|
||||||
|
Assertions.assertEquals(19, context.getSubject().size());
|
||||||
|
Assertions.assertEquals("fisheries", context.getZenodocommunity());
|
||||||
|
Assertions.assertEquals("community", context.getType());
|
||||||
|
break;
|
||||||
|
case "ni":
|
||||||
|
Assertions.assertEquals("Neuroinformatics", context.getName());
|
||||||
|
Assertions.assertTrue(context.getDescription().length() > 0);
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
context
|
||||||
|
.getDescription()
|
||||||
|
.startsWith("The neuroinformatics dashboard gathers research outputs from the"));
|
||||||
|
Assertions
|
||||||
|
.assertFalse(
|
||||||
|
Optional
|
||||||
|
.ofNullable(context.getSubject())
|
||||||
|
.map(value -> false)
|
||||||
|
.orElse(true));
|
||||||
|
Assertions.assertEquals(18, context.getSubject().size());
|
||||||
|
Assertions.assertEquals("oac_ni", context.getZenodocommunity());
|
||||||
|
Assertions.assertEquals("community", context.getType());
|
||||||
|
Assertions.assertTrue(context.getSubject().contains("brain"));
|
||||||
|
break;
|
||||||
|
case "mes":
|
||||||
|
Assertions.assertEquals("European Marine Science", context.getName());
|
||||||
|
Assertions.assertTrue(context.getDescription().length() > 0);
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
context
|
||||||
|
.getDescription()
|
||||||
|
.startsWith(
|
||||||
|
"This community was initially defined to include a very broad range of topics"));
|
||||||
|
Assertions
|
||||||
|
.assertFalse(
|
||||||
|
Optional
|
||||||
|
.ofNullable(context.getSubject())
|
||||||
|
.map(value -> false)
|
||||||
|
.orElse(true));
|
||||||
|
Assertions.assertEquals(5, context.getSubject().size());
|
||||||
|
Assertions.assertEquals("oac_mes", context.getZenodocommunity());
|
||||||
|
Assertions.assertEquals("community", context.getType());
|
||||||
|
Assertions.assertTrue(context.getSubject().contains("sea"));
|
||||||
|
Assertions.assertTrue(context.getSubject().contains("fish"));
|
||||||
|
Assertions.assertTrue(context.getSubject().contains("ocean"));
|
||||||
|
Assertions.assertTrue(context.getSubject().contains("aqua"));
|
||||||
|
Assertions.assertTrue(context.getSubject().contains("marine"));
|
||||||
|
break;
|
||||||
|
case "instruct":
|
||||||
|
Assertions.assertEquals("Instruct-ERIC", context.getName());
|
||||||
|
Assertions.assertTrue(context.getDescription().length() > 0);
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
context
|
||||||
|
.getDescription()
|
||||||
|
.equals(
|
||||||
|
"Instruct-ERIC is the European Research Infrastructure for Structural Biology"));
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
Optional
|
||||||
|
.ofNullable(context.getSubject())
|
||||||
|
.map(value -> false)
|
||||||
|
.orElse(true));
|
||||||
|
Assertions.assertEquals("oac_instruct", context.getZenodocommunity());
|
||||||
|
Assertions.assertEquals("community", context.getType());
|
||||||
|
|
||||||
|
break;
|
||||||
|
case "elixir-gr":
|
||||||
|
Assertions
|
||||||
|
.assertEquals("The Greek National Node of the ESFRI European RI ELIXIR", context.getName());
|
||||||
|
Assertions.assertTrue(context.getDescription().length() > 0);
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
context
|
||||||
|
.getDescription()
|
||||||
|
.startsWith(
|
||||||
|
"ELIXIR-GR enhances the potential of the Greek bioinformatics community to offer open"));
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
Optional
|
||||||
|
.ofNullable(context.getSubject())
|
||||||
|
.map(value -> false)
|
||||||
|
.orElse(true));
|
||||||
|
Assertions.assertEquals("oaa_elixir-gr", context.getZenodocommunity());
|
||||||
|
Assertions.assertEquals("ri", context.getType());
|
||||||
|
|
||||||
|
break;
|
||||||
|
case "aginfra":
|
||||||
|
Assertions.assertEquals("Agricultural and Food Sciences", context.getName());
|
||||||
|
Assertions.assertTrue(context.getDescription().length() > 0);
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
context
|
||||||
|
.getDescription()
|
||||||
|
.startsWith(
|
||||||
|
"The scope of this community is to provide access to publications, research data, projects and software"));
|
||||||
|
Assertions
|
||||||
|
.assertFalse(
|
||||||
|
Optional
|
||||||
|
.ofNullable(context.getSubject())
|
||||||
|
.map(value -> false)
|
||||||
|
.orElse(true));
|
||||||
|
Assertions.assertEquals(18, context.getSubject().size());
|
||||||
|
Assertions.assertEquals("oac_aginfra", context.getZenodocommunity());
|
||||||
|
Assertions.assertEquals("community", context.getType());
|
||||||
|
Assertions.assertTrue(context.getSubject().contains("food distribution"));
|
||||||
|
break;
|
||||||
|
case "dariah":
|
||||||
|
Assertions.assertEquals("DARIAH EU", context.getName());
|
||||||
|
Assertions.assertTrue(context.getDescription().length() > 0);
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
context
|
||||||
|
.getDescription()
|
||||||
|
.startsWith(
|
||||||
|
"The Digital Research Infrastructure for the Arts and Humanities (DARIAH) aims to enhance and support "));
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
Optional
|
||||||
|
.ofNullable(context.getSubject())
|
||||||
|
.map(value -> false)
|
||||||
|
.orElse(true));
|
||||||
|
|
||||||
|
Assertions.assertEquals("dariah", context.getZenodocommunity());
|
||||||
|
Assertions.assertEquals("ri", context.getType());
|
||||||
|
|
||||||
|
break;
|
||||||
|
case "epos":
|
||||||
|
Assertions.assertEquals("European Plate Observing System", context.getName());
|
||||||
|
Assertions.assertTrue(context.getDescription().length() > 0);
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
context
|
||||||
|
.getDescription()
|
||||||
|
.startsWith(
|
||||||
|
"EPOS, the European Plate Observing System, is a long-term plan to facilitate integrated use of "));
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
Optional
|
||||||
|
.ofNullable(context.getSubject())
|
||||||
|
.map(value -> false)
|
||||||
|
.orElse(true));
|
||||||
|
|
||||||
|
Assertions.assertEquals("", context.getZenodocommunity());
|
||||||
|
Assertions.assertEquals("ri", context.getType());
|
||||||
|
|
||||||
|
break;
|
||||||
|
case "covid-19":
|
||||||
|
Assertions.assertEquals("Corona Virus Disease", context.getName());
|
||||||
|
Assertions.assertTrue(context.getDescription().length() > 0);
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
context
|
||||||
|
.getDescription()
|
||||||
|
.startsWith(
|
||||||
|
"This portal provides access to publications, research data, projects and "));
|
||||||
|
Assertions
|
||||||
|
.assertFalse(
|
||||||
|
Optional
|
||||||
|
.ofNullable(context.getSubject())
|
||||||
|
.map(value -> false)
|
||||||
|
.orElse(true));
|
||||||
|
Assertions.assertEquals(25, context.getSubject().size());
|
||||||
|
Assertions.assertEquals("covid-19", context.getZenodocommunity());
|
||||||
|
Assertions.assertEquals("community", context.getType());
|
||||||
|
Assertions.assertTrue(context.getSubject().contains("coronavirus disease 2019"));
|
||||||
|
break;
|
||||||
|
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
}
|
||||||
|
}

@@ -0,0 +1,121 @@

package eu.dnetlib.dhp.oa.graph.dump.complete;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;

import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.schema.dump.oaf.graph.Relation;

public class RelationFromOrganizationTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

private static SparkSession spark;

private static Path workingDir;

private static final Logger log = LoggerFactory
.getLogger(RelationFromOrganizationTest.class);

private static final HashMap<String, String> map = new HashMap<>();

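// organizationCommunityMap associates organization ids with community ids; it is passed verbatim
// as the -organizationCommunityMap argument to SparkOrganizationRelation in test1 below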
String organizationCommunityMap = "{\"20|grid________::afaa39865943381c51f76c08725ffa75\":[\"mes\",\"euromarine\"], \"20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8\":[\"mes\",\"euromarine\"], \"20|snsf________::9b253f265e3bef5cae6d881fdf61aceb\":[\"mes\",\"euromarine\"],\"20|rcuk________::e054eea0a47665af8c3656b5785ccf76\":[\"mes\",\"euromarine\"],\"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151\":[\"mes\",\"euromarine\"],\"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a\":[\"mes\",\"euromarine\"],\"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27\":[\"mes\",\"euromarine\"],\"20|snsf________::8fa091f8f25a846779acb4ea97b50aef\":[\"mes\",\"euromarine\"],\"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71\":[\"mes\",\"euromarine\"],\"20|corda_______::81e020977211c2c40fae2e1a50bffd71\":[\"mes\",\"euromarine\"],\"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78\":[\"mes\",\"euromarine\"],\"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db\":[\"mes\",\"euromarine\"],\"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70\":[\"mes\",\"euromarine\"],\"20|rcuk________::e16010089551a1a9182a94604fc0ea59\":[\"mes\",\"euromarine\"],\"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b\":[\"mes\",\"euromarine\"],\"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b\":[\"mes\",\"euromarine\"],\"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7\":[\"mes\",\"euromarine\"],\"20|snsf________::74730ef1439d7f7636a8be58a6b471b8\":[\"mes\",\"euromarine\"],\"20|nsf_________::ad72e19043a5a467e35f9b444d11563e\":[\"mes\",\"euromarine\"],\"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3\":[\"mes\",\"euromarine\"],\"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea\":[\"mes\",\"euromarine\"],\"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860\":[\"mes\",\"euromarine\"],\"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317\":[\"mes\",\"euromarine\"], \"20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f\":[\"mes\",\"euromarine\"], \"20|corda__h2020::65531bd11be9935948c7f2f4db1c1832\":[\"mes\",\"euromarine\"], \"20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946\":[\"mes\",\"euromarine\"], \"20|snsf________::3eb43582ac27601459a8d8b3e195724b\":[\"mes\",\"euromarine\"], \"20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6\":[\"mes\",\"euromarine\"], \"20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929\":[\"mes\",\"euromarine\"], \"20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0\":[\"mes\",\"euromarine\"], \"20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0\":[\"beopen\"], "
+
"\"20|grid________::a867f78acdc5041b34acfe4f9a349157\":[\"beopen\"], \"20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff\":[\"beopen\"], \"20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad\":[\"beopen\"], \"20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602\":[\"beopen\"], \"20|corda_______::8ba50792bc5f4d51d79fca47d860c602\":[\"beopen\"], \"20|corda__h2020::e70e9114979e963eef24666657b807c3\":[\"beopen\"], \"20|corda_______::e70e9114979e963eef24666657b807c3\":[\"beopen\"], \"20|corda_______::15911e01e9744d57205825d77c218737\":[\"beopen\"], \"20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab\":[\"beopen\"], \"20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3\":[\"beopen\"], \"20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3\":[\"beopen\"], \"20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9\":[\"beopen\"], \"20|corda_______::3ff558e30c2e434d688539548300b050\":[\"beopen\"], \"20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39\":[\"beopen\"], \"20|corda__h2020::5187217e2e806a6df3579c46f82401bc\":[\"beopen\"], \"20|grid________::5fa7e2709bcd945e26bfa18689adeec1\":[\"beopen\"], \"20|corda_______::d8696683c53027438031a96ad27c3c07\":[\"beopen\"], \"20|corda__h2020::d8696683c53027438031a96ad27c3c07\":[\"beopen\"], \"20|rcuk________::23a79ebdfa59790864e4a485881568c1\":[\"beopen\"], \"20|corda__h2020::b76cf8fe49590a966953c37e18608af9\":[\"beopen\"], \"20|grid________::d2f0204126ee709244a488a4cd3b91c2\":[\"beopen\"], \"20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6\":[\"beopen\"], \"20|grid________::802401579481dc32062bdee69f5e6a34\":[\"beopen\"], \"20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d\":[\"beopen\"]}";

@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files
.createTempDirectory(RelationFromOrganizationTest.class.getSimpleName());
log.info("using work dir {}", workingDir);

SparkConf conf = new SparkConf();
conf.setAppName(RelationFromOrganizationTest.class.getSimpleName());

conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.toString());
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

spark = SparkSession
.builder()
.appName(RelationFromOrganizationTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}

@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}

@Test
void test1() throws Exception {

final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/relation")
.getPath();

final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymapservices.json")
.getPath();

SparkOrganizationRelation.main(new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-outputPath", workingDir.toString() + "/relation",
"-sourcePath", sourcePath,
"-organizationCommunityMap", organizationCommunityMap,
"-communityMapPath", communityMapPath
});

final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

JavaRDD<Relation> tmp = sc
.textFile(workingDir.toString() + "/relation")
.map(item -> OBJECT_MAPPER.readValue(item, Relation.class));

org.apache.spark.sql.Dataset<Relation> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(Relation.class));

verificationDataset.createOrReplaceTempView("table");

// Assertions.assertEquals(170, verificationDataset.count());
Assertions.assertEquals(0, verificationDataset.count());

// Dataset<Row> checkDs = spark
// .sql(
// "Select source.id, source.type " +
// "from table ");
//
// Assertions.assertEquals(2, checkDs.filter("substr(id, 4, 5) = 'dedup' ").count());
//
// Assertions.assertEquals(0, checkDs.filter("id = '20|grid________::afaa39865943381c51f76c08725ffa75'").count());
//
// Assertions.assertEquals(25, checkDs.filter("id = '00|context_____::" + DHPUtils.md5("beopen") + "'").count());
//
// Assertions
// .assertEquals(30, checkDs.filter("id = '00|context_____::" + DHPUtils.md5("euromarine") + "'").count());
//
// Assertions.assertEquals(30, checkDs.filter("id = '00|context_____::" + DHPUtils.md5("mes") + "'").count());
}

}
|
|
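The organizationCommunityMap argument above is a plain JSON string mapping organization identifiers to the community codes they feed. Below is a minimal, self-contained sketch of how such a string can be read into a typed map with Jackson (already on the test classpath through ObjectMapper); the class and method names are illustrative only and are not part of the dumped sources.

import java.util.List;
import java.util.Map;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;

public class OrganizationCommunityMapSketch {

    // Illustrative helper: turns the organization -> communities JSON string into a typed map.
    static Map<String, List<String>> parse(String json) throws Exception {
        return new ObjectMapper()
            .readValue(json, new TypeReference<Map<String, List<String>>>() {
            });
    }

    public static void main(String[] args) throws Exception {
        // Example input with the same shape as organizationCommunityMap above.
        String json = "{\"20|grid________::a867f78acdc5041b34acfe4f9a349157\":[\"beopen\"]}";
        // Prints: [beopen]
        System.out.println(parse(json).get("20|grid________::a867f78acdc5041b34acfe4f9a349157"));
    }
}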
@ -0,0 +1,97 @@

package eu.dnetlib.dhp.oa.graph.dump.complete;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;

import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.schema.oaf.Relation;

public class SelectRelationTest {

    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    private static SparkSession spark;

    private static Path workingDir;

    private static final Logger log = LoggerFactory
        .getLogger(SelectRelationTest.class);

    private static HashMap<String, String> map = new HashMap<>();

    @BeforeAll
    public static void beforeAll() throws IOException {
        workingDir = Files
            .createTempDirectory(SelectRelationTest.class.getSimpleName());
        log.info("using work dir {}", workingDir);

        SparkConf conf = new SparkConf();
        conf.setAppName(SelectRelationTest.class.getSimpleName());

        conf.setMaster("local[*]");
        conf.set("spark.driver.host", "localhost");
        conf.set("hive.metastore.local", "true");
        conf.set("spark.ui.enabled", "false");
        conf.set("spark.sql.warehouse.dir", workingDir.toString());
        conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

        spark = SparkSession
            .builder()
            .appName(SelectRelationTest.class.getSimpleName())
            .config(conf)
            .getOrCreate();
    }

    @AfterAll
    public static void afterAll() throws IOException {
        FileUtils.deleteDirectory(workingDir.toFile());
        spark.stop();
    }

    @Test
    public void test1() throws Exception {

        final String sourcePath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/dump/selectrelations")
            .getPath();

        SparkSelectValidRelationsJob.main(new String[] {
            "-isSparkSessionManaged", Boolean.FALSE.toString(),
            "-outputPath", workingDir.toString() + "/relation",
            "-sourcePath", sourcePath
        });

        // dumpCommunityProducts.exec(MOCK_IS_LOOK_UP_URL,Boolean.FALSE, workingDir.toString()+"/dataset",sourcePath,"eu.dnetlib.dhp.schema.oaf.Dataset","eu.dnetlib.dhp.schema.dump.oaf.Dataset");

        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

        JavaRDD<Relation> tmp = sc
            .textFile(workingDir.toString() + "/relation")
            .map(item -> OBJECT_MAPPER.readValue(item, Relation.class));

        Dataset<Relation> verificationDataset = spark
            .createDataset(tmp.rdd(), Encoders.bean(Relation.class));

        Assertions.assertEquals(7, verificationDataset.count());

    }

}
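The single assertion above only pins the total number of relations kept by SparkSelectValidRelationsJob. A possible finer-grained check, sketched below, groups the surviving relations by relClass through Spark SQL; this is only a sketch meant to sit at the end of test1(), reusing the spark session and verificationDataset already defined there, and it assumes the Relation bean exposes relClass as it does elsewhere in this module.

        // Sketch only: additional check at the end of test1(), reusing spark and verificationDataset.
        verificationDataset.createOrReplaceTempView("valid_relations");

        org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> byRelClass = spark
            .sql("SELECT relClass, count(*) AS cnt FROM valid_relations GROUP BY relClass");

        // Inspect the distribution while developing; replace with explicit assertions once the fixture is stable.
        byRelClass.show(false);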
@ -0,0 +1,139 @@

package eu.dnetlib.dhp.oa.graph.dump.funderresult;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;

import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject;
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;

public class ResultLinkedToProjectTest {

    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    private static SparkSession spark;

    private static Path workingDir;

    private static final Logger log = LoggerFactory
        .getLogger(ResultLinkedToProjectTest.class);

    private static final HashMap<String, String> map = new HashMap<>();

    @BeforeAll
    public static void beforeAll() throws IOException {
        workingDir = Files
            .createTempDirectory(
                ResultLinkedToProjectTest.class.getSimpleName());
        log.info("using work dir {}", workingDir);

        SparkConf conf = new SparkConf();
        conf.setAppName(ResultLinkedToProjectTest.class.getSimpleName());

        conf.setMaster("local[*]");
        conf.set("spark.driver.host", "localhost");
        conf.set("hive.metastore.local", "true");
        conf.set("spark.ui.enabled", "false");
        conf.set("spark.sql.warehouse.dir", workingDir.toString());
        conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

        spark = SparkSession
            .builder()
            .appName(ResultLinkedToProjectTest.class.getSimpleName())
            .config(conf)
            .getOrCreate();
    }

    @AfterAll
    public static void afterAll() throws IOException {
        FileUtils.deleteDirectory(workingDir.toFile());
        spark.stop();
    }

    @Test
    void testNoMatch() throws Exception {

        final String sourcePath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/papers.json")
            .getPath();

        final String graphPath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/preparedInfo")
            .getPath();

        final String communityMapPath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/communityMapPath")
            .getPath();

        SparkResultLinkedToProject.main(new String[] {
            "-isSparkSessionManaged", Boolean.FALSE.toString(),
            "-outputPath", workingDir.toString() + "/preparedInfo",
            "-sourcePath", sourcePath,
            "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
            "-graphPath", graphPath,
            "-communityMapPath", communityMapPath
        });

        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

        JavaRDD<CommunityResult> tmp = sc
            .textFile(workingDir.toString() + "/preparedInfo")
            .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));

        Assertions.assertEquals(0, tmp.count());

    }

    @Test
    void testMatchOne() throws Exception {

        final String sourcePath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/papers.json")
            .getPath();

        final String graphPath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/preparedInfo")
            .getPath();

        final String communityMapPath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/communityMapPath")
            .getPath();

        SparkResultLinkedToProject.main(new String[] {
            "-isSparkSessionManaged", Boolean.FALSE.toString(),
            "-outputPath", workingDir.toString() + "/preparedInfo",
            "-sourcePath", sourcePath,
            "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
            "-graphPath", graphPath,
            "-communityMapPath", communityMapPath
        });

        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

        JavaRDD<CommunityResult> tmp = sc
            .textFile(workingDir.toString() + "/preparedInfo")
            .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));

        Assertions.assertEquals(1, tmp.count());

    }

}
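testNoMatch and testMatchOne above differ only in the papers.json fixture they point at, so the invocation and the read-back could be factored into one private helper. A sketch of such a helper follows; the name runAndCount is illustrative, and the method would live inside ResultLinkedToProjectTest, reusing the spark, workingDir and OBJECT_MAPPER fields already declared there.

    // Illustrative helper for ResultLinkedToProjectTest: runs the job on the given
    // fixtures and returns how many CommunityResult records end up in preparedInfo.
    private long runAndCount(String sourcePath, String graphPath, String communityMapPath) throws Exception {
        SparkResultLinkedToProject.main(new String[] {
            "-isSparkSessionManaged", Boolean.FALSE.toString(),
            "-outputPath", workingDir.toString() + "/preparedInfo",
            "-sourcePath", sourcePath,
            "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
            "-graphPath", graphPath,
            "-communityMapPath", communityMapPath
        });

        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

        return sc
            .textFile(workingDir.toString() + "/preparedInfo")
            .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class))
            .count();
    }

    // Usage inside a test method:
    // Assertions.assertEquals(0, runAndCount(sourcePath, graphPath, communityMapPath));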
@ -0,0 +1,145 @@

package eu.dnetlib.dhp.oa.graph.dump.funderresult;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkDumpFunderResults;
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;

public class SplitPerFunderTest {

    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    private static SparkSession spark;

    private static Path workingDir;

    private static final Logger log = LoggerFactory.getLogger(SplitPerFunderTest.class);

    @BeforeAll
    public static void beforeAll() throws IOException {
        workingDir = Files.createTempDirectory(SplitPerFunderTest.class.getSimpleName());
        log.info("using work dir {}", workingDir);

        SparkConf conf = new SparkConf();
        conf.setAppName(SplitPerFunderTest.class.getSimpleName());

        conf.setMaster("local[*]");
        conf.set("spark.driver.host", "localhost");
        conf.set("hive.metastore.local", "true");
        conf.set("spark.ui.enabled", "false");
        conf.set("spark.sql.warehouse.dir", workingDir.toString());
        conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

        spark = SparkSession
            .builder()
            .appName(SplitPerFunderTest.class.getSimpleName())
            .config(conf)
            .getOrCreate();
    }

    @AfterAll
    public static void afterAll() throws IOException {
        FileUtils.deleteDirectory(workingDir.toFile());
        spark.stop();
    }

    @Test
    void test1() throws Exception {

        final String sourcePath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/ext")
            .getPath();

        SparkDumpFunderResults.main(new String[] {
            "-isSparkSessionManaged", Boolean.FALSE.toString(),
            "-outputPath", workingDir.toString() + "/split",
            "-sourcePath", sourcePath
        });

        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

        // FP7 3 and H2020 3
        JavaRDD<CommunityResult> tmp = sc
            .textFile(workingDir.toString() + "/split/EC_FP7")
            .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));

        Dataset<CommunityResult> verificationDataset = spark
            .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));

        Assertions.assertEquals(3, verificationDataset.count());

        Assertions
            .assertEquals(
                1, verificationDataset.filter("id = '50|dedup_wf_001::0d16b1714ab3077df73893a8ea57d776'").count());

        // CIHR 2
        tmp = sc
            .textFile(workingDir.toString() + "/split/CIHR")
            .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
        Assertions.assertEquals(2, tmp.count());

        // NWO 1
        tmp = sc
            .textFile(workingDir.toString() + "/split/NWO")
            .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
        Assertions.assertEquals(1, tmp.count());

        // NIH 2
        tmp = sc
            .textFile(workingDir.toString() + "/split/NIH")
            .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
        Assertions.assertEquals(2, tmp.count());

        // NSF 1
        tmp = sc
            .textFile(workingDir.toString() + "/split/NSF")
            .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
        Assertions.assertEquals(1, tmp.count());

        // SNSF 1
        tmp = sc
            .textFile(workingDir.toString() + "/split/SNSF")
            .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
        Assertions.assertEquals(1, tmp.count());

        // NHMRC 1
        tmp = sc
            .textFile(workingDir.toString() + "/split/NHMRC")
            .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
        Assertions.assertEquals(1, tmp.count());

        // H2020 3
        tmp = sc
            .textFile(workingDir.toString() + "/split/EC_H2020")
            .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
        Assertions.assertEquals(3, tmp.count());

        // MZOS 1
        tmp = sc
            .textFile(workingDir.toString() + "/split/MZOS")
            .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
        Assertions.assertEquals(1, tmp.count());

    }

}
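Every per-funder block in test1 repeats the same read-and-count pattern; it could be collapsed into a small helper such as the one sketched here. The name countForFunder is illustrative; the method would be added to SplitPerFunderTest next to the existing fields so it can reuse workingDir and OBJECT_MAPPER.

    // Illustrative helper for SplitPerFunderTest: counts the CommunityResult records
    // dumped into one funder directory under workingDir/split.
    private long countForFunder(JavaSparkContext sc, String funder) {
        return sc
            .textFile(workingDir.toString() + "/split/" + funder)
            .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class))
            .count();
    }

    // Usage inside test1():
    // Assertions.assertEquals(2, countForFunder(sc, "CIHR"));
    // Assertions.assertEquals(3, countForFunder(sc, "EC_H2020"));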
@ -0,0 +1,124 @@

package eu.dnetlib.dhp.oa.graph.dump.projectssubset;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.schema.dump.oaf.graph.Project;

public class ProjectSubsetTest {
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
    private static SparkSession spark;
    private static Path workingDir;
    private static final Logger log = LoggerFactory
        .getLogger(ProjectSubsetTest.class);

    @BeforeAll
    public static void beforeAll() throws IOException {
        workingDir = Files
            .createTempDirectory(
                ProjectSubsetTest.class.getSimpleName());
        log.info("using work dir {}", workingDir);
        SparkConf conf = new SparkConf();
        conf.setAppName(ProjectSubsetTest.class.getSimpleName());
        conf.setMaster("local[*]");
        conf.set("spark.driver.host", "localhost");
        conf.set("hive.metastore.local", "true");
        conf.set("spark.ui.enabled", "false");
        conf.set("spark.sql.warehouse.dir", workingDir.toString());
        conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
        spark = SparkSession
            .builder()
            .appName(ProjectSubsetTest.class.getSimpleName())
            .config(conf)
            .getOrCreate();
    }

    @AfterAll
    public static void afterAll() throws IOException {
        FileUtils.deleteDirectory(workingDir.toFile());
        spark.stop();
    }

    @Test
    void testAllNew() throws Exception {
        final String projectListPath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/dump/projectsubset/projectId")
            .getPath();
        final String sourcePath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/dump/projectsubset/allnew/projects")
            .getPath();
        spark
            .read()
            .textFile(projectListPath)
            .write()
            .mode(SaveMode.Overwrite)
            .text(workingDir.toString() + "/projectIds");
        ProjectsSubsetSparkJob.main(new String[] {
            "-isSparkSessionManaged", Boolean.FALSE.toString(),
            "-outputPath", workingDir.toString() + "/projects",
            "-sourcePath", sourcePath,
            "-projectListPath", workingDir.toString() + "/projectIds"
        });
        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
        JavaRDD<Project> tmp = sc
            .textFile(workingDir.toString() + "/projects")
            .map(item -> OBJECT_MAPPER.readValue(item, Project.class));
        Assertions.assertEquals(12, tmp.count());
        Assertions.assertEquals(2, tmp.filter(p -> p.getId().substring(3, 15).equals("aka_________")).count());
        Assertions.assertEquals(2, tmp.filter(p -> p.getId().substring(3, 15).equals("anr_________")).count());
        Assertions.assertEquals(4, tmp.filter(p -> p.getId().substring(3, 15).equals("arc_________")).count());
        Assertions.assertEquals(3, tmp.filter(p -> p.getId().substring(3, 15).equals("conicytf____")).count());
        Assertions.assertEquals(1, tmp.filter(p -> p.getId().substring(3, 15).equals("corda_______")).count());
        Assertions.assertEquals(40, sc.textFile(workingDir.toString() + "/projectIds").count());
    }

    @Test
    void testMatchOne() throws Exception {
        final String projectListPath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/dump/projectsubset/projectId")
            .getPath();
        final String sourcePath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/dump/projectsubset/matchOne/projects")
            .getPath();
        spark
            .read()
            .textFile(projectListPath)
            .write()
            .mode(SaveMode.Overwrite)
            .text(workingDir.toString() + "/projectIds");
        ProjectsSubsetSparkJob.main(new String[] {
            "-isSparkSessionManaged", Boolean.FALSE.toString(),
            "-outputPath", workingDir.toString() + "/projects",
            "-sourcePath", sourcePath,
            "-projectListPath", workingDir.toString() + "/projectIds"
        });
        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
        JavaRDD<Project> tmp = sc
            .textFile(workingDir.toString() + "/projects")
            .map(item -> OBJECT_MAPPER.readValue(item, Project.class));
        Assertions.assertEquals(11, tmp.count());
        Assertions.assertEquals(2, tmp.filter(p -> p.getId().substring(3, 15).equals("aka_________")).count());
        Assertions.assertEquals(2, tmp.filter(p -> p.getId().substring(3, 15).equals("anr_________")).count());
        Assertions.assertEquals(4, tmp.filter(p -> p.getId().substring(3, 15).equals("arc_________")).count());
        Assertions.assertEquals(3, tmp.filter(p -> p.getId().substring(3, 15).equals("conicytf____")).count());
        Assertions.assertEquals(0, tmp.filter(p -> p.getId().substring(3, 15).equals("corda__h2020")).count());
        Assertions.assertEquals(39, sc.textFile(workingDir.toString() + "/projectIds").count());
    }
}
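Both tests above end by asserting the size of the rewritten projectIds list (40 and 39 lines). A further sanity check, sketched under the assumption that ProjectsSubsetSparkJob writes one project identifier per line, would be to confirm the list contains no duplicates; the fragment is meant to be appended inside either test after the existing assertions.

        // Sketch only: verifies that the updated project-id list has no duplicate identifiers.
        JavaRDD<String> updatedIds = sc.textFile(workingDir.toString() + "/projectIds");
        Assertions.assertEquals(updatedIds.count(), updatedIds.distinct().count());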
@ -0,0 +1,2 @@
{"projectsList":[{"code":"123455","funder":{"jurisdiction":"FI","name":"Academy of Finland","shortName":"AKA"},"id":"40|aka_________::0f7d119de1f656b5763a16acf876fed6","provenance":{"provenance":"sysimport:crosswalk:entityregistry","trust":"0.900000000000000022"},"title":"Business services for rural bioenergy entrepreneurship in Finland: a network analysis approach"},{"code":"119027","funder":{"fundingStream":"H2020","jurisdiction":"EU","name":"European Commission","shortName":"EC"},"id":"40|aka_________::03376222b28a3aebf2730ac514818d04","provenance":{"provenance":"sysimport:crosswalk:entityregistry","trust":"0.900000000000000022"},"title":"EGFR Tyrosine Kinase Inhibitors and LKB1 Tumor Suppressor in Non-Small-Cell Lung Cancer"}],"resultId":"50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb"}
{"projectsList":[{"code":"123455","funder":{"jurisdiction":"FI","name":"Academy of Finland","shortName":"AKA"},"id":"40|aka_________::0f7d119de1f656b5763a16acf876fed6","provenance":{"provenance":"sysimport:crosswalk:entityregistry","trust":"0.900000000000000022"},"title":"Business services for rural bioenergy entrepreneurship in Finland: a network analysis approach"}],"resultId":"50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80"}
@ -0,0 +1,2 @@
{"resultId":"50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","projectsList":[{"id":"40|aka_________::0f7d119de1f656b5763a16acf876fed6","code":"123455","acronym":null,"title":"Business services for rural bioenergy entrepreneurship in Finland: a network analysis approach","funder":{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","fundingStream":null},"provenance":{"provenance":"sysimport:crosswalk:entityregistry","trust":"0.900000000000000022"},"validated":{"validationDate":"2021-08-06","validatedByFunder":true}},{"id":"40|aka_________::03376222b28a3aebf2730ac514818d04","code":"119027","acronym":null,"title":"EGFR Tyrosine Kinase Inhibitors and LKB1 Tumor Suppressor in Non-Small-Cell Lung Cancer","funder":{"shortName":"EC","name":"European Commission","jurisdiction":"EU","fundingStream":"H2020"},"provenance":{"provenance":"sysimport:crosswalk:entityregistry","trust":"0.900000000000000022"},"validated":null}]}
{"resultId":"50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80","projectsList":[{"id":"40|aka_________::0f7d119de1f656b5763a16acf876fed6","code":"123455","acronym":null,"title":"Business services for rural bioenergy entrepreneurship in Finland: a network analysis approach","funder":{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","fundingStream":null},"provenance":{"provenance":"sysimport:crosswalk:entityregistry","trust":"0.900000000000000022"},"validated":{"validationDate":"2021-08-04","validatedByFunder":true}}]}
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -0,0 +1 @@
{"ee":"SDSN - Greece","epos":"EPOS","enrmaps":"Energy Research","fet-h2020":"FET H2020","instruct":"Instruct-Eric","egi":"EGI Federation","euromarine":"Euromarine","covid-19":"COVID-19","dariah":"DARIAH EU","rda":"Research Data Alliance","clarin":"CLARIN","aginfra":"Agricultural and Food Sciences","risis":"RISI","fam":"Fisheries and Aquaculture Management","beopen":"Transport Research","elixir-gr":"ELIXIR GR","fet-fp7":"FET FP7","ifremer":"Ifremer","science-innovation-policy":"Science and Innovation Policy Studies","mes":"European Marine Scinece","oa-pg":"EC Post-Grant Open Access Pilot","ni":"Neuroinformatics","dh-ch":"Digital Humanities and Cultural Heritage"}
@ -0,0 +1 @@
{"egi":"EGI Federation","covid-19":"COVID-19","rda":"Research Data Alliance","ni":"Neuroinformatics","dh-ch":"Digital Humanities and Cultural Heritage"}
@ -0,0 +1,5 @@
{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2018-06-05","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Journal of Applied Mathematics and Stochastic Analysis"},"extraInfo":[],"id":"10|doajarticles::1fa6859d71faa77b32d82f278c6ed1df","lastupdatetimestamp":1592688952862,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"doaj10489533"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Journal of Applied Mathematics and Stochastic Analysis"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1048-9533",null],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"https://www.hindawi.com/journals/jamsa"}}
{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-05-25","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Pelitutkimuksen vuosikirja"},"extraInfo":[],"id":"10|doajarticles::9c4b678901e5276d9e3addee566816af","lastupdatetimestamp":1592688952862,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"doaj1798355X"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Pelitutkimuksen vuosikirja"},"openairecompatibility":{"classid":"UNKNOWN","classname":"not 
available","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1798-355X",null],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geography. Anthropology. Recreation: Recreation. Leisure | Science: Mathematics: Instruments and machines: Electronic computers. Computer science: Computer software"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://www.pelitutkimus.fi"}}
{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2018-06-05","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Statistika: Statistics and Economy Journal"},"extraInfo":[],"id":"10|doajarticles::a5314b60f79b869cb5d3a2709167bc3a","lastupdatetimestamp":1592688952862,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"doaj0322788X"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Statistika: Statistics and Economy Journal"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::0322-788X",null],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Social Sciences: Statistics"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://www.czso.cz/statistika_journal"}}
{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2018-06-05","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Review of Development Finance"},"extraInfo":[],"id":"10|doajarticles::acb7c79bb85d3b3a7b75389f5d9570f5","lastupdatetimestamp":1592688952862,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"doaj18799337"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Review of Development Finance"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1879-9337",null],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Social Sciences: Industries. Land use. Labor: Economic growth, development, planning | Social Sciences: Finance"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://www.journals.elsevier.com/review-of-development-finance/"}}
{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-05-28","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"The Journal of Advanced Navigation Technology"},"extraInfo":[],"id":"10|issn___print::0a79337eaf5145faa478785423273355","lastupdatetimestamp":1592688952862,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"jrnl12269026"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"The Journal of Advanced Navigation Technology"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::1226-9026",null],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false}}
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -0,0 +1,6 @@
{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-beta","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"properties":[],"relClass":"merges","relType":"organizationOrganization","source":"20|dedup_wf_001::00edd377ceb26454f8d644bd36383d7a","subRelType":"dedup","target":"20|grid________::afaa39865943381c51f76c08725ffa75"}
{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-beta","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"properties":[],"relClass":"merges","relType":"organizationOrganization","source":"20|dedup_wf_001::03c3cc44ed5921066438b5acf8bd61d0","subRelType":"dedup","target":"20|grid________::63daa724c27cc3511166414ae10d2860"}
{"collectedfrom":[{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:actionset","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|dedup_wf_001::03cf8c9e4906720a09016208c934a9e0","subRelType":"affiliation","target":"50|doiboost____::6923f21ef86bee98e6f595c0edb0d44d"}
{"collectedfrom":[{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:actionset","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|dedup_wf_001::03d0bfb13075c2310eed228ee5c0b6a0","subRelType":"affiliation","target":"50|dedup_wf_001::8265619189c8708f57a652e97831234d"}
{"collectedfrom":[{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:actionset","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|dedup_wf_001::04164fc20b3138346446a164415de38f","subRelType":"affiliation","target":"50|doiboost____::09de6ea4c1a94458e58c491699f0988c"}
{"collectedfrom":[{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:actionset","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|dedup_wf_001::04164fc20b3138346446a164415de38f","subRelType":"affiliation","target":"50|doiboost____::6c8d54e446d72efa7a53497a4b505b7c"}
@ -0,0 +1 @@
{"ee":"SDSN - Greece","epos":"EPOS","enrmaps":"Energy Research","fet-h2020":"FET H2020","instruct":"Instruct-Eric","egi":"EGI Federation","euromarine":"Euromarine","covid-19":"COVID-19","dariah":"DARIAH EU","rda":"Research Data Alliance","clarin":"CLARIN","aginfra":"Agricultural and Food Sciences","risis":"RISI","fam":"Fisheries and Aquaculture Management","beopen":"Transport Research","elixir-gr":"ELIXIR GR","fet-fp7":"FET FP7","ifremer":"Ifremer","science-innovation-policy":"Science and Innovation Policy Studies","mes":"European Marine Scinece","oa-pg":"EC Post-Grant Open Access Pilot","ni":"Neuroinformatics","dh-ch":"Digital Humanities and Cultural Heritage"}
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Some files were not shown because too many files have changed in this diff