forked from D-Net/dnet-hadoop
Changed the implementation of the result selection so that it runs for all four result types (publication, dataset, software, otherresearchproduct). Decoupled the redistribution of atomic actions over the original ids into separate executions for relations and for results.
parent 9d33f0e6d7
commit 4205b4c99d
New file: SparkRedistributeIISRelations.java

@@ -0,0 +1,130 @@
package eu.dnetlib.dhp.actionmanager.remapping;

import com.fasterxml.jackson.core.JsonProcessingException;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;

import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

public class SparkRedistributeIISRelations implements Serializable {

    private static final Logger log = LoggerFactory.getLogger(SparkRedistributeIISRelations.class);

    public static void main(String[] args) throws Exception {
        String jsonConfiguration = IOUtils
            .toString(
                SparkRedistributeIISRelations.class
                    .getResourceAsStream(
                        "/eu/dnetlib/dhp/actionmanager/remapping/input_redistribute_parameters.json"));

        final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
        parser.parseArgument(args);

        Boolean isSparkSessionManaged = Optional
            .ofNullable(parser.get("isSparkSessionManaged"))
            .map(Boolean::valueOf)
            .orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        final String outputPath = parser.get("outputPath");
        log.info("outputPath: {}", outputPath);

        final String inputPath = parser.get("inputPath");

        final String asInputPath = parser.get("asInputPath");

        SparkConf conf = new SparkConf();

        runWithSparkSession(
            conf,
            isSparkSessionManaged,
            spark -> {
                common.removeOutputDir(spark, outputPath);
                Dataset<ResultPid> resultPidDataset = common.readPath(spark, inputPath, ResultPid.class);
                Dataset<ASResultInfo> asResultInfoDataset = common.readPath(spark, asInputPath, ASResultInfo.class);
                execRelation(spark, asResultInfoDataset.filter("type = 'relation'"), resultPidDataset, outputPath);
            });
    }

    private static void execRelation(SparkSession spark, Dataset<ASResultInfo> asResultInfoDataset,
        Dataset<ResultPid> resultPidDataset, String outputPathRelation) {
        resultPidDataset
            .joinWith(asResultInfoDataset, resultPidDataset.col("resultId").equalTo(asResultInfoDataset.col("id")), "left")
            .flatMap((FlatMapFunction<Tuple2<ResultPid, ASResultInfo>, Relation>) value -> {
                List<Relation> relationList = new ArrayList<>();
                if (Objects.nonNull(value._2())) {
                    // emit both directions of the result-project relation
                    relationList.add(getRelation(value._2(), "result"));
                    relationList.add(getRelation(value._2(), "project"));
                }
                return relationList.iterator();
            }, Encoders.bean(Relation.class))
            .filter(Objects::nonNull)
            .toJavaRDD()
            .map(p -> new AtomicAction(Relation.class, p))
            .mapToPair(aa -> getTextTextTuple2(aa))
            .saveAsHadoopFile(outputPathRelation, Text.class, Text.class, SequenceFileOutputFormat.class);
    }

    private static Relation getRelation(ASResultInfo asResultInfo, String type) {
        Relation r = new Relation();
        if (type.equals("result")) {
            r.setSource(asResultInfo.getId());
            r.setRelClass("isProducedBy");
            r.setTarget(asResultInfo.getValue().get(0).getValue());
        } else {
            r.setRelClass("produces");
            r.setSource(asResultInfo.getValue().get(0).getValue());
            r.setTarget(asResultInfo.getId());
        }

        r.setRelType("resultProject");
        r.setSubRelType("outcome");
        r.setDataInfo(getDataInfo(asResultInfo));
        return r;
    }

    private static DataInfo getDataInfo(ASResultInfo asResultInfo) {
        DataInfo di = new DataInfo();
        di.setInvisible(true);
        di.setDeletedbyinference(false);
        di.setTrust(asResultInfo.getValue().get(0).getTrust());
        di.setInferenceprovenance(asResultInfo.getValue().get(0).getInference_provenance());
        Qualifier pAction = new Qualifier();
        pAction.setClassid("iis");
        pAction.setClassname("iis");
        pAction.setSchemename("dnet:provenanceActions");
        pAction.setSchemeid("dnet:provenanceActions");
        di.setProvenanceaction(pAction);
        return di;
    }

    private static Tuple2<Text, Text> getTextTextTuple2(AtomicAction aa) throws JsonProcessingException {
        return new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
            new Text(common.OBJECT_MAPPER.writeValueAsString(aa)));
    }
}
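Both redistribution jobs write the usual dnet-hadoop action-set format: a Hadoop SequenceFile of (Text, Text) pairs whose key is the payload class name and whose value is the JSON-serialized AtomicAction. A minimal sketch for reading such an output back for inspection; the path and class name are placeholders, not from this commit:

import org.apache.hadoop.io.Text;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;

public class InspectActionSet {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
            .appName("InspectActionSet")
            .master("local[*]")
            .getOrCreate();
        JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

        // (class name, serialized AtomicAction) pairs, as written by execRelation
        JavaRDD<String> payloads = sc
            .sequenceFile("/tmp/remapping/relationActions", Text.class, Text.class) // placeholder path
            .map(t -> t._1().toString() + " -> " + t._2().toString());

        payloads.take(10).forEach(System.out::println);
        spark.stop();
    }
}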
Changed file: SparkRedistributeIISResult.java

@@ -1,12 +1,17 @@
 package eu.dnetlib.dhp.actionmanager.remapping;

+import com.fasterxml.jackson.core.JsonProcessingException;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.action.AtomicAction;
-import eu.dnetlib.dhp.schema.oaf.*;
+import eu.dnetlib.dhp.schema.oaf.Context;
+import eu.dnetlib.dhp.schema.oaf.DataInfo;
+import eu.dnetlib.dhp.schema.oaf.Qualifier;
+import eu.dnetlib.dhp.schema.oaf.Result;
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
 import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SparkSession;
@@ -15,7 +20,9 @@ import org.slf4j.LoggerFactory;
 import scala.Tuple2;

 import java.io.Serializable;
-import java.util.*;
+import java.util.Arrays;
+import java.util.Objects;
+import java.util.Optional;
 import java.util.stream.Collectors;

 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
@@ -41,8 +48,8 @@ public class SparkRedistributeIISResult implements Serializable {
             .orElse(Boolean.TRUE);
         log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

-        final String outputPathResult = parser.get("outputPathResult");
-        log.info("outputPathResult: {}", outputPathResult);
+        final String outputPath = parser.get("outputPath");
+        log.info("outputPath: {}", outputPath);

         final String outputPathRelation = parser.get("outputPathRelation");
         log.info("outputPathRelation: {}", outputPathRelation);
@@ -57,120 +64,53 @@ public class SparkRedistributeIISResult implements Serializable {
             conf,
             isSparkSessionManaged,
             spark -> {
-                common.removeOutputDir(spark, outputPathResult);
-                Dataset<ResultPid> resultPidDataset = common.readPath(spark, inputPath + "/publication", ResultPid.class)
-                    .union(common.readPath(spark, inputPath + "/dataset", ResultPid.class))
-                    .union(common.readPath(spark, inputPath + "/software", ResultPid.class))
-                    .union(common.readPath(spark, inputPath + "/otherresearchproduct", ResultPid.class));
+                common.removeOutputDir(spark, outputPath);
+                Dataset<ResultPid> resultPidDataset = common.readPath(spark, inputPath, ResultPid.class);
                 Dataset<ASResultInfo> asResultInfoDataset = common.readPath(spark, asInputPath, ASResultInfo.class);
-                execResult(spark, asResultInfoDataset.filter("type = 'result'"), resultPidDataset, outputPathResult);
-                execRelation(spark, asResultInfoDataset.filter("type = 'relation'"), resultPidDataset, outputPathRelation);
+                execResult(spark, asResultInfoDataset.filter("type = 'result'"), resultPidDataset, outputPath);
+                // execRelation(spark, asResultInfoDataset.filter("type = 'relation'"), resultPidDataset, outputPathRelation);
             });
     }

-    private static void execRelation(SparkSession spark, Dataset<ASResultInfo> asResultInfoDataset,
-        Dataset<ResultPid> resultPidDataset, String outputPathRelation) {
-        resultPidDataset.joinWith(asResultInfoDataset, resultPidDataset.col("resultId").equalTo(asResultInfoDataset.col("id")), "left")
-            .flatMap((FlatMapFunction<Tuple2<ResultPid, ASResultInfo>, Relation>) value -> {
-                List<Relation> relationList = new ArrayList<>();
-                if (Objects.nonNull(value._2())) {
-                    relationList.add(getRelation(value._2(), "result"));
-                    relationList.add(getRelation(value._2(), "project"));
-                }
-                return relationList.iterator();
-            }, Encoders.bean(Relation.class))
-            .filter(Objects::nonNull)
-            .toJavaRDD()
-            .map(p -> new AtomicAction(Relation.class, p))
-            .mapToPair(
-                aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
-                    new Text(common.OBJECT_MAPPER.writeValueAsString(aa))))
-            .saveAsHadoopFile(outputPathRelation, Text.class, Text.class, SequenceFileOutputFormat.class);
-    }
-
-    private static Relation getRelation(ASResultInfo asResultInfo, String type) {
-        Relation r = new Relation();
-        if (type.equals("result")) {
-            r.setSource(asResultInfo.getId());
-            r.setRelClass("isProducedBy");
-            r.setTarget(asResultInfo.getValue().get(0).getValue());
-        } else {
-            r.setRelClass("produces");
-            r.setSource(asResultInfo.getValue().get(0).getValue());
-            r.setTarget(asResultInfo.getId());
-        }
-        r.setRelType("resultProject");
-        r.setSubRelType("outcome");
-        r.setDataInfo(getDataInfo(asResultInfo));
-        return r;
-    }
-
-    private static DataInfo getDataInfo(ASResultInfo asResultInfo) {
-        DataInfo di = new DataInfo();
-        di.setInvisible(false);
-        di.setInvisible(true);
-        di.setDeletedbyinference(false);
-        di.setTrust(asResultInfo.getValue().get(0).getTrust());
-        di.setInferenceprovenance(asResultInfo.getValue().get(0).getInference_provenance());
-        Qualifier pAction = new Qualifier();
-        pAction.setClassid("iis");
-        pAction.setClassname("iss");
-        pAction.setSchemename("dnet:provenanceActions");
-        pAction.setSchemeid("dnet:provenanceActions");
-        di.setProvenanceaction(pAction);
-        return di;
-    }
-
-    private static void execResult(SparkSession spark, Dataset<ASResultInfo> asResultInfoDataset, Dataset<ResultPid> resultPidDataset, String outputPath) {
-        resultPidDataset.joinWith(asResultInfoDataset, resultPidDataset.col("resultId").equalTo(asResultInfoDataset.col("id")), "left")
-            .map(value -> {
-                if (Objects.isNull(value._2())) {
-                    return null;
-                }
-                Result r = new Result();
-                ASResultInfo asResultInfo = value._2();
-                r.setId(asResultInfo.getId());
-                StructuredProperty pid = new StructuredProperty();
-                pid.setValue(value._1().getDoi());
-                Qualifier qualifier = new Qualifier();
-                qualifier.setClassid("doi");
-                qualifier.setClassname("doi");
-                qualifier.setSchemeid("dnet:pid");
-                qualifier.setSchemename("dnet:pid");
-                pid.setQualifier(qualifier);
-                r.setContext(asResultInfo.getValue().stream().map(v -> {
-                    Context c = new Context();
-                    c.setId(v.getValue());
-                    DataInfo dataInfo = new DataInfo();
-                    dataInfo.setTrust(v.getTrust());
-                    dataInfo.setInferenceprovenance(v.getInference_provenance());
-                    dataInfo.setDeletedbyinference(false);
-                    dataInfo.setInferred(true);
-                    dataInfo.setInvisible(false);
-                    Qualifier pAction = new Qualifier();
-                    pAction.setClassid("iis");
-                    pAction.setClassname("iis");
-                    pAction.setSchemeid("dnet:provenanceActions");
-                    pAction.setSchemename("dnet:provenanceActions");
-                    dataInfo.setProvenanceaction(pAction);
-                    c.setDataInfo(Arrays.asList(dataInfo));
-                    return c;
-                }).collect(Collectors.toList()));
-                return r;
+    private static void execResult(SparkSession spark, Dataset<ASResultInfo> info, Dataset<ResultPid> resultPidDataset, String outputPathResult) {
+        info.joinWith(resultPidDataset, info.col("id").equalTo(resultPidDataset.col("resultId")), "left")
+            .map((MapFunction<Tuple2<ASResultInfo, ResultPid>, Result>) value -> {
+                Result ri = null;
+                if (Objects.nonNull(value._2())) {
+                    ri = new Result();
+                    ASResultInfo asri = value._1();
+                    ResultPid rp = value._2();
+                    ri.setId(value._1().getId());
+                    ri.setContext(asri.getValue()
+                        .stream()
+                        .map(c -> {
+                            Context context = new Context();
+                            context.setId(c.getValue());
+                            DataInfo di = new DataInfo();
+                            di.setInferenceprovenance(c.getInference_provenance());
+                            di.setTrust(c.getTrust());
+                            di.setDeletedbyinference(false);
+                            Qualifier pa = new Qualifier();
+                            pa.setClassname("iis");
+                            pa.setClassid("iis");
+                            pa.setSchemeid("dnet:provenanceActions");
+                            pa.setSchemename("dnet:provenanceActions");
+                            di.setProvenanceaction(pa);
+                            context.setDataInfo(Arrays.asList(di));
+                            return context;
+                        }).collect(Collectors.toList()));
+                }
+                return ri;
             }, Encoders.bean(Result.class))
             .filter(Objects::nonNull)
             .toJavaRDD()
-            .map(p -> new AtomicAction(Result.class, p))
-            .mapToPair(
-                aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
-                    new Text(common.OBJECT_MAPPER.writeValueAsString(aa))))
-            .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);
+            .mapToPair(r -> getTextTextTuple2(r))
+            .saveAsHadoopFile(outputPathResult, Text.class, Text.class, SequenceFileOutputFormat.class);
     }
+
+    private static Tuple2<Text, Text> getTextTextTuple2(Result r) throws JsonProcessingException {
+        AtomicAction aa = new AtomicAction(Result.class, r);
+        return new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
+            new Text(common.OBJECT_MAPPER.writeValueAsString(aa)));
+    }
 }
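The rewritten execResult emits Result records carrying only the identifier and the IIS-inferred contexts, which the new getTextTextTuple2 helper then serializes as AtomicActions. As a sketch of the shape of one such record, built with the same oaf setters used above (all values are hypothetical, borrowed in form from the fixtures later in this diff):

import eu.dnetlib.dhp.schema.oaf.Context;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Result;

import java.util.Arrays;

public class ResultContextExample {
    public static void main(String[] args) {
        Result r = new Result();
        r.setId("50|doiboost____::fb2c70723d74f45329640255a959333d"); // hypothetical original id

        Context context = new Context();
        context.setId("dh-ch"); // community/context id inferred by IIS (hypothetical)

        // provenance attached to the context, mirroring the mapping in execResult
        DataInfo di = new DataInfo();
        di.setTrust("0.7348");
        di.setInferenceprovenance("iis::document_referencedProjects"); // hypothetical
        di.setDeletedbyinference(false);

        Qualifier pa = new Qualifier();
        pa.setClassid("iis");
        pa.setClassname("iis");
        pa.setSchemeid("dnet:provenanceActions");
        pa.setSchemename("dnet:provenanceActions");
        di.setProvenanceaction(pa);

        context.setDataInfo(Arrays.asList(di));
        r.setContext(Arrays.asList(context));
    }
}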
Changed file: SparkSelectResults.java

@@ -1,6 +1,7 @@
 package eu.dnetlib.dhp.actionmanager.remapping;

 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.Result;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
@@ -39,10 +40,10 @@ public class SparkSelectResults implements Serializable {
         final String inputPath = parser.get("inputPath");

-        final String resultClassName = parser.get("resultTableName");
-        log.info("resultTableName: {}", resultClassName);
-
-        Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
+        // final String resultClassName = parser.get("resultTableName");
+        // log.info("resultTableName: {}", resultClassName);
+        //
+        // Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);

         SparkConf conf = new SparkConf();
@@ -52,10 +53,15 @@ public class SparkSelectResults implements Serializable {
             isSparkSessionManaged,
             spark -> {
                 common.removeOutputDir(spark, outputPath);
-                exec(spark, inputPath, outputPath, resultClazz);
+                run(spark, inputPath, outputPath);
             });
     }

+    private static void run(SparkSession spark, String inputPath, String outputPath) {
+        ModelSupport.resultTypes.keySet()
+            .forEach(key -> exec(spark, inputPath + "/" + key, outputPath, ModelSupport.resultTypes.get(key)));
+    }
+
     private static <R extends Result> void exec(SparkSession spark, String inputPath, String outputPath, Class<R> resultClazz) {
         Dataset<R> result = common.readPath(spark, inputPath, resultClazz);

@@ -70,7 +76,7 @@ public class SparkSelectResults implements Serializable {
                 "cf.key = '10|openaire____::081b82f96300b6a6e3d282bad31cb6e2') " +
                 "and result.id not like '50|dedup%' ")
             .write()
-            .mode(SaveMode.Overwrite)
+            .mode(SaveMode.Append)
             .option("compression", "gzip")
             .json(outputPath);
     }
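The switch from SaveMode.Overwrite to SaveMode.Append is what lets the new run(...) method fan out over all four result types: each exec(...) call writes to the same outputPath, so with Overwrite every type would wipe the previous one's output. The directory is cleared once up front via removeOutputDir, after which the four writes accumulate. The loop below is an equivalent expansion of run(...), shown only for clarity:

for (String key : ModelSupport.resultTypes.keySet()) {
    // keys are the result type names (publication, dataset, software, otherresearchproduct);
    // each type is read from its own input subdirectory and appended to the shared output
    exec(spark, inputPath + "/" + key, outputPath, ModelSupport.resultTypes.get(key));
}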
New file: job parameter definitions

@@ -0,0 +1,33 @@
[
  {
    "paramName": "issm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "when true will stop SparkSession after job execution",
    "paramRequired": false
  },
  {
    "paramName": "as",
    "paramLongName": "actionSets",
    "paramDescription": "the allowed list of action sets",
    "paramRequired": true
  },
  {
    "paramName": "ip",
    "paramLongName": "inputPath",
    "paramDescription": "the input path",
    "paramRequired": true
  },
  {
    "paramName": "op",
    "paramLongName": "outputPath",
    "paramDescription": "the path of the new ActionSet",
    "paramRequired": true
  },
  {
    "paramName": "is",
    "paramLongName": "isLookUpUrl",
    "paramDescription": "URL of the isLookUp Service",
    "paramRequired": true
  }
]
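These specs are consumed by ArgumentApplicationParser. As a rough illustration, assuming the short-option convention used elsewhere in dnet-hadoop (each paramName becomes a flag), an invocation matching the spec above might look like the following; all values are placeholders:

String[] args = new String[] {
    "-issm", "true",                          // isSparkSessionManaged
    "-as", "someActionSetId",                 // actionSets (hypothetical value)
    "-ip", "/tmp/remapping/graph",            // inputPath (placeholder)
    "-op", "/tmp/remapping/actionSet",        // outputPath (placeholder)
    "-is", "http://lookup.example/isLookUp"   // isLookUpUrl (placeholder)
};
// pass args to the main(...) of whichever job loads this spec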
New file: job parameter definitions

@@ -0,0 +1,26 @@
[
  {
    "paramName": "issm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "when true will stop SparkSession after job execution",
    "paramRequired": false
  },
  {
    "paramName": "asp",
    "paramLongName": "asInputPath",
    "paramDescription": "the allowed list of action sets",
    "paramRequired": true
  },
  {
    "paramName": "rip",
    "paramLongName": "relationInputPath",
    "paramDescription": "the input path",
    "paramRequired": true
  },
  {
    "paramName": "op",
    "paramLongName": "outputPath",
    "paramDescription": "the path of the new ActionSet",
    "paramRequired": true
  }
]
New file: job parameter definitions

@@ -0,0 +1,33 @@
[
  {
    "paramName": "issm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "when true will stop SparkSession after job execution",
    "paramRequired": false
  },
  {
    "paramName": "as",
    "paramLongName": "actionSets",
    "paramDescription": "the allowed list of action sets",
    "paramRequired": true
  },
  {
    "paramName": "ip",
    "paramLongName": "inputPath",
    "paramDescription": "the input path",
    "paramRequired": true
  },
  {
    "paramName": "op",
    "paramLongName": "outputPath",
    "paramDescription": "the path of the new ActionSet",
    "paramRequired": true
  },
  {
    "paramName": "is",
    "paramLongName": "isLookUpUrl",
    "paramDescription": "URL of the isLookUp Service",
    "paramRequired": true
  }
]
New file: ExpandResultInfoTest.java

@@ -0,0 +1,4 @@
package eu.dnetlib.dhp.actionmanager.remapping;

public class ExpandResultInfoTest {
}
New file: RedistributeIISResultTest.java

@@ -0,0 +1,4 @@
package eu.dnetlib.dhp.actionmanager.remapping;

public class RedistributeIISResultTest {
}
New file: SelectResultTest.java

@@ -0,0 +1,4 @@
package eu.dnetlib.dhp.actionmanager.remapping;

public class SelectResultTest {
}
New file: test fixture (relations)

@@ -0,0 +1,12 @@
{"collectedfrom":null,"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.7348","inferenceprovenance":"iis::document_referencedProjects","provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1594214129915,"relType":"resultResult","subRelType":"dedup","relClass":"merges","source":"50|dedup_wf_001::1cba00616e303863c34fadaf797d0f8f","target":"50|base_oa_____::fb2c70723d74f45329640255a959333d","validated":null,"validationDate":null,"properties":[]}
{"collectedfrom":null,"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.7348","inferenceprovenance":"iis::document_referencedProjects","provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1594214129915,"relType":"resultProject","subRelType":"outcome","relClass":"merges","source":"50|dedup_wf_001::1cba00616e303863c34fadaf797d0f8f","target":"50|doiboost____::fb2c70723d74f45329640255a959333d","validated":null,"validationDate":null,"properties":[]}
{"collectedfrom":null,"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.7348","inferenceprovenance":"iis::document_referencedProjects","provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1594214129915,"relType":"resultProject","subRelType":"outcome","relClass":"merges","source":"50|dedup_wf_001::7df4b3b26df271628a837c209516902a","target":"50|doiboost____::8978b9b797294da5306950a94a58d98c","validated":null,"validationDate":null,"properties":[]}
{"collectedfrom":null,"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.7348","inferenceprovenance":"iis::document_referencedProjects","provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1594214129915,"relType":"resultProject","subRelType":"outcome","relClass":"merges","source":"50|dedup_wf_001::7df4b3b26df271628a837c209516902a","target":"50|doiboost____::78329557c23bee513963ebf295d1434d","validated":null,"validationDate":null,"properties":[]}
{"collectedfrom":null,"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.7348","inferenceprovenance":"iis::document_referencedProjects","provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1594214129915,"relType":"resultProject","subRelType":"outcome","relClass":"merges","source":"50|dedup_wf_001::b04d742132c133177e996add1325ec04","target":"50|od______3515::779de9b3a2d224779be52fae43b5fc80","validated":null,"validationDate":null,"properties":[]}
{"collectedfrom":null,"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.7348","inferenceprovenance":"iis::document_referencedProjects","provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1594214129915,"relType":"resultProject","subRelType":"outcome","relClass":"merges","source":"50|dedup_wf_001::b04d742132c133177e996add1325ec04","target":"50|doiboost____::0f10b8f21b7925a344f41edb774f0b0a","validated":null,"validationDate":null,"properties":[]}
{"collectedfrom":null,"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.7348","inferenceprovenance":"iis::document_referencedProjects","provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1594214129915,"relType":"resultProject","subRelType":"outcome","relClass":"merges","source":"50|dedup_wf_001::b04d742132c133177e996add1325ec04","target":"50|od_______166::779de9b3a2d224779be52fae43b5fc80","validated":null,"validationDate":null,"properties":[]}
{"collectedfrom":null,"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.7348","inferenceprovenance":"iis::document_referencedProjects","provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1594214129915,"relType":"resultProject","subRelType":"outcome","relClass":"merges","source":"50|dedup_wf_001::b04d742132c133177e996add1325ec04","target":"50|od_______165::779de9b3a2d224779be52fae43b5fc80","validated":null,"validationDate":null,"properties":[]}
{"collectedfrom":null,"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.7348","inferenceprovenance":"iis::document_referencedProjects","provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1594214129915,"relType":"resultProject","subRelType":"outcome","relClass":"produces","source":"40|nsf_________::2bedb915e92b7dd25b082c6c2f241085","target":"50|od________18::c8e57f11074407d59f7114f047afd54e","validated":null,"validationDate":null,"properties":[]}
{"collectedfrom":null,"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.7348","inferenceprovenance":"iis::document_referencedProjects","provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1594214129915,"relType":"resultProject","subRelType":"outcome","relClass":"produces","source":"40|nsf_________::2bedb915e92b7dd25b082c6c2f241085","target":"50|od________18::c8e57f11074407d59f7114f047afd54e","validated":null,"validationDate":null,"properties":[]}
{"collectedfrom":null,"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.7348","inferenceprovenance":"iis::document_referencedProjects","provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1594214129915,"relType":"resultProject","subRelType":"outcome","relClass":"produces","source":"40|nsf_________::2bedb915e92b7dd25b082c6c2f241085","target":"50|od________18::c8e57f11074407d59f7114f047afd54e","validated":null,"validationDate":null,"properties":[]}
{"collectedfrom":null,"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.7348","inferenceprovenance":"iis::document_referencedProjects","provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1594214129915,"relType":"resultProject","subRelType":"outcome","relClass":"produces","source":"40|nsf_________::2bedb915e92b7dd25b082c6c2f241085","target":"50|od________18::c8e57f11074407d59f7114f047afd54e","validated":null,"validationDate":null,"properties":[]}
New file: test fixture (dedup records)

@@ -0,0 +1,3 @@
{"dedupId":"50|dedup_wf_001::1cba00616e303863c34fadaf797d0f8f","merges":["50|base_oa_____::fb2c70723d74f45329640255a959333d","50|doiboost____::fb2c70723d74f45329640255a959333d"]}
{"dedupId":"50|dedup_wf_001::7df4b3b26df271628a837c209516902a","merges":["50|doiboost____::8978b9b797294da5306950a94a58d98c","50|doiboost____::78329557c23bee513963ebf295d1434d"]}
{"dedupId":"50|dedup_wf_001::b04d742132c133177e996add1325ec04","merges":["50|od______3515::779de9b3a2d224779be52fae43b5fc80","50|doiboost____::0f10b8f21b7925a344f41edb774f0b0a","50|od_______166::779de9b3a2d224779be52fae43b5fc80","50|od_______165::779de9b3a2d224779be52fae43b5fc80"]}
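These dedupId/merges fixtures capture the mapping this commit redistributes over: an action attached to a dedup record id must be re-issued for every original id that the dedup record merges. A self-contained sketch of that expansion (DedupRecord is a hypothetical stand-in for the class these lines deserialize into):

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

public class ExpandByOriginalId {

    // hypothetical stand-in for the fixture lines {"dedupId": ..., "merges": [...]}
    static class DedupRecord {
        String dedupId;
        List<String> merges;
        DedupRecord(String dedupId, List<String> merges) {
            this.dedupId = dedupId;
            this.merges = merges;
        }
    }

    public static void main(String[] args) {
        DedupRecord d = new DedupRecord(
            "50|dedup_wf_001::1cba00616e303863c34fadaf797d0f8f",
            Arrays.asList(
                "50|base_oa_____::fb2c70723d74f45329640255a959333d",
                "50|doiboost____::fb2c70723d74f45329640255a959333d"));

        // an action targeting the dedup id is re-issued once per merged original id
        List<String> redistributed = d.merges.stream()
            .map(originalId -> d.dedupId + " -> " + originalId)
            .collect(Collectors.toList());

        redistributed.forEach(System.out::println);
    }
}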
File diff suppressed because one or more lines are too long