forked from D-Net/dnet-hadoop
This commit is contained in:
parent
da0d22e367
commit
e1e024c013
|
@ -0,0 +1,43 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<parent>
|
||||||
|
<artifactId>dhp-workflows</artifactId>
|
||||||
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
|
<version>1.2.4-SNAPSHOT</version>
|
||||||
|
</parent>
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
<artifactId>dhp-actionset-remapping</artifactId>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.spark</groupId>
|
||||||
|
<artifactId>spark-core_2.11</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.spark</groupId>
|
||||||
|
<artifactId>spark-sql_2.11</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
|
<artifactId>dhp-common</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
|
<artifactId>dhp-schemas</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
|
<artifactId>dhp-aggregation</artifactId>
|
||||||
|
<version>1.2.4-SNAPSHOT</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
</project>
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.remapping;
|
package eu.dnetlib.dhp.actionset.common;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.List;
|
import java.util.List;
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.remapping;
|
package eu.dnetlib.dhp.actionset.common;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.remapping;
|
package eu.dnetlib.dhp.actionset.common;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
@ -12,7 +12,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||||
|
|
||||||
public class common implements Serializable {
|
public class Common implements Serializable {
|
||||||
|
|
||||||
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.remapping;
|
package eu.dnetlib.dhp.actionset.common;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.remapping;
|
package eu.dnetlib.dhp.actionset.common;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
@ -57,7 +57,7 @@ public class PrepareInfo implements Serializable {
|
||||||
conf,
|
conf,
|
||||||
isSparkSessionManaged,
|
isSparkSessionManaged,
|
||||||
spark -> {
|
spark -> {
|
||||||
common.removeOutputDir(spark, outputPath);
|
Common.removeOutputDir(spark, outputPath);
|
||||||
exec(
|
exec(
|
||||||
spark, asInputPath, inputGraphPath, outputPath,
|
spark, asInputPath, inputGraphPath, outputPath,
|
||||||
actionSetList.stream().map(as -> {
|
actionSetList.stream().map(as -> {
|
||||||
|
@ -74,7 +74,7 @@ public class PrepareInfo implements Serializable {
|
||||||
private static void exec(SparkSession spark, String asInputPath, String graphInputPath, String outputPath,
|
private static void exec(SparkSession spark, String asInputPath, String graphInputPath, String outputPath,
|
||||||
List<ActionSet> actionSets) {
|
List<ActionSet> actionSets) {
|
||||||
|
|
||||||
Dataset<Relation> relation = common.readPath(spark, graphInputPath + "/relation", Relation.class);
|
Dataset<Relation> relation = Common.readPath(spark, graphInputPath + "/relation", Relation.class);
|
||||||
|
|
||||||
actionSets
|
actionSets
|
||||||
.forEach(
|
.forEach(
|
||||||
|
@ -104,7 +104,7 @@ public class PrepareInfo implements Serializable {
|
||||||
.createDataset(
|
.createDataset(
|
||||||
sc
|
sc
|
||||||
.sequenceFile(asInputPath, Text.class, Text.class)
|
.sequenceFile(asInputPath, Text.class, Text.class)
|
||||||
.map(a -> common.OBJECT_MAPPER.readValue(a._2().toString(), AtomicAction.class))
|
.map(a -> Common.OBJECT_MAPPER.readValue(a._2().toString(), AtomicAction.class))
|
||||||
.map(aa -> getAsResultInfo(aa))
|
.map(aa -> getAsResultInfo(aa))
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.rdd(),
|
.rdd(),
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.remapping;
|
package eu.dnetlib.dhp.actionset.common;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.List;
|
import java.util.List;
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.remapping;
|
package eu.dnetlib.dhp.actionset.common;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.List;
|
import java.util.List;
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.remapping;
|
package eu.dnetlib.dhp.actionset.common;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
@ -28,7 +28,7 @@ public class SparkExpandResultInfo implements Serializable {
|
||||||
.toString(
|
.toString(
|
||||||
SparkExpandResultInfo.class
|
SparkExpandResultInfo.class
|
||||||
.getResourceAsStream(
|
.getResourceAsStream(
|
||||||
"/eu/dnetlib/dhp/actionmanager/remapping/input_expand_parameters.json"));
|
"/eu/dnetlib/dhp/actionset/common/input_expand_parameters.json"));
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
parser.parseArgument(args);
|
parser.parseArgument(args);
|
||||||
|
@ -51,15 +51,15 @@ public class SparkExpandResultInfo implements Serializable {
|
||||||
conf,
|
conf,
|
||||||
isSparkSessionManaged,
|
isSparkSessionManaged,
|
||||||
spark -> {
|
spark -> {
|
||||||
common.removeOutputDir(spark, outputPath);
|
Common.removeOutputDir(spark, outputPath);
|
||||||
exec(spark, inputPath, outputPath);
|
exec(spark, inputPath, outputPath);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void exec(SparkSession spark, String inputPath, String outputPath) {
|
private static void exec(SparkSession spark, String inputPath, String outputPath) {
|
||||||
Dataset<RelationMerges> rel = common.readPath(spark, inputPath + "/relation", RelationMerges.class);
|
Dataset<RelationMerges> rel = Common.readPath(spark, inputPath + "/relation", RelationMerges.class);
|
||||||
|
|
||||||
Dataset<ASResultInfo> asResultInfo = common.readPath(spark, inputPath + "/actionset", ASResultInfo.class);
|
Dataset<ASResultInfo> asResultInfo = Common.readPath(spark, inputPath + "/actionset", ASResultInfo.class);
|
||||||
|
|
||||||
asResultInfo
|
asResultInfo
|
||||||
.joinWith(rel, asResultInfo.col("id").equalTo(rel.col("dedupId")), "left")
|
.joinWith(rel, asResultInfo.col("id").equalTo(rel.col("dedupId")), "left")
|
|
@ -1,27 +1,16 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.remapping;
|
package eu.dnetlib.dhp.actionset.common;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.hadoop.io.Text;
|
|
||||||
import org.apache.spark.SparkConf;
|
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
|
||||||
import org.apache.spark.sql.*;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
import com.google.gson.Gson;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
|
||||||
|
|
||||||
public class SparkPrepareInfo implements Serializable {
|
public class SparkPrepareInfo implements Serializable {
|
||||||
|
|
||||||
|
@ -32,7 +21,7 @@ public class SparkPrepareInfo implements Serializable {
|
||||||
.toString(
|
.toString(
|
||||||
SparkPrepareInfo.class
|
SparkPrepareInfo.class
|
||||||
.getResourceAsStream(
|
.getResourceAsStream(
|
||||||
"/eu/dnetlib/dhp/actionmanager/remapping/input_prepare_parameters.json"));
|
"/eu/dnetlib/dhp/actionset/common/input_prepare_parameters.json"));
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
parser.parseArgument(args);
|
parser.parseArgument(args);
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.remapping;
|
package eu.dnetlib.dhp.actionset.stable;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.remapping;
|
package eu.dnetlib.dhp.actionset.stable;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
@ -22,6 +22,8 @@ import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.actionset.common.ASResultInfo;
|
||||||
|
import eu.dnetlib.dhp.actionset.common.Common;
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
@ -36,7 +38,7 @@ public class SparkRedistributeIISRelations implements Serializable {
|
||||||
.toString(
|
.toString(
|
||||||
SparkRedistributeIISRelations.class
|
SparkRedistributeIISRelations.class
|
||||||
.getResourceAsStream(
|
.getResourceAsStream(
|
||||||
"/eu/dnetlib/dhp/actionmanager/remapping/input_redistribute_parameters.json"));
|
"/eu/dnetlib/dhp/actionset/stable/input_redistribute_parameters.json"));
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
parser.parseArgument(args);
|
parser.parseArgument(args);
|
||||||
|
@ -60,9 +62,9 @@ public class SparkRedistributeIISRelations implements Serializable {
|
||||||
conf,
|
conf,
|
||||||
isSparkSessionManaged,
|
isSparkSessionManaged,
|
||||||
spark -> {
|
spark -> {
|
||||||
common.removeOutputDir(spark, outputPath);
|
Common.removeOutputDir(spark, outputPath);
|
||||||
Dataset<ResultPid> resultPidDataset = common.readPath(spark, inputPath, ResultPid.class);
|
Dataset<ResultPid> resultPidDataset = Common.readPath(spark, inputPath, ResultPid.class);
|
||||||
Dataset<ASResultInfo> asResultInfoDataset = common.readPath(spark, asInputPath, ASResultInfo.class);
|
Dataset<ASResultInfo> asResultInfoDataset = Common.readPath(spark, asInputPath, ASResultInfo.class);
|
||||||
execRelation(spark, asResultInfoDataset.filter("type = 'relation'"), resultPidDataset, outputPath);
|
execRelation(spark, asResultInfoDataset.filter("type = 'relation'"), resultPidDataset, outputPath);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -129,6 +131,6 @@ public class SparkRedistributeIISRelations implements Serializable {
|
||||||
String st = "";
|
String st = "";
|
||||||
System.out.println(st);
|
System.out.println(st);
|
||||||
return new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
|
return new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
|
||||||
new Text(common.OBJECT_MAPPER.writeValueAsString(aa)));
|
new Text(Common.OBJECT_MAPPER.writeValueAsString(aa)));
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.remapping;
|
package eu.dnetlib.dhp.actionset.stable;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
@ -22,6 +22,8 @@ import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.actionset.common.ASResultInfo;
|
||||||
|
import eu.dnetlib.dhp.actionset.common.Common;
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Context;
|
import eu.dnetlib.dhp.schema.oaf.Context;
|
||||||
|
@ -30,16 +32,16 @@ import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
||||||
public class SparkRedistributeIISResult implements Serializable {
|
public class SparkRedistributeIISResults implements Serializable {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(SparkRedistributeIISResult.class);
|
private static final Logger log = LoggerFactory.getLogger(SparkRedistributeIISResults.class);
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
String jsonConfiguration = IOUtils
|
String jsonConfiguration = IOUtils
|
||||||
.toString(
|
.toString(
|
||||||
SparkRedistributeIISResult.class
|
SparkRedistributeIISResults.class
|
||||||
.getResourceAsStream(
|
.getResourceAsStream(
|
||||||
"/eu/dnetlib/dhp/actionmanager/remapping/input_redistribute_parameters.json"));
|
"/eu/dnetlib/dhp/actionset/stable/input_redistribute_parameters.json"));
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
parser.parseArgument(args);
|
parser.parseArgument(args);
|
||||||
|
@ -66,9 +68,9 @@ public class SparkRedistributeIISResult implements Serializable {
|
||||||
conf,
|
conf,
|
||||||
isSparkSessionManaged,
|
isSparkSessionManaged,
|
||||||
spark -> {
|
spark -> {
|
||||||
common.removeOutputDir(spark, outputPath);
|
Common.removeOutputDir(spark, outputPath);
|
||||||
Dataset<ResultPid> resultPidDataset = common.readPath(spark, inputPath, ResultPid.class);
|
Dataset<ResultPid> resultPidDataset = Common.readPath(spark, inputPath, ResultPid.class);
|
||||||
Dataset<ASResultInfo> asResultInfoDataset = common.readPath(spark, asInputPath, ASResultInfo.class);
|
Dataset<ASResultInfo> asResultInfoDataset = Common.readPath(spark, asInputPath, ASResultInfo.class);
|
||||||
execResult(spark, asResultInfoDataset.filter("type = 'result'"), resultPidDataset, outputPath);
|
execResult(spark, asResultInfoDataset.filter("type = 'result'"), resultPidDataset, outputPath);
|
||||||
// execRelation(spark, asResultInfoDataset.filter("type = 'relation'"), resultPidDataset,
|
// execRelation(spark, asResultInfoDataset.filter("type = 'relation'"), resultPidDataset,
|
||||||
// outputPathRelation);
|
// outputPathRelation);
|
||||||
|
@ -120,6 +122,6 @@ public class SparkRedistributeIISResult implements Serializable {
|
||||||
private static Tuple2<Text, Text> getTextTextTuple2(Result r) throws JsonProcessingException {
|
private static Tuple2<Text, Text> getTextTextTuple2(Result r) throws JsonProcessingException {
|
||||||
AtomicAction aa = new AtomicAction(Result.class, r);
|
AtomicAction aa = new AtomicAction(Result.class, r);
|
||||||
return new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
|
return new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
|
||||||
new Text(common.OBJECT_MAPPER.writeValueAsString(aa)));
|
new Text(Common.OBJECT_MAPPER.writeValueAsString(aa)));
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.remapping;
|
package eu.dnetlib.dhp.actionset.stable;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
@ -14,6 +14,7 @@ import org.apache.spark.sql.SparkSession;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.actionset.common.Common;
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
@ -26,7 +27,7 @@ public class SparkSelectResults implements Serializable {
|
||||||
.toString(
|
.toString(
|
||||||
SparkSelectResults.class
|
SparkSelectResults.class
|
||||||
.getResourceAsStream(
|
.getResourceAsStream(
|
||||||
"/eu/dnetlib/dhp/actionmanager/remapping/input_select_parameters.json"));
|
"/eu/dnetlib/dhp/actionset/stable/input_select_parameters.json"));
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
parser.parseArgument(args);
|
parser.parseArgument(args);
|
||||||
|
@ -53,7 +54,7 @@ public class SparkSelectResults implements Serializable {
|
||||||
conf,
|
conf,
|
||||||
isSparkSessionManaged,
|
isSparkSessionManaged,
|
||||||
spark -> {
|
spark -> {
|
||||||
common.removeOutputDir(spark, outputPath);
|
Common.removeOutputDir(spark, outputPath);
|
||||||
run(spark, inputPath, outputPath);
|
run(spark, inputPath, outputPath);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -66,7 +67,7 @@ public class SparkSelectResults implements Serializable {
|
||||||
|
|
||||||
private static <R extends Result> void exec(SparkSession spark, String inputPath, String outputPath,
|
private static <R extends Result> void exec(SparkSession spark, String inputPath, String outputPath,
|
||||||
Class<R> resultClazz) {
|
Class<R> resultClazz) {
|
||||||
Dataset<R> result = common.readPath(spark, inputPath, resultClazz);
|
Dataset<R> result = Common.readPath(spark, inputPath, resultClazz);
|
||||||
|
|
||||||
result.createOrReplaceTempView("result");
|
result.createOrReplaceTempView("result");
|
||||||
|
|
|
@ -51,4 +51,8 @@
|
||||||
<name>sparkExecutorCores</name>
|
<name>sparkExecutorCores</name>
|
||||||
<value>1</value>
|
<value>1</value>
|
||||||
</property>
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
</configuration>
|
</configuration>
|
|
@ -30,8 +30,10 @@
|
||||||
</kill>
|
</kill>
|
||||||
<action name="deleteoutputpath">
|
<action name="deleteoutputpath">
|
||||||
<fs>
|
<fs>
|
||||||
<delete path='${outputPath}'/>
|
<delete path='${relationOutputPath}'/>
|
||||||
<mkdir path='${outputPath}'/>
|
<mkdir path='${relationOutputPath}'/>
|
||||||
|
<delete path='${resultOutputPath}'/>
|
||||||
|
<mkdir path='${resultOutputPath}'/>
|
||||||
<delete path='${workingDir}'/>
|
<delete path='${workingDir}'/>
|
||||||
<mkdir path='${workingDir}'/>
|
<mkdir path='${workingDir}'/>
|
||||||
</fs>
|
</fs>
|
||||||
|
@ -45,8 +47,8 @@
|
||||||
<master>yarn</master>
|
<master>yarn</master>
|
||||||
<mode>cluster</mode>
|
<mode>cluster</mode>
|
||||||
<name>PrepareInfo</name>
|
<name>PrepareInfo</name>
|
||||||
<class>eu.dnetlib.dhp.actionmanager.remapping.SparkPrepareInfo</class>
|
<class>eu.dnetlib.dhp.actionset.common.SparkPrepareInfo</class>
|
||||||
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
<jar>dhp-actionset-remapping-${projectVersion}.jar</jar>
|
||||||
<spark-opts>
|
<spark-opts>
|
||||||
--executor-cores=${sparkExecutorCores}
|
--executor-cores=${sparkExecutorCores}
|
||||||
--executor-memory=${sparkExecutorMemory}
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
@ -59,7 +61,7 @@
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||||
<arg>--actionSets</arg><arg>${actionSets}</arg>
|
<arg>--actionSets</arg><arg>${actionSets}</arg>
|
||||||
<arg>--inputPath</arg><arg>${inputPath</arg>
|
<arg>--inputPath</arg><arg>${inputPath}</arg>
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo</arg>
|
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="prepare_next"/>
|
<ok to="prepare_next"/>
|
||||||
|
@ -76,8 +78,8 @@
|
||||||
<master>yarn</master>
|
<master>yarn</master>
|
||||||
<mode>cluster</mode>
|
<mode>cluster</mode>
|
||||||
<name>ExpandRelation</name>
|
<name>ExpandRelation</name>
|
||||||
<class>eu.dnetlib.dhp.actionmanager.remapping.SparkExpandResultInfo</class>
|
<class>eu.dnetlib.dhp.actionset.common.SparkExpandResultInfo</class>
|
||||||
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
<jar>dhp-actionset-remapping-${projectVersion}.jar</jar>
|
||||||
<spark-opts>
|
<spark-opts>
|
||||||
--executor-cores=${sparkExecutorCores}
|
--executor-cores=${sparkExecutorCores}
|
||||||
--executor-memory=${sparkExecutorMemory}
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
@ -100,8 +102,8 @@
|
||||||
<master>yarn</master>
|
<master>yarn</master>
|
||||||
<mode>cluster</mode>
|
<mode>cluster</mode>
|
||||||
<name>SelectResults</name>
|
<name>SelectResults</name>
|
||||||
<class>eu.dnetlib.dhp.actionmanager.remapping.SparkSelectResults</class>
|
<class>eu.dnetlib.dhp.actionset.stable.SparkSelectResults</class>
|
||||||
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
<jar>dhp-actionset-remapping-${projectVersion}.jar</jar>
|
||||||
<spark-opts>
|
<spark-opts>
|
||||||
--executor-cores=${sparkExecutorCores}
|
--executor-cores=${sparkExecutorCores}
|
||||||
--executor-memory=${sparkExecutorMemory}
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
@ -131,8 +133,8 @@
|
||||||
<master>yarn</master>
|
<master>yarn</master>
|
||||||
<mode>cluster</mode>
|
<mode>cluster</mode>
|
||||||
<name>RedistributeRelation</name>
|
<name>RedistributeRelation</name>
|
||||||
<class>eu.dnetlib.dhp.actionmanager.remapping.SparkRedistributeIISRelations</class>
|
<class>eu.dnetlib.dhp.actionset.stable.SparkRedistributeIISRelations</class>
|
||||||
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
<jar>dhp-actionset-remapping-${projectVersion}.jar</jar>
|
||||||
<spark-opts>
|
<spark-opts>
|
||||||
--executor-cores=${sparkExecutorCores}
|
--executor-cores=${sparkExecutorCores}
|
||||||
--executor-memory=${sparkExecutorMemory}
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
@ -145,7 +147,7 @@
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--inputPath</arg><arg>${workingDir}/selectedResults</arg>
|
<arg>--inputPath</arg><arg>${workingDir}/selectedResults</arg>
|
||||||
<arg>--asInputPath</arg><arg>${workingDir}/expandedActionSet</arg>
|
<arg>--asInputPath</arg><arg>${workingDir}/expandedActionSet</arg>
|
||||||
<arg>--outputPath</arg><arg>${relationOutputPath></arg>
|
<arg>--outputPath</arg><arg>${relationOutputPath}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait2"/>
|
<ok to="wait2"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -156,8 +158,8 @@
|
||||||
<master>yarn</master>
|
<master>yarn</master>
|
||||||
<mode>cluster</mode>
|
<mode>cluster</mode>
|
||||||
<name>SelectResults</name>
|
<name>SelectResults</name>
|
||||||
<class>eu.dnetlib.dhp.actionmanager.remapping.SparkRedistributeIISResult</class>
|
<class>eu.dnetlib.dhp.actionset.stable.SparkRedistributeIISResult</class>
|
||||||
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
<jar>dhp-actionset-remapping-${projectVersion}.jar</jar>
|
||||||
<spark-opts>
|
<spark-opts>
|
||||||
--executor-cores=${sparkExecutorCores}
|
--executor-cores=${sparkExecutorCores}
|
||||||
--executor-memory=${sparkExecutorMemory}
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
@ -170,7 +172,7 @@
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--inputPath</arg><arg>${workingDir}/selectedResults</arg>
|
<arg>--inputPath</arg><arg>${workingDir}/selectedResults</arg>
|
||||||
<arg>--asInputPath</arg><arg>${workingDir}/expandedActionSet</arg>
|
<arg>--asInputPath</arg><arg>${workingDir}/expandedActionSet</arg>
|
||||||
<arg>--outputPath</arg><arg>${resultOutputPath></arg>
|
<arg>--outputPath</arg><arg>${resultOutputPath}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait2"/>
|
<ok to="wait2"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
|
@ -1,12 +1,11 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.remapping;
|
package eu.dnetlib.dhp.actionset;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
|
||||||
import org.apache.commons.io.FileUtils;
|
import org.apache.commons.io.FileUtils;
|
||||||
import org.apache.neethi.Assertion;
|
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.JavaRDD;
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
|
@ -23,15 +22,13 @@ import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.actionmanager.project.SparkAtomicActionJob;
|
import eu.dnetlib.dhp.actionset.common.ASResultInfo;
|
||||||
|
import eu.dnetlib.dhp.actionset.common.SparkExpandResultInfo;
|
||||||
|
|
||||||
public class ExpandResultInfoTest {
|
public class ExpandResultInfoTest {
|
||||||
|
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
private static final ClassLoader cl = PrepareInfoTest.class
|
|
||||||
.getClassLoader();
|
|
||||||
|
|
||||||
private static SparkSession spark;
|
private static SparkSession spark;
|
||||||
private static final String FAKE_ISLOOKUP = "http://beta.services.openaire.eu/";
|
private static final String FAKE_ISLOOKUP = "http://beta.services.openaire.eu/";
|
||||||
|
|
||||||
|
@ -76,7 +73,7 @@ public class ExpandResultInfoTest {
|
||||||
"-isSparkSessionManaged",
|
"-isSparkSessionManaged",
|
||||||
Boolean.FALSE.toString(),
|
Boolean.FALSE.toString(),
|
||||||
"-inputPath",
|
"-inputPath",
|
||||||
getClass().getResource("/eu/dnetlib/dhp/actionmanager/remapping/step2/preparedInfo").getPath(),
|
getClass().getResource("/eu/dnetlib/dhp/actionset/step2/preparedInfo").getPath(),
|
||||||
"-outputPath",
|
"-outputPath",
|
||||||
workingDir.toString() + "/expandedActionSet"
|
workingDir.toString() + "/expandedActionSet"
|
||||||
});
|
});
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.remapping;
|
package eu.dnetlib.dhp.actionset;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
|
@ -22,6 +22,11 @@ import org.slf4j.LoggerFactory;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import com.google.gson.Gson;
|
import com.google.gson.Gson;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.actionset.common.ASResultInfo;
|
||||||
|
import eu.dnetlib.dhp.actionset.common.ActionSet;
|
||||||
|
import eu.dnetlib.dhp.actionset.common.PrepareInfo;
|
||||||
|
import eu.dnetlib.dhp.actionset.common.RelationMerges;
|
||||||
|
|
||||||
@Disabled
|
@Disabled
|
||||||
public class PrepareInfoTest {
|
public class PrepareInfoTest {
|
||||||
|
|
||||||
|
@ -187,8 +192,8 @@ public class PrepareInfoTest {
|
||||||
public void testRelationsAS() {
|
public void testRelationsAS() {
|
||||||
PrepareInfo pi = new PrepareInfo(false,
|
PrepareInfo pi = new PrepareInfo(false,
|
||||||
workingDir.toString() + "/preparedInfo",
|
workingDir.toString() + "/preparedInfo",
|
||||||
getClass().getResource("/eu/dnetlib/dhp/actionmanager/remapping/step1/asInputPath").getPath(),
|
getClass().getResource("/eu/dnetlib/dhp/actionset/step1/asInputPath").getPath(),
|
||||||
getClass().getResource("/eu/dnetlib/dhp/actionmanager/remapping/step1").getPath(),
|
getClass().getResource("/eu/dnetlib/dhp/actionset/step1").getPath(),
|
||||||
new Gson().fromJson("[\"iis-referenced-projects-main\"]", List.class),
|
new Gson().fromJson("[\"iis-referenced-projects-main\"]", List.class),
|
||||||
actionSetList);
|
actionSetList);
|
||||||
|
|
||||||
|
@ -286,8 +291,8 @@ public class PrepareInfoTest {
|
||||||
public void testInitiative() {
|
public void testInitiative() {
|
||||||
PrepareInfo pi = new PrepareInfo(false,
|
PrepareInfo pi = new PrepareInfo(false,
|
||||||
workingDir.toString() + "/preparedInfo",
|
workingDir.toString() + "/preparedInfo",
|
||||||
getClass().getResource("/eu/dnetlib/dhp/actionmanager/remapping/step1/asInputPath").getPath(),
|
getClass().getResource("/eu/dnetlib/dhp/actionset/step1/asInputPath").getPath(),
|
||||||
getClass().getResource("/eu/dnetlib/dhp/actionmanager/remapping/step1").getPath(),
|
getClass().getResource("/eu/dnetlib/dhp/actionset/step1").getPath(),
|
||||||
new Gson().fromJson("[\"iis-researchinitiative\"]", List.class),
|
new Gson().fromJson("[\"iis-researchinitiative\"]", List.class),
|
||||||
actionSetList);
|
actionSetList);
|
||||||
|
|
||||||
|
@ -355,8 +360,8 @@ public class PrepareInfoTest {
|
||||||
public void testCommunities() {
|
public void testCommunities() {
|
||||||
PrepareInfo pi = new PrepareInfo(false,
|
PrepareInfo pi = new PrepareInfo(false,
|
||||||
workingDir.toString() + "/preparedInfo",
|
workingDir.toString() + "/preparedInfo",
|
||||||
getClass().getResource("/eu/dnetlib/dhp/actionmanager/remapping/step1/asInputPath").getPath(),
|
getClass().getResource("/eu/dnetlib/dhp/actionset/step1/asInputPath").getPath(),
|
||||||
getClass().getResource("/eu/dnetlib/dhp/actionmanager/remapping/step1").getPath(),
|
getClass().getResource("/eu/dnetlib/dhp/actionset/step1").getPath(),
|
||||||
new Gson().fromJson("[\"iis-communities\"]", List.class),
|
new Gson().fromJson("[\"iis-communities\"]", List.class),
|
||||||
actionSetList);
|
actionSetList);
|
||||||
|
|
||||||
|
@ -416,8 +421,8 @@ public class PrepareInfoTest {
|
||||||
public void testCovid19() {
|
public void testCovid19() {
|
||||||
PrepareInfo pi = new PrepareInfo(false,
|
PrepareInfo pi = new PrepareInfo(false,
|
||||||
workingDir.toString() + "/preparedInfo",
|
workingDir.toString() + "/preparedInfo",
|
||||||
getClass().getResource("/eu/dnetlib/dhp/actionmanager/remapping/step1/asInputPath").getPath(),
|
getClass().getResource("/eu/dnetlib/dhp/actionset/step1/asInputPath").getPath(),
|
||||||
getClass().getResource("/eu/dnetlib/dhp/actionmanager/remapping/step1").getPath(),
|
getClass().getResource("/eu/dnetlib/dhp/actionset/step1").getPath(),
|
||||||
new Gson().fromJson("[\"iis-covid-19\"]", List.class),
|
new Gson().fromJson("[\"iis-covid-19\"]", List.class),
|
||||||
actionSetList);
|
actionSetList);
|
||||||
|
|
||||||
|
@ -462,8 +467,8 @@ public class PrepareInfoTest {
|
||||||
public void testAll() {
|
public void testAll() {
|
||||||
PrepareInfo pi = new PrepareInfo(false,
|
PrepareInfo pi = new PrepareInfo(false,
|
||||||
workingDir.toString() + "/preparedInfo",
|
workingDir.toString() + "/preparedInfo",
|
||||||
getClass().getResource("/eu/dnetlib/dhp/actionmanager/remapping/step1/asInputPath").getPath(),
|
getClass().getResource("/eu/dnetlib/dhp/actionset/step1/asInputPath").getPath(),
|
||||||
getClass().getResource("/eu/dnetlib/dhp/actionmanager/remapping/step1").getPath(),
|
getClass().getResource("/eu/dnetlib/dhp/actionset/step1").getPath(),
|
||||||
new Gson()
|
new Gson()
|
||||||
.fromJson(
|
.fromJson(
|
||||||
"[\"iis-researchinitiative\",\"iis-referenced-projects-main\",\"iis-communities\",\"iis-covid-19\"]",
|
"[\"iis-researchinitiative\",\"iis-referenced-projects-main\",\"iis-communities\",\"iis-covid-19\"]",
|
||||||
|
@ -497,8 +502,8 @@ public class PrepareInfoTest {
|
||||||
public void testRelationMerged() {
|
public void testRelationMerged() {
|
||||||
PrepareInfo pi = new PrepareInfo(false,
|
PrepareInfo pi = new PrepareInfo(false,
|
||||||
workingDir.toString() + "/preparedInfo",
|
workingDir.toString() + "/preparedInfo",
|
||||||
getClass().getResource("/eu/dnetlib/dhp/actionmanager/remapping/step1/asInputPath").getPath(),
|
getClass().getResource("/eu/dnetlib/dhp/actionset/step1/asInputPath").getPath(),
|
||||||
getClass().getResource("/eu/dnetlib/dhp/actionmanager/remapping/step1").getPath(),
|
getClass().getResource("/eu/dnetlib/dhp/actionset/step1").getPath(),
|
||||||
new Gson().fromJson("[]", List.class),
|
new Gson().fromJson("[]", List.class),
|
||||||
actionSetList);
|
actionSetList);
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.remapping;
|
package eu.dnetlib.dhp.actionset;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
|
@ -22,30 +22,29 @@ import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.actionmanager.remapping.SparkRedistributeIISResult;
|
||||||
|
import eu.dnetlib.dhp.actionset.stable.SparkRedistributeIISRelations;
|
||||||
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
|
||||||
public class RedistributeIISResultTest {
|
public class RedistributeIISLinksTest {
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
private static final ClassLoader cl = RedistributeIISResultTest.class
|
|
||||||
.getClassLoader();
|
|
||||||
|
|
||||||
private static SparkSession spark;
|
private static SparkSession spark;
|
||||||
|
|
||||||
private static Path workingDir;
|
private static Path workingDir;
|
||||||
private static final Logger log = LoggerFactory
|
private static final Logger log = LoggerFactory
|
||||||
.getLogger(RedistributeIISResultTest.class);
|
.getLogger(RedistributeIISLinksTest.class);
|
||||||
|
|
||||||
@BeforeAll
|
@BeforeAll
|
||||||
public static void beforeAll() throws IOException {
|
public static void beforeAll() throws IOException {
|
||||||
workingDir = Files
|
workingDir = Files
|
||||||
.createTempDirectory(RedistributeIISResultTest.class.getSimpleName());
|
.createTempDirectory(RedistributeIISLinksTest.class.getSimpleName());
|
||||||
log.info("using work dir {}", workingDir);
|
log.info("using work dir {}", workingDir);
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
SparkConf conf = new SparkConf();
|
||||||
conf.setAppName(RedistributeIISResultTest.class.getSimpleName());
|
conf.setAppName(RedistributeIISLinksTest.class.getSimpleName());
|
||||||
|
|
||||||
conf.setMaster("local[*]");
|
conf.setMaster("local[*]");
|
||||||
conf.set("spark.driver.host", "localhost");
|
conf.set("spark.driver.host", "localhost");
|
||||||
|
@ -56,7 +55,7 @@ public class RedistributeIISResultTest {
|
||||||
|
|
||||||
spark = SparkSession
|
spark = SparkSession
|
||||||
.builder()
|
.builder()
|
||||||
.appName(RedistributeIISResultTest.class.getSimpleName())
|
.appName(RedistributeIISLinksTest.class.getSimpleName())
|
||||||
.config(conf)
|
.config(conf)
|
||||||
.getOrCreate();
|
.getOrCreate();
|
||||||
}
|
}
|
||||||
|
@ -76,13 +75,13 @@ public class RedistributeIISResultTest {
|
||||||
Boolean.FALSE.toString(),
|
Boolean.FALSE.toString(),
|
||||||
"-asInputPath",
|
"-asInputPath",
|
||||||
getClass()
|
getClass()
|
||||||
.getResource("/eu/dnetlib/dhp/actionmanager/remapping/step4/actionset")
|
.getResource("/eu/dnetlib/dhp/actionset/step4/actionset")
|
||||||
.getPath(),
|
.getPath(),
|
||||||
"-outputPath",
|
"-outputPath",
|
||||||
workingDir.toString() + "/relationActionSet",
|
workingDir.toString() + "/relationActionSet",
|
||||||
"-inputPath",
|
"-inputPath",
|
||||||
getClass()
|
getClass()
|
||||||
.getResource("/eu/dnetlib/dhp/actionmanager/remapping/step4/result")
|
.getResource("/eu/dnetlib/dhp/actionset/step4/result")
|
||||||
.getPath()
|
.getPath()
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -156,13 +155,13 @@ public class RedistributeIISResultTest {
|
||||||
Boolean.FALSE.toString(),
|
Boolean.FALSE.toString(),
|
||||||
"-asInputPath",
|
"-asInputPath",
|
||||||
getClass()
|
getClass()
|
||||||
.getResource("/eu/dnetlib/dhp/actionmanager/remapping/step4/actionset")
|
.getResource("/eu/dnetlib/dhp/actionset/step4/actionset")
|
||||||
.getPath(),
|
.getPath(),
|
||||||
"-outputPath",
|
"-outputPath",
|
||||||
workingDir.toString() + "/resultActionSet",
|
workingDir.toString() + "/resultActionSet",
|
||||||
"-inputPath",
|
"-inputPath",
|
||||||
getClass()
|
getClass()
|
||||||
.getResource("/eu/dnetlib/dhp/actionmanager/remapping/step4/result")
|
.getResource("/eu/dnetlib/dhp/actionset/step4/result")
|
||||||
.getPath()
|
.getPath()
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -179,50 +178,6 @@ public class RedistributeIISResultTest {
|
||||||
|
|
||||||
verificationDataset.createOrReplaceTempView("verificationDataset");
|
verificationDataset.createOrReplaceTempView("verificationDataset");
|
||||||
|
|
||||||
// Assertions.assertEquals(0, verificationDataset.filter("substr(id,1,8) = '50|dedup'").count());
|
|
||||||
//
|
|
||||||
// Assertions.assertEquals(3, verificationDataset.filter("id = '50|doiboost____::0f10b8f21b7925a344f41edb774f0b0a'").count());
|
|
||||||
// Assertions.assertEquals(3, verificationDataset.filter("id = '50|od_______166::779de9b3a2d224779be52fae43b5fc80'").count());
|
|
||||||
// Assertions.assertEquals(3, verificationDataset.filter("id = '50|od_______165::779de9b3a2d224779be52fae43b5fc80'").count());
|
|
||||||
// Assertions.assertEquals(3, verificationDataset.filter("id = '50|od______3515::779de9b3a2d224779be52fae43b5fc80'").count());
|
|
||||||
//
|
|
||||||
// Assertions.assertEquals(2, verificationDataset.filter("id = '50|doiboost____::78329557c23bee513963ebf295d1434d'").count());
|
|
||||||
// Assertions.assertEquals(2, verificationDataset.filter("id = '50|doiboost____::8978b9b797294da5306950a94a58d98c'").count());
|
|
||||||
// Assertions.assertEquals(2, verificationDataset.filter("id = '50|doiboost____::fb2c70723d74f45329640255a959333d'").count());
|
|
||||||
// Assertions.assertEquals(2, verificationDataset.filter("id = '50|base_oa_____::fb2c70723d74f45329640255a959333d'").count());
|
|
||||||
//
|
|
||||||
// Assertions.assertEquals(5, verificationDataset.filter("id = '50|_____OmicsDI::039dbb63f11b19dc15113b34ebceb0d2' " +
|
|
||||||
// "or id = '50|_____OmicsDI::05f133acca27d72866c6720a95515f57' or " +
|
|
||||||
// "id = '50|_____OmicsDI::2d508eba981699a30e969d1ab5a068b8' or " +
|
|
||||||
// "id = '50|datacite____::00bddedc38dc045780dc84c27bc8fecd' or " +
|
|
||||||
// "id = '50|datacite____::00f7f89392fa75e944dc8d329e9e8024'").count());
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// verificationDataset.createOrReplaceTempView("verificationDataset");
|
|
||||||
//
|
|
||||||
// Dataset<Row> verify = spark.sql(("SELECT id, type, val.value value, val.trust trust, val.inference_provenance prov " +
|
|
||||||
// "FROM verificationDataset " +
|
|
||||||
// "LATERAL VIEW EXPLODE(value) v as val"));
|
|
||||||
//
|
|
||||||
// Assertions.assertEquals(25, verify.count());
|
|
||||||
//
|
|
||||||
// Assertions.assertEquals(20, verify.filter("type = 'relation'").count());
|
|
||||||
// Assertions.assertEquals(5, verify.filter("type = 'result'").count());
|
|
||||||
//
|
|
||||||
// Assertions.assertEquals(1, verify.filter("id = '50|doiboost____::0f10b8f21b7925a344f41edb774f0b0a' " +
|
|
||||||
// "and value = '40|rcuk________::8dec51859e6b66cd040670b432b9e59c' and " +
|
|
||||||
// "prov = 'iis::document_referencedProjects' and " +
|
|
||||||
// "trust = '0.897'").count());
|
|
||||||
//
|
|
||||||
// Assertions.assertEquals(1, verify.filter("id = '50|doiboost____::0f10b8f21b7925a344f41edb774f0b0a' " +
|
|
||||||
// "and value = '40|rcuk________::5e312e08bd65f126d7d79b3d1d677eb3' and " +
|
|
||||||
// "prov = 'iis::document_referencedProjects' and " +
|
|
||||||
// "trust = '0.897'").count());
|
|
||||||
//
|
|
||||||
// Assertions.assertEquals(1, verify.filter("id = '50|doiboost____::0f10b8f21b7925a344f41edb774f0b0a' " +
|
|
||||||
// "and value = '40|corda_______::6d500f8fceb2bb81b0750820469e1cd8' and " +
|
|
||||||
// "prov = 'iis::document_referencedProjects' and " +
|
|
||||||
// "trust = '0.7085'").count());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.remapping;
|
package eu.dnetlib.dhp.actionset;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
|
@ -21,12 +21,12 @@ import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.actionset.stable.ResultPid;
|
||||||
|
import eu.dnetlib.dhp.actionset.stable.SparkSelectResults;
|
||||||
|
|
||||||
public class SelectResultTest {
|
public class SelectResultTest {
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
private static final ClassLoader cl = SelectResultTest.class
|
|
||||||
.getClassLoader();
|
|
||||||
|
|
||||||
private static SparkSession spark;
|
private static SparkSession spark;
|
||||||
|
|
||||||
private static Path workingDir;
|
private static Path workingDir;
|
||||||
|
@ -71,7 +71,7 @@ public class SelectResultTest {
|
||||||
Boolean.FALSE.toString(),
|
Boolean.FALSE.toString(),
|
||||||
"-inputPath",
|
"-inputPath",
|
||||||
getClass()
|
getClass()
|
||||||
.getResource("/eu/dnetlib/dhp/actionmanager/remapping/step3")
|
.getResource("/eu/dnetlib/dhp/actionset/step3")
|
||||||
.getPath(),
|
.getPath(),
|
||||||
"-outputPath",
|
"-outputPath",
|
||||||
workingDir.toString() + "/selectedResults"
|
workingDir.toString() + "/selectedResults"
|
Loading…
Reference in New Issue