code formatting
This commit is contained in:
parent a0311e8a90
commit 1763d377ad

@@ -18,7 +18,6 @@ package eu.dnetlib.pace.util;
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Diff Match and Patch
 * Copyright 2018 The diff-match-patch Authors.

@@ -79,8 +79,8 @@ public class PrepareAffiliationRelationsTest {
			.getPath();

		String pubmedAffiliationRelationsPath = getClass()
			.getResource("/eu/dnetlib/dhp/actionmanager/bipaffiliations/doi_to_ror.json")
			.getPath();

		String outputPath = workingDir.toString() + "/actionSet";

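The test hunk above resolves its fixture files from the test classpath rather than from absolute paths. A minimal sketch of that lookup follows; the resource name is made up for illustration, any file under src/test/resources resolves the same way:

	// Minimal sketch of the classpath lookup used above to locate test fixtures.
	public class ResourcePathExample {
		public static void main(String[] args) {
			// getResource returns null when the file is missing, so this chain would NPE then
			String path = ResourcePathExample.class
				.getResource("/eu/dnetlib/dhp/example/fixture.json") // leading '/' = absolute classpath name
				.getPath();
			System.out.println(path);
		}
	}
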
@@ -31,94 +31,94 @@ import scala.Tuple2;

public class PrepareResultCommunitySet {

	private static final Logger log = LoggerFactory.getLogger(PrepareResultCommunitySet.class);

	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				PrepareResultCommunitySet.class
					.getResourceAsStream(
						"/eu/dnetlib/dhp/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json"));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);

		final String outputPath = parser.get("outputPath");
		log.info("outputPath: {}", outputPath);

		final boolean production = Boolean.valueOf(parser.get("production"));
		log.info("production: {}", production);

		final CommunityEntityMap projectsMap = Utils.getCommunityProjects(production);
		// log.info("projectsMap: {}", new Gson().toJson(projectsMap));

		SparkConf conf = new SparkConf();

		runWithSparkSession(
			conf,
			isSparkSessionManaged,
			spark -> {
				removeOutputDir(spark, outputPath);
				prepareInfo(spark, inputPath, outputPath, projectsMap);
			});
	}

	private static void prepareInfo(
		SparkSession spark,
		String inputPath,
		String outputPath,
		CommunityEntityMap projectMap) {

		final StructType structureSchema = new StructType()
			.add(
				"dataInfo", new StructType()
					.add("deletedbyinference", DataTypes.BooleanType)
					.add("invisible", DataTypes.BooleanType))
			.add("source", DataTypes.StringType)
			.add("target", DataTypes.StringType)
			.add("relClass", DataTypes.StringType);

		spark
			.read()
			.schema(structureSchema)
			.json(inputPath)
			.filter(
				"dataInfo.deletedbyinference != true " +
					"and relClass == '" + ModelConstants.IS_PRODUCED_BY + "'")
			.select(
				new Column("source").as("resultId"),
				new Column("target").as("projectId"))
			.groupByKey((MapFunction<Row, String>) r -> (String) r.getAs("resultId"), Encoders.STRING())
			.mapGroups((MapGroupsFunction<String, Row, ResultProjectList>) (k, v) -> {
				ResultProjectList rpl = new ResultProjectList();
				rpl.setResultId(k);
				ArrayList<String> cl = new ArrayList<>();
				cl.addAll(projectMap.get(v.next().getAs("projectId")));
				v.forEachRemaining(r -> {
					projectMap
						.get(r.getAs("projectId"))
						.forEach(c -> {
							if (!cl.contains(c))
								cl.add(c);
						});

				});
				if (cl.size() == 0)
					return null;
				rpl.setCommunityList(cl);
				return rpl;
			}, Encoders.bean(ResultProjectList.class))
			.filter(Objects::nonNull)
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(outputPath);
	}

}

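prepareInfo above reads the relations with an explicit schema, keeps only non-deleted isProducedBy links, and collapses each result's project-to-community lookups into one ResultProjectList via groupByKey/mapGroups. A self-contained sketch of that grouping pattern on toy data; the class name and rows are hypothetical, not part of the commit:

	import java.util.Arrays;
	import java.util.List;

	import org.apache.spark.api.java.function.MapFunction;
	import org.apache.spark.api.java.function.MapGroupsFunction;
	import org.apache.spark.sql.Dataset;
	import org.apache.spark.sql.Encoders;
	import org.apache.spark.sql.Row;
	import org.apache.spark.sql.RowFactory;
	import org.apache.spark.sql.SparkSession;
	import org.apache.spark.sql.types.DataTypes;
	import org.apache.spark.sql.types.StructType;

	public class GroupByKeyExample {
		public static void main(String[] args) {
			SparkSession spark = SparkSession.builder().master("local[*]").appName("sketch").getOrCreate();

			StructType schema = new StructType()
				.add("resultId", DataTypes.StringType)
				.add("projectId", DataTypes.StringType);

			List<Row> rows = Arrays.asList(
				RowFactory.create("r1", "p1"),
				RowFactory.create("r1", "p2"),
				RowFactory.create("r2", "p1"));

			Dataset<String> collapsed = spark
				.createDataFrame(rows, schema)
				// one group per resultId, mirroring prepareInfo
				.groupByKey((MapFunction<Row, String>) r -> r.getAs("resultId"), Encoders.STRING())
				// fold each group into a single "resultId -> projects" line
				.mapGroups((MapGroupsFunction<String, Row, String>) (key, it) -> {
					StringBuilder sb = new StringBuilder(key).append(" ->");
					it.forEachRemaining(r -> sb.append(' ').append((String) r.getAs("projectId")));
					return sb.toString();
				}, Encoders.STRING());

			collapsed.show(false); // r1 -> p1 p2, r2 -> p1
			spark.stop();
		}
	}
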
@@ -5,22 +5,22 @@ import java.io.Serializable;
import java.util.ArrayList;

public class ResultProjectList implements Serializable {
	private String resultId;
	private ArrayList<String> communityList;

	public String getResultId() {
		return resultId;
	}

	public void setResultId(String resultId) {
		this.resultId = resultId;
	}

	public ArrayList<String> getCommunityList() {
		return communityList;
	}

	public void setCommunityList(ArrayList<String> communityList) {
		this.communityList = communityList;
	}
}

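This bean stays a plain JavaBean (public no-arg constructor, getters and setters) because Encoders.bean(ResultProjectList.class) in the jobs above derives the Spark schema from exactly those accessors. A hedged sketch of the round trip, with made-up values:

	// Hypothetical sketch: Encoders.bean derives a Spark schema from the bean's getters/setters.
	import java.util.ArrayList;
	import java.util.Arrays;

	import org.apache.spark.sql.Dataset;
	import org.apache.spark.sql.Encoders;
	import org.apache.spark.sql.SparkSession;

	public class BeanEncoderExample {
		public static void main(String[] args) {
			SparkSession spark = SparkSession.builder().master("local[*]").appName("sketch").getOrCreate();

			ResultProjectList rpl = new ResultProjectList();
			rpl.setResultId("50|example::0001"); // made-up identifier
			rpl.setCommunityList(new ArrayList<>(Arrays.asList("aurora", "sdsn-gr")));

			Dataset<ResultProjectList> ds = spark
				.createDataset(Arrays.asList(rpl), Encoders.bean(ResultProjectList.class));
			ds.printSchema(); // resultId: string, communityList: array<string>
			spark.stop();
		}
	}
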
@@ -37,127 +37,127 @@ import scala.Tuple2;
 * @Date 11/10/23
 */
public class SparkResultToCommunityFromProject implements Serializable {
	private static final Logger log = LoggerFactory.getLogger(SparkResultToCommunityFromProject.class);

	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				SparkResultToCommunityFromProject.class
					.getResourceAsStream(
						"/eu/dnetlib/dhp/resulttocommunityfromproject/input_communitytoresult_parameters.json"));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);

		parser.parseArgument(args);

		Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);

		final String outputPath = parser.get("outputPath");
		log.info("outputPath: {}", outputPath);

		final String possibleupdatespath = parser.get("preparedInfoPath");
		log.info("preparedInfoPath: {}", possibleupdatespath);

		SparkConf conf = new SparkConf();

		runWithSparkSession(
			conf,
			isSparkSessionManaged,
			spark -> {

				execPropagation(spark, inputPath, outputPath, possibleupdatespath);

			});
	}

	private static <R extends Result> void execPropagation(
		SparkSession spark,
		String inputPath,
		String outputPath,

		String possibleUpdatesPath) {

		Dataset<ResultProjectList> possibleUpdates = readPath(spark, possibleUpdatesPath, ResultProjectList.class);

		ModelSupport.entityTypes
			.keySet()
			.parallelStream()
			.forEach(e -> {
				if (ModelSupport.isResult(e)) {
					removeOutputDir(spark, outputPath + e.name());
					Class<R> resultClazz = ModelSupport.entityTypes.get(e);
					Dataset<R> result = readPath(spark, inputPath + e.name(), resultClazz);

					result
						.joinWith(
							possibleUpdates,
							result.col("id").equalTo(possibleUpdates.col("resultId")),
							"left_outer")
						.map(resultCommunityFn(), Encoders.bean(resultClazz))
						.write()
						.mode(SaveMode.Overwrite)
						.option("compression", "gzip")
						.json(outputPath + e.name());
				}
			});

	}

	private static <R extends Result> MapFunction<Tuple2<R, ResultProjectList>, R> resultCommunityFn() {
		return value -> {
			R ret = value._1();
			Optional<ResultProjectList> rcl = Optional.ofNullable(value._2());
			if (rcl.isPresent()) {
				// ArrayList<String> communitySet = rcl.get().getCommunityList();
				List<String> contextList = ret
					.getContext()
					.stream()
					.map(Context::getId)
					.collect(Collectors.toList());

				@SuppressWarnings("unchecked")
				R res = (R) ret.getClass().newInstance();

				res.setId(ret.getId());
				List<Context> propagatedContexts = new ArrayList<>();
				for (String cId : rcl.get().getCommunityList()) {
					if (!contextList.contains(cId)) {
						Context newContext = new Context();
						newContext.setId(cId);
						newContext
							.setDataInfo(
								Arrays
									.asList(
										getDataInfo(
											PROPAGATION_DATA_INFO_TYPE,
											PROPAGATION_RESULT_COMMUNITY_PROJECT_CLASS_ID,
											PROPAGATION_RESULT_COMMUNITY_PROJECT_CLASS_NAME,
											ModelConstants.DNET_PROVENANCE_ACTIONS)));
						propagatedContexts.add(newContext);
					} else {
						ret
							.getContext()
							.stream()
							.filter(c -> c.getId().equals(cId))
							.findFirst()
							.get()
							.getDataInfo()
							.add(
								getDataInfo(
									PROPAGATION_DATA_INFO_TYPE,
									PROPAGATION_RESULT_COMMUNITY_PROJECT_CLASS_ID,
									PROPAGATION_RESULT_COMMUNITY_PROJECT_CLASS_NAME,
									ModelConstants.DNET_PROVENANCE_ACTIONS));
					}
				}
				res.setContext(propagatedContexts);
				ret.mergeFrom(res);
			}
			return ret;
		};
	}
}

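execPropagation joins each result dataset against the prepared updates with a left outer joinWith, so results that gained no community survive the join with a null right side, which resultCommunityFn then guards through Optional. A minimal hypothetical sketch of the same join shape:

	import java.util.Arrays;
	import java.util.Optional;

	import org.apache.spark.api.java.function.MapFunction;
	import org.apache.spark.sql.Dataset;
	import org.apache.spark.sql.Encoders;
	import org.apache.spark.sql.SparkSession;

	import scala.Tuple2;

	public class LeftOuterJoinExample {
		public static void main(String[] args) {
			SparkSession spark = SparkSession.builder().master("local[*]").appName("sketch").getOrCreate();

			Dataset<String> results = spark.createDataset(Arrays.asList("r1", "r2"), Encoders.STRING());
			Dataset<String> updates = spark.createDataset(Arrays.asList("r1"), Encoders.STRING());

			Dataset<String> enriched = results
				.joinWith(updates, results.col("value").equalTo(updates.col("value")), "left_outer")
				// _2() is null when no update matched; Optional guards it, as in resultCommunityFn
				.map(
					(MapFunction<Tuple2<String, String>, String>) t -> Optional
						.ofNullable(t._2())
						.map(u -> t._1() + " [updated]")
						.orElse(t._1()),
					Encoders.STRING());

			enriched.show(); // r1 [updated], r2
			spark.stop();
		}
	}
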
@@ -31,103 +31,103 @@ import eu.dnetlib.dhp.schema.oaf.Dataset;

public class ResultToCommunityJobTest {

	private static final Logger log = LoggerFactory.getLogger(ResultToCommunityJobTest.class);

	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

	private static SparkSession spark;

	private static Path workingDir;

	@BeforeAll
	public static void beforeAll() throws IOException {
		workingDir = Files.createTempDirectory(ResultToCommunityJobTest.class.getSimpleName());
		log.info("using work dir {}", workingDir);

		SparkConf conf = new SparkConf();
		conf.setAppName(ResultToCommunityJobTest.class.getSimpleName());

		conf.setMaster("local[*]");
		conf.set("spark.driver.host", "localhost");
		conf.set("hive.metastore.local", "true");
		conf.set("spark.ui.enabled", "false");
		conf.set("spark.sql.warehouse.dir", workingDir.toString());
		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

		spark = SparkSession
			.builder()
			.appName(OrcidPropagationJobTest.class.getSimpleName())
			.config(conf)
			.getOrCreate();
	}

	@AfterAll
	public static void afterAll() throws IOException {
		FileUtils.deleteDirectory(workingDir.toFile());
		spark.stop();
	}

	@Test
	void testSparkResultToCommunityFromProjectJob() throws Exception {
		final String preparedInfoPath = getClass()
			.getResource("/eu/dnetlib/dhp/resulttocommunityfromproject/preparedInfo")
			.getPath();
		SparkResultToCommunityFromProject
			.main(
				new String[] {

					"-isSparkSessionManaged", Boolean.FALSE.toString(),
					"-sourcePath", getClass()
						.getResource("/eu/dnetlib/dhp/resulttocommunityfromproject/sample/")
						.getPath(),

					"-outputPath", workingDir.toString() + "/",
					"-preparedInfoPath", preparedInfoPath
				});

		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

		JavaRDD<Dataset> tmp = sc
			.textFile(workingDir.toString() + "/dataset")
			.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));

		Assertions.assertEquals(10, tmp.count());
		/**
		 * {"resultId":"50|57a035e5b1ae::d5be548ca7ae489d762f893be67af52f","communityList":["aurora"]}
		 * {"resultId":"50|57a035e5b1ae::a77232ffca9115fcad51c3503dbc7e3e","communityList":["aurora"]}
		 * {"resultId":"50|57a035e5b1ae::803aaad4decab7e27cd4b52a1931b3a1","communityList":["sdsn-gr"]}
		 * {"resultId":"50|57a035e5b1ae::a02e9e4087bca50687731ae5c765b5e1","communityList":["netherlands"]}
		 */
		List<Context> context = tmp
			.filter(r -> r.getId().equals("50|57a035e5b1ae::d5be548ca7ae489d762f893be67af52f"))
			.first()
			.getContext();
		Assertions.assertTrue(context.stream().anyMatch(c -> containsResultCommunityProject(c)));

		context = tmp
			.filter(r -> r.getId().equals("50|57a035e5b1ae::a77232ffca9115fcad51c3503dbc7e3e"))
			.first()
			.getContext();
		Assertions.assertTrue(context.stream().anyMatch(c -> containsResultCommunityProject(c)));

		Assertions
			.assertEquals(
				0, tmp.filter(r -> r.getId().equals("50|57a035e5b1ae::803aaad4decab7e27cd4b52a1931b3a1")).count());

		Assertions
			.assertEquals(
				0, tmp.filter(r -> r.getId().equals("50|57a035e5b1ae::a02e9e4087bca50687731ae5c765b5e1")).count());

		Assertions
			.assertEquals(
				2, tmp.filter(r -> r.getContext().stream().anyMatch(c -> c.getId().equals("aurora"))).count());

	}

	private static boolean containsResultCommunityProject(Context c) {
		return c
			.getDataInfo()
			.stream()
			.anyMatch(di -> di.getProvenanceaction().getClassid().equals("result:community:project"));
	}
}

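The test's local-mode Spark configuration (local[*] master, pinned driver host, UI disabled, warehouse under a temp dir) is a reusable harness pattern. Boiled down to a hypothetical standalone JUnit 5 skeleton, under the same assumptions, it looks like this:

	import java.io.IOException;
	import java.nio.file.Files;
	import java.nio.file.Path;

	import org.apache.commons.io.FileUtils;
	import org.apache.spark.SparkConf;
	import org.apache.spark.sql.SparkSession;
	import org.junit.jupiter.api.AfterAll;
	import org.junit.jupiter.api.Assertions;
	import org.junit.jupiter.api.BeforeAll;
	import org.junit.jupiter.api.Test;

	public class LocalSparkHarnessExample {

		private static SparkSession spark;
		private static Path workingDir;

		@BeforeAll
		public static void beforeAll() throws IOException {
			workingDir = Files.createTempDirectory("spark-test");
			SparkConf conf = new SparkConf()
				.setMaster("local[*]") // run executors in-process
				.set("spark.driver.host", "localhost") // avoid hostname lookups on CI machines
				.set("spark.ui.enabled", "false") // no web UI needed in tests
				.set("spark.sql.warehouse.dir", workingDir.toString());
			spark = SparkSession.builder().appName("harness").config(conf).getOrCreate();
		}

		@AfterAll
		public static void afterAll() throws IOException {
			spark.stop();
			FileUtils.deleteDirectory(workingDir.toFile());
		}

		@Test
		void sparkIsUp() {
			Assertions.assertEquals(3, spark.range(3).count());
		}
	}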