minor and fixed wrong number in test because of change in the input resource
This commit is contained in:
parent 2cae97d049
commit 4dcd03b78e
@@ -1,6 +1,5 @@
 import java.io.IOException;
 
-import eu.dnetlib.dhp.oa.model.community.CommunityResult;
 import org.junit.jupiter.api.Test;
 
 import com.fasterxml.jackson.core.JsonProcessingException;
@@ -10,6 +9,7 @@ import com.github.imifou.jsonschema.module.addon.AddonModule;
 import com.github.victools.jsonschema.generator.*;
 
 import eu.dnetlib.dhp.ExecCreateSchemas;
+import eu.dnetlib.dhp.oa.model.community.CommunityResult;
 import eu.dnetlib.dhp.oa.model.graph.Datasource;
 import eu.dnetlib.dhp.oa.model.graph.GraphResult;
 import eu.dnetlib.dhp.oa.model.graph.Organization;
@@ -8,6 +8,7 @@ import java.util.Optional;
 
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.ForeachFunction;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
@@ -71,7 +72,8 @@ public class SparkUpdateProjectInfo implements Serializable {
         String preparedInfoPath) {
         Dataset<CommunityResult> result = Utils.readPath(spark, inputPath, CommunityResult.class);
         Dataset<ResultProject> resultProject = Utils.readPath(spark, preparedInfoPath, ResultProject.class);
-        Dataset<CommunityResult> tmp = result
+        result
             .joinWith(
                 resultProject, result.col("id").equalTo(resultProject.col("resultId")),
                 "left")
@@ -79,9 +81,7 @@
                 CommunityResult r = value._1();
                 Optional.ofNullable(value._2()).ifPresent(rp -> r.setProjects(rp.getProjectsList()));
                 return r;
-            }, Encoders.bean(CommunityResult.class));
-        long count = tmp.count();
-        tmp
+            }, Encoders.bean(CommunityResult.class))
             .map(
                 (MapFunction<CommunityResult, String>) cr -> new ObjectMapper().writeValueAsString(cr),
                 Encoders.STRING())
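Note on the SparkUpdateProjectInfo hunks above: the change removes the intermediate `tmp` dataset together with the `long count = tmp.count();` debug line. Since `count()` is a Spark action, the old code ran a full extra job over the joined data before anything was written; the new code chains the left join straight into serialization. A minimal sketch of the resulting pattern, with hypothetical stand-in beans (ResultBean, ProjectBean) instead of the real CommunityResult/ResultProject classes:

import java.io.Serializable;
import java.util.List;
import java.util.Optional;

import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;

import scala.Tuple2;

public class LeftJoinEnrichSketch implements Serializable {

    // Hypothetical stand-in beans; the real code uses CommunityResult/ResultProject.
    public static class ResultBean implements Serializable {
        private String id;
        private List<String> projects;
        public String getId() { return id; }
        public void setId(String id) { this.id = id; }
        public List<String> getProjects() { return projects; }
        public void setProjects(List<String> projects) { this.projects = projects; }
    }

    public static class ProjectBean implements Serializable {
        private String resultId;
        private List<String> projectsList;
        public String getResultId() { return resultId; }
        public void setResultId(String resultId) { this.resultId = resultId; }
        public List<String> getProjectsList() { return projectsList; }
        public void setProjectsList(List<String> projectsList) { this.projectsList = projectsList; }
    }

    // Left join, then copy the project list only when a match was found:
    // the right side of the tuple is null for unmatched results.
    public static Dataset<ResultBean> enrich(Dataset<ResultBean> result, Dataset<ProjectBean> project) {
        return result
            .joinWith(project, result.col("id").equalTo(project.col("resultId")), "left")
            .map((MapFunction<Tuple2<ResultBean, ProjectBean>, ResultBean>) t -> {
                ResultBean r = t._1();
                Optional.ofNullable(t._2()).ifPresent(p -> r.setProjects(p.getProjectsList()));
                return r;
            }, Encoders.bean(ResultBean.class));
    }
}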
@@ -88,7 +88,10 @@ public class SparkDumpFunderResults implements Serializable {
         } else {
             String fName = p.getId().substring(3, p.getId().indexOf("_")).toUpperCase();
             if (fName.equalsIgnoreCase("ec")) {
-                if (p.getId().contains("h2020")) {
+                if (p.getId().contains("he")) {
+                    fName += "_HE";
+                }
+                else if (p.getId().contains("h2020")) {
                     fName += "_H2020";
                 } else {
                     fName += "_FP7";
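The SparkDumpFunderResults hunk adds a Horizon Europe branch in front of the existing H2020/FP7 fallback for EC projects. A standalone version of the committed logic, under the assumption of the same id layout; funderSuffix is a hypothetical helper name, not a method in the codebase:

// Sketch of the EC programme suffix logic introduced above.
static String funderSuffix(String id) {
    String fName = id.substring(3, id.indexOf("_")).toUpperCase();
    if (fName.equalsIgnoreCase("ec")) {
        if (id.contains("he")) {
            fName += "_HE";        // Horizon Europe
        } else if (id.contains("h2020")) {
            fName += "_H2020";     // Horizon 2020
        } else {
            fName += "_FP7";       // anything else falls through to FP7
        }
    }
    return fName;
}

Worth noting: String.contains scans the whole id, so the "he" branch wins for any id carrying that substring anywhere, not only in a programme token.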
@@ -4,6 +4,7 @@ package eu.dnetlib.dhp.oa.graph.dump.funderresults;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
 
 import java.io.Serializable;
+import java.util.Objects;
 import java.util.Optional;
 
 import org.apache.commons.io.IOUtils;
@@ -17,6 +18,8 @@ import org.apache.spark.sql.SparkSession;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.fasterxml.jackson.databind.ObjectMapper;
+
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.oa.graph.dump.Constants;
 import eu.dnetlib.dhp.oa.graph.dump.ResultMapper;
@@ -99,13 +102,19 @@ public class SparkResultLinkedToProject implements Serializable {
                 .map(
                     t2._1(),
                     communityMap, Constants.DUMPTYPE.FUNDER.getType());
-                cr.setProjects(t2._2().getProjectsList());
+                if (cr != null) {
+                    cr.setProjects(t2._2().getProjectsList());
+                }
                 return cr;
             }, Encoders.bean(CommunityResult.class))
+            .filter(Objects::nonNull)
+            .map(
+                (MapFunction<CommunityResult, String>) cr -> new ObjectMapper().writeValueAsString(cr),
+                Encoders.STRING())
             .write()
             .mode(SaveMode.Overwrite)
             .option("compression", "gzip")
-            .json(outputPath);
+            .text(outputPath);
 
     }
 }
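Two things happen in the SparkResultLinkedToProject hunk: the mapper's possibly-null output is now guarded and filtered out with Objects::nonNull, and the sink switches from .json(outputPath) to .text(outputPath). The switch matters because the records are already serialized to JSON strings by Jackson: calling .json() on a Dataset<String> would re-encode each line as a {"value":"..."} object, while .text() writes each string as one output line. A self-contained sketch of the new tail of the pipeline, assuming a Dataset<CommunityResult> that may contain nulls:

import java.util.Objects;

import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.oa.model.community.CommunityResult;

public class WriteJsonLinesSketch {
    // Drop the nulls produced by the mapper, serialize each record once with
    // Jackson, and write the resulting JSON strings as gzipped text lines.
    public static void write(Dataset<CommunityResult> results, String outputPath) {
        results
            .filter((FilterFunction<CommunityResult>) Objects::nonNull)
            .map(
                (MapFunction<CommunityResult, String>) cr -> new ObjectMapper().writeValueAsString(cr),
                Encoders.STRING())
            .write()
            .mode(SaveMode.Overwrite)
            .option("compression", "gzip")
            .text(outputPath);
    }
}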
@@ -238,7 +238,7 @@
                 </property>
                 <property>
                     <name>outputPath</name>
-                    <value>${workingDir}/tar</value>
+                    <value>${outputPath}/dump</value>
                 </property>
                 <property>
                     <name>sourcePath</name>
@@ -279,7 +279,7 @@
                 </property>
                 <property>
                     <name>outputPath</name>
-                    <value>${workingDir}/tar</value>
+                    <value>${outputPath}/dump</value>
                 </property>
             </configuration>
         </sub-workflow>
@@ -299,7 +299,7 @@
                 </property>
                 <property>
                     <name>outputPath</name>
-                    <value>${workingDir}/tar</value>
+                    <value>${outputPath}/dump</value>
                 </property>
                 <property>
                     <name>sourcePath</name>
@@ -315,28 +315,28 @@
         <error to="Kill" />
     </action>
 
-    <action name="make_archive">
-        <java>
-            <main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>
-            <arg>--hdfsPath</arg><arg>${outputPath}</arg>
-            <arg>--nameNode</arg><arg>${nameNode}</arg>
-            <arg>--sourcePath</arg><arg>${workingDir}/tar</arg>
-        </java>
-        <ok to="should_upload"/>
-        <error to="Kill"/>
-    </action>
-
 <!--    <action name="make_archive">-->
 <!--        <java>-->
 <!--            <main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>-->
-<!--            <arg>--hdfsPath</arg><arg>${outputPath}/tar</arg>-->
+<!--            <arg>--hdfsPath</arg><arg>${outputPath}</arg>-->
 <!--            <arg>--nameNode</arg><arg>${nameNode}</arg>-->
-<!--            <arg>--sourcePath</arg><arg>${outputPath}/dump</arg>-->
+<!--            <arg>--sourcePath</arg><arg>${workingDir}/tar</arg>-->
 <!--        </java>-->
 <!--        <ok to="should_upload"/>-->
 <!--        <error to="Kill"/>-->
 <!--    </action>-->
 
+    <action name="make_archive">
+        <java>
+            <main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>
+            <arg>--hdfsPath</arg><arg>${outputPath}/tar</arg>
+            <arg>--nameNode</arg><arg>${nameNode}</arg>
+            <arg>--sourcePath</arg><arg>${outputPath}/dump</arg>
+        </java>
+        <ok to="should_upload"/>
+        <error to="Kill"/>
+    </action>
+
     <decision name="should_upload">
         <switch>
             <case to="send_zenodo">${wf:conf('upload') eq true}</case>
@@ -347,7 +347,7 @@
     <action name="send_zenodo">
         <java>
             <main-class>eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS</main-class>
-            <arg>--hdfsPath</arg><arg>${outputPath}</arg>
+            <arg>--hdfsPath</arg><arg>${outputPath}/tar/</arg>
             <arg>--nameNode</arg><arg>${nameNode}</arg>
             <arg>--accessToken</arg><arg>${accessToken}</arg>
             <arg>--connectionUrl</arg><arg>${connectionUrl}</arg>
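The workflow hunks rearrange the path flow: the dump sub-workflows now write to ${outputPath}/dump, make_archive tars that directory into ${outputPath}/tar, and send_zenodo uploads from ${outputPath}/tar/. For orientation only, here is a hypothetical sketch of what a MakeTar-style step does; this is not the actual eu.dnetlib.dhp.oa.graph.dump.MakeTar code, just a minimal illustration of streaming an HDFS directory into a single tar:

import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class TarSketch {
    // Stream every plain file under sourceDir into one .tar at tarFile,
    // overwriting any existing archive at that path.
    public static void tarDir(FileSystem fs, Path sourceDir, Path tarFile) throws Exception {
        try (TarArchiveOutputStream tar = new TarArchiveOutputStream(fs.create(tarFile, true))) {
            for (FileStatus f : fs.listStatus(sourceDir)) {
                if (f.isFile()) {
                    TarArchiveEntry entry = new TarArchiveEntry(f.getPath().getName());
                    entry.setSize(f.getLen());
                    tar.putArchiveEntry(entry);
                    try (FSDataInputStream in = fs.open(f.getPath())) {
                        org.apache.commons.compress.utils.IOUtils.copy(in, tar);
                    }
                    tar.closeArchiveEntry();
                }
            }
        }
    }
}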
@@ -456,7 +456,7 @@ public class DumpSubsetTest {
                 getClass()
                     .getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/dump/community_infrastructure")
                     .getPath())
-            .saveAsTextFile(workingDir.toString() + "/dump/community_infrastructure");
+            .saveAsTextFile(workingDir.toString() + "/dump/communities_infrastructures");
 
         SparkSelectValidRelationContext
             .main(
@@ -512,10 +512,12 @@ public class DumpSubsetTest {
             .textFile(workingDir.toString() + "/relation")
             .map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.oa.model.graph.Relation.class));
 
-        Assertions.assertEquals(94, tmp.count());
-        Assertions.assertEquals(47, tmp.filter(r -> r.getSource().getId().startsWith("50|")).count());
-        Assertions.assertEquals(36, tmp.filter(r -> r.getSource().getId().startsWith("10|")).count());
-        Assertions.assertEquals(11, tmp.filter(r -> r.getSource().getId().startsWith("00|")).count());
+        Assertions.assertEquals(102, tmp.count());
+        Assertions.assertEquals(51, tmp.filter(r -> r.getSource().getId().startsWith("50|")).count());
+        Assertions.assertEquals(39, tmp.filter(r -> r.getSource().getId().startsWith("10|")).count());
+        Assertions.assertEquals(12, tmp.filter(r -> r.getSource().getId().startsWith("00|")).count());
     }
 
 }
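Note on the updated assertions: the new per-prefix counts are internally consistent with the new total (51 + 39 + 12 = 102), just as the old ones were (47 + 36 + 11 = 94); the shift follows from the changed input resource named in the commit message.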
File diff suppressed because one or more lines are too long