Minor changes, and fixed the wrong number in the test caused by a change in the input resource
This commit is contained in:
parent
2cae97d049
commit
4dcd03b78e
|
@ -1,6 +1,5 @@
|
|||
import java.io.IOException;
|
||||
|
||||
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
|
@ -10,6 +9,7 @@ import com.github.imifou.jsonschema.module.addon.AddonModule;
|
|||
import com.github.victools.jsonschema.generator.*;
|
||||
|
||||
import eu.dnetlib.dhp.ExecCreateSchemas;
|
||||
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
|
||||
import eu.dnetlib.dhp.oa.model.graph.Datasource;
|
||||
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
|
||||
import eu.dnetlib.dhp.oa.model.graph.Organization;
|
||||
|
|
|
@ -8,6 +8,7 @@ import java.util.Optional;
|
|||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.ForeachFunction;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
|
@ -71,7 +72,8 @@ public class SparkUpdateProjectInfo implements Serializable {
|
|||
String preparedInfoPath) {
|
||||
Dataset<CommunityResult> result = Utils.readPath(spark, inputPath, CommunityResult.class);
|
||||
Dataset<ResultProject> resultProject = Utils.readPath(spark, preparedInfoPath, ResultProject.class);
|
||||
Dataset<CommunityResult> tmp = result
|
||||
|
||||
result
|
||||
.joinWith(
|
||||
resultProject, result.col("id").equalTo(resultProject.col("resultId")),
|
||||
"left")
|
||||
|
@ -79,9 +81,7 @@ public class SparkUpdateProjectInfo implements Serializable {
|
|||
CommunityResult r = value._1();
|
||||
Optional.ofNullable(value._2()).ifPresent(rp -> r.setProjects(rp.getProjectsList()));
|
||||
return r;
|
||||
}, Encoders.bean(CommunityResult.class));
|
||||
long count = tmp.count();
|
||||
tmp
|
||||
}, Encoders.bean(CommunityResult.class))
|
||||
.map(
|
||||
(MapFunction<CommunityResult, String>) cr -> new ObjectMapper().writeValueAsString(cr),
|
||||
Encoders.STRING())
|
||||
|
|
|
@ -88,7 +88,10 @@ public class SparkDumpFunderResults implements Serializable {
|
|||
} else {
|
||||
String fName = p.getId().substring(3, p.getId().indexOf("_")).toUpperCase();
|
||||
if (fName.equalsIgnoreCase("ec")) {
|
||||
if (p.getId().contains("h2020")) {
|
||||
if(p.getId().contains("he")){
|
||||
fName += "_HE";
|
||||
}
|
||||
else if (p.getId().contains("h2020")) {
|
||||
fName += "_H2020";
|
||||
} else {
|
||||
fName += "_FP7";
|
||||
|
|
|
@ -4,6 +4,7 @@ package eu.dnetlib.dhp.oa.graph.dump.funderresults;
|
|||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
|
@ -17,6 +18,8 @@ import org.apache.spark.sql.SparkSession;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.Constants;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.ResultMapper;
|
||||
|
@ -99,13 +102,19 @@ public class SparkResultLinkedToProject implements Serializable {
|
|||
.map(
|
||||
t2._1(),
|
||||
communityMap, Constants.DUMPTYPE.FUNDER.getType());
|
||||
cr.setProjects(t2._2().getProjectsList());
|
||||
if (cr != null) {
|
||||
cr.setProjects(t2._2().getProjectsList());
|
||||
}
|
||||
return cr;
|
||||
}, Encoders.bean(CommunityResult.class))
|
||||
.filter(Objects::nonNull)
|
||||
.map(
|
||||
(MapFunction<CommunityResult, String>) cr -> new ObjectMapper().writeValueAsString(cr),
|
||||
Encoders.STRING())
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(outputPath);
|
||||
.text(outputPath);
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -238,7 +238,7 @@
|
|||
</property>
|
||||
<property>
|
||||
<name>outputPath</name>
|
||||
<value>${workingDir}/tar</value>
|
||||
<value>${outputPath}/dump</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>sourcePath</name>
|
||||
|
@ -279,7 +279,7 @@
|
|||
</property>
|
||||
<property>
|
||||
<name>outputPath</name>
|
||||
<value>${workingDir}/tar</value>
|
||||
<value>${outputPath}/dump</value>
|
||||
</property>
|
||||
</configuration>
|
||||
</sub-workflow>
|
||||
|
@ -299,7 +299,7 @@
|
|||
</property>
|
||||
<property>
|
||||
<name>outputPath</name>
|
||||
<value>${workingDir}/tar</value>
|
||||
<value>${outputPath}/dump</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>sourcePath</name>
|
||||
|
@ -315,28 +315,28 @@
|
|||
<error to="Kill" />
|
||||
</action>
|
||||
|
||||
<action name="make_archive">
|
||||
<java>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>
|
||||
<arg>--hdfsPath</arg><arg>${outputPath}</arg>
|
||||
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/tar</arg>
|
||||
</java>
|
||||
<ok to="should_upload"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<!-- <action name="make_archive">-->
|
||||
<!-- <java>-->
|
||||
<!-- <main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>-->
|
||||
<!-- <arg>--hdfsPath</arg><arg>${outputPath}/tar</arg>-->
|
||||
<!-- <arg>--hdfsPath</arg><arg>${outputPath}</arg>-->
|
||||
<!-- <arg>--nameNode</arg><arg>${nameNode}</arg>-->
|
||||
<!-- <arg>--sourcePath</arg><arg>${outputPath}/dump</arg>-->
|
||||
<!-- <arg>--sourcePath</arg><arg>${workingDir}/tar</arg>-->
|
||||
<!-- </java>-->
|
||||
<!-- <ok to="should_upload"/>-->
|
||||
<!-- <error to="Kill"/>-->
|
||||
<!-- </action>-->
|
||||
|
||||
<action name="make_archive">
|
||||
<java>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>
|
||||
<arg>--hdfsPath</arg><arg>${outputPath}/tar</arg>
|
||||
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||
<arg>--sourcePath</arg><arg>${outputPath}/dump</arg>
|
||||
</java>
|
||||
<ok to="should_upload"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<decision name="should_upload">
|
||||
<switch>
|
||||
<case to="send_zenodo">${wf:conf('upload') eq true}</case>
|
||||
|
@ -347,7 +347,7 @@
|
|||
<action name="send_zenodo">
|
||||
<java>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS</main-class>
|
||||
<arg>--hdfsPath</arg><arg>${outputPath}</arg>
|
||||
<arg>--hdfsPath</arg><arg>${outputPath}/tar/</arg>
|
||||
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||
<arg>--accessToken</arg><arg>${accessToken}</arg>
|
||||
<arg>--connectionUrl</arg><arg>${connectionUrl}</arg>
|
||||
|
|
|
@ -456,7 +456,7 @@ public class DumpSubsetTest {
|
|||
getClass()
|
||||
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/dump/community_infrastructure")
|
||||
.getPath())
|
||||
.saveAsTextFile(workingDir.toString() + "/dump/community_infrastructure");
|
||||
.saveAsTextFile(workingDir.toString() + "/dump/communities_infrastructures");
|
||||
|
||||
SparkSelectValidRelationContext
|
||||
.main(
|
||||
|
@ -512,10 +512,12 @@ public class DumpSubsetTest {
|
|||
.textFile(workingDir.toString() + "/relation")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.oa.model.graph.Relation.class));
|
||||
|
||||
Assertions.assertEquals(94, tmp.count());
|
||||
Assertions.assertEquals(47, tmp.filter(r -> r.getSource().getId().startsWith("50|")).count());
|
||||
Assertions.assertEquals(36, tmp.filter(r -> r.getSource().getId().startsWith("10|")).count());
|
||||
Assertions.assertEquals(11, tmp.filter(r -> r.getSource().getId().startsWith("00|")).count());
|
||||
Assertions.assertEquals(102, tmp.count());
|
||||
|
||||
|
||||
Assertions.assertEquals(51, tmp.filter(r -> r.getSource().getId().startsWith("50|")).count());
|
||||
Assertions.assertEquals(39, tmp.filter(r -> r.getSource().getId().startsWith("10|")).count());
|
||||
Assertions.assertEquals(12, tmp.filter(r -> r.getSource().getId().startsWith("00|")).count());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue