added test for the dump of the EOSC IF Tag. Fixed issue in serialization on the cluster
This commit is contained in:
parent
5b32bf1482
commit
97f26a205a
|
@ -55,4 +55,13 @@ public class EoscInteroperabilityFramework implements Serializable {
|
||||||
this.semanticRelation = semanticRelation;
|
this.semanticRelation = semanticRelation;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static EoscInteroperabilityFramework newInstance(String code, String label, String url,
|
||||||
|
String semanticRelation) {
|
||||||
|
EoscInteroperabilityFramework eif = new EoscInteroperabilityFramework();
|
||||||
|
eif.label = label;
|
||||||
|
eif.code = code;
|
||||||
|
eif.url = url;
|
||||||
|
eif.semanticRelation = semanticRelation;
|
||||||
|
return eif;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,6 +18,6 @@ public class EoscResult extends GraphResult {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setEoscIF(EoscInteroperabilityFramework eoscIF) {
|
public void setEoscIF(EoscInteroperabilityFramework eoscIF) {
|
||||||
eoscIF = eoscIF;
|
this.eoscIF = eoscIF;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,9 +6,15 @@ import java.util.*;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.eosc.model.EoscInteroperabilityFramework;
|
import eu.dnetlib.dhp.eosc.model.EoscInteroperabilityFramework;
|
||||||
import eu.dnetlib.dhp.eosc.model.EoscResult;
|
import eu.dnetlib.dhp.eosc.model.EoscResult;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.eosc.SelectEoscResultsJobStep1;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.exceptions.CardinalityTooHighException;
|
import eu.dnetlib.dhp.oa.graph.dump.exceptions.CardinalityTooHighException;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
|
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
|
||||||
import eu.dnetlib.dhp.oa.model.*;
|
import eu.dnetlib.dhp.oa.model.*;
|
||||||
|
@ -28,6 +34,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
|
||||||
public class ResultMapper implements Serializable {
|
public class ResultMapper implements Serializable {
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(ResultMapper.class);
|
||||||
|
|
||||||
public static <E extends eu.dnetlib.dhp.schema.oaf.OafEntity> Result map(
|
public static <E extends eu.dnetlib.dhp.schema.oaf.OafEntity> Result map(
|
||||||
E in, Map<String, String> communityMap, String dumpType)
|
E in, Map<String, String> communityMap, String dumpType)
|
||||||
|
@ -150,6 +157,10 @@ public class ResultMapper implements Serializable {
|
||||||
((GraphResult) out)
|
((GraphResult) out)
|
||||||
.setInstance(
|
.setInstance(
|
||||||
oInst.get().stream().map(ResultMapper::getGraphInstance).collect(Collectors.toList()));
|
oInst.get().stream().map(ResultMapper::getGraphInstance).collect(Collectors.toList()));
|
||||||
|
} else if (Constants.DUMPTYPE.EOSC.getType().equals(dumpType)) {
|
||||||
|
((EoscResult) out)
|
||||||
|
.setInstance(
|
||||||
|
oInst.get().stream().map(ResultMapper::getGraphInstance).collect(Collectors.toList()));
|
||||||
} else {
|
} else {
|
||||||
((CommunityResult) out)
|
((CommunityResult) out)
|
||||||
.setInstance(
|
.setInstance(
|
||||||
|
@ -237,12 +248,15 @@ public class ResultMapper implements Serializable {
|
||||||
"EOSC IF in the result has cardinality greater than one. Change dump!");
|
"EOSC IF in the result has cardinality greater than one. Change dump!");
|
||||||
}
|
}
|
||||||
if (gei.size() == 1) {
|
if (gei.size() == 1) {
|
||||||
EoscInteroperabilityFramework eif = new EoscInteroperabilityFramework();
|
|
||||||
eif.setCode(gei.get(0).getCode());
|
EoscIfGuidelines ifra = gei.get(0);
|
||||||
eif.setLabel(gei.get(0).getLabel());
|
((EoscResult) out)
|
||||||
eif.setUrl(gei.get(0).getUrl());
|
.setEoscIF(
|
||||||
eif.setSemanticRelation(gei.get(0).getSemanticRelation());
|
EoscInteroperabilityFramework
|
||||||
((EoscResult) out).setEoscIF(eif);
|
.newInstance(
|
||||||
|
ifra.getCode(), ifra.getLabel(), ifra.getUrl(),
|
||||||
|
ifra.getSemanticRelation()));
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (!Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) {
|
} else if (!Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) {
|
||||||
|
|
|
@ -17,6 +17,7 @@ import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.eosc.model.EoscResult;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.Constants;
|
import eu.dnetlib.dhp.oa.graph.dump.Constants;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.ResultMapper;
|
import eu.dnetlib.dhp.oa.graph.dump.ResultMapper;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
|
@ -76,9 +77,9 @@ public class SelectEoscResultsJobStep1 implements Serializable {
|
||||||
(FilterFunction<R>) r -> !r.getDataInfo().getDeletedbyinference() && !r.getDataInfo().getInvisible()
|
(FilterFunction<R>) r -> !r.getDataInfo().getDeletedbyinference() && !r.getDataInfo().getInvisible()
|
||||||
&& r.getContext().stream().anyMatch(c -> c.getId().equals("eosc")))
|
&& r.getContext().stream().anyMatch(c -> c.getId().equals("eosc")))
|
||||||
.map(
|
.map(
|
||||||
(MapFunction<R, GraphResult>) r -> (GraphResult) ResultMapper
|
(MapFunction<R, EoscResult>) r -> (EoscResult) ResultMapper
|
||||||
.map(r, null, Constants.DUMPTYPE.COMPLETE.getType()),
|
.map(r, null, Constants.DUMPTYPE.EOSC.getType()),
|
||||||
Encoders.bean(GraphResult.class))
|
Encoders.bean(EoscResult.class))
|
||||||
.write()
|
.write()
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
|
|
|
@ -1,10 +1,5 @@
|
||||||
[
|
[
|
||||||
{
|
|
||||||
"paramName":"cmp",
|
|
||||||
"paramLongName":"communityMapPath",
|
|
||||||
"paramDescription": "the path to the serialization of the community map",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"paramName":"s",
|
"paramName":"s",
|
||||||
"paramLongName":"sourcePath",
|
"paramLongName":"sourcePath",
|
||||||
|
|
|
@ -117,7 +117,7 @@
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/tar/publication</arg>
|
<arg>--outputPath</arg><arg>${workingDir}/tar/publication</arg>
|
||||||
<arg>--communityMapPath</arg><arg>noneed</arg>
|
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait_eosc_dump"/>
|
<ok to="wait_eosc_dump"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -142,7 +142,7 @@
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/tar/dataset</arg>
|
<arg>--outputPath</arg><arg>${workingDir}/tar/dataset</arg>
|
||||||
<arg>--communityMapPath</arg><arg>noneed</arg>
|
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait_eosc_dump"/>
|
<ok to="wait_eosc_dump"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -167,7 +167,7 @@
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/tar/otherresearchproduct</arg>
|
<arg>--outputPath</arg><arg>${workingDir}/tar/otherresearchproduct</arg>
|
||||||
<arg>--communityMapPath</arg><arg>noneed</arg>
|
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait_eosc_dump"/>
|
<ok to="wait_eosc_dump"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -192,7 +192,7 @@
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/tar/software</arg>
|
<arg>--outputPath</arg><arg>${workingDir}/tar/software</arg>
|
||||||
<arg>--communityMapPath</arg><arg>noneed</arg>
|
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait_eosc_dump"/>
|
<ok to="wait_eosc_dump"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
|
|
@ -25,7 +25,9 @@ import com.fasterxml.jackson.core.JsonProcessingException;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import com.google.gson.Gson;
|
import com.google.gson.Gson;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.eosc.model.EoscResult;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.eosc.SelectEoscResultsJobStep1;
|
||||||
import eu.dnetlib.dhp.oa.model.Instance;
|
import eu.dnetlib.dhp.oa.model.Instance;
|
||||||
import eu.dnetlib.dhp.oa.model.OpenAccessRoute;
|
import eu.dnetlib.dhp.oa.model.OpenAccessRoute;
|
||||||
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
|
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
|
||||||
|
@ -881,6 +883,47 @@ public class DumpJobTest {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testEOSCDump() throws Exception {
|
||||||
|
final String sourcePath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/eosc/eosctag.json")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
final String communityMapPath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
SelectEoscResultsJobStep1
|
||||||
|
.main(
|
||||||
|
new String[] {
|
||||||
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
|
"-sourcePath",
|
||||||
|
sourcePath,
|
||||||
|
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||||
|
"-outputPath", workingDir.toString() + "/working"
|
||||||
|
|
||||||
|
});
|
||||||
|
|
||||||
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
|
JavaRDD<EoscResult> tmp = sc
|
||||||
|
.textFile(workingDir.toString() + "/working")
|
||||||
|
.map(item -> OBJECT_MAPPER.readValue(item, EoscResult.class));
|
||||||
|
|
||||||
|
org.apache.spark.sql.Dataset<EoscResult> verificationDataset = spark
|
||||||
|
.createDataset(tmp.rdd(), Encoders.bean(EoscResult.class));
|
||||||
|
|
||||||
|
Assertions.assertEquals(1, verificationDataset.count());
|
||||||
|
|
||||||
|
Assertions.assertEquals(1, verificationDataset.filter("type = 'dataset'").count());
|
||||||
|
|
||||||
|
Assertions.assertEquals(1, tmp.filter(d -> d.getEoscIF().getCode().equals("EOSC::Twitter Data")).count());
|
||||||
|
Assertions.assertEquals(1, tmp.filter(d -> d.getEoscIF().getLabel().equals("EOSC::Twitter Data")).count());
|
||||||
|
Assertions.assertEquals(1, tmp.filter(d -> d.getEoscIF().getUrl().equals("")).count());
|
||||||
|
Assertions.assertEquals(1, tmp.filter(d -> d.getEoscIF().getSemanticRelation().equals("compliesWith")).count());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testArticlePCA() {
|
public void testArticlePCA() {
|
||||||
final String sourcePath = getClass()
|
final String sourcePath = getClass()
|
||||||
|
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue