[Dump] removing all EOSC-related additions from master

Miriam Baglioni 2022-10-11 11:41:19 +02:00
parent 80e525e0c1
commit 8a574fee2a
12 changed files with 3 additions and 646 deletions

View File

@@ -12,7 +12,6 @@ import com.fasterxml.jackson.databind.SerializationFeature;
 import com.github.imifou.jsonschema.module.addon.AddonModule;
 import com.github.victools.jsonschema.generator.*;
-import eu.dnetlib.dhp.eosc.model.EoscResult;
 import eu.dnetlib.dhp.oa.model.community.CommunityResult;
 import eu.dnetlib.dhp.oa.model.graph.*;
@@ -70,7 +69,5 @@ public class ExecCreateSchemas {
         ecs.generate(CommunityResult.class, DIRECTORY, "community_result_schema.json");
-        ecs.generate(EoscResult.class, DIRECTORY, "eosc_result_schema.json");
     }
 }

View File

@@ -1,67 +0,0 @@
package eu.dnetlib.dhp.eosc.model;

import java.io.Serializable;

import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;

/**
 * @author miriam.baglioni
 * @Date 29/07/22
 */
public class EoscInteroperabilityFramework implements Serializable {
    @JsonSchema(description = "EOSC-IF label")
    private String label;

    @JsonSchema(
        description = "EOSC-IF local code. Later on it could be populated with a PID (e.g. DOI), but for the time being we stick to a more loose definition.")
    private String code;

    @JsonSchema(description = "EOSC-IF url to the guidelines")
    private String url;

    @JsonSchema(description = "EOSC-IF semantic relation (e.g. compliesWith)")
    private String semanticRelation;

    public String getLabel() {
        return label;
    }

    public void setLabel(String label) {
        this.label = label;
    }

    public String getCode() {
        return code;
    }

    public void setCode(String code) {
        this.code = code;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public String getSemanticRelation() {
        return semanticRelation;
    }

    public void setSemanticRelation(String semanticRelation) {
        this.semanticRelation = semanticRelation;
    }

    public static EoscInteroperabilityFramework newInstance(String code, String label, String url,
        String semanticRelation) {
        EoscInteroperabilityFramework eif = new EoscInteroperabilityFramework();
        eif.label = label;
        eif.code = code;
        eif.url = url;
        eif.semanticRelation = semanticRelation;
        return eif;
    }
}

View File

@@ -1,23 +0,0 @@
package eu.dnetlib.dhp.eosc.model;

import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;

import eu.dnetlib.dhp.oa.model.graph.GraphResult;

/**
 * @author miriam.baglioni
 * @Date 29/07/22
 */
public class EoscResult extends GraphResult {
    @JsonSchema(description = "Describes a reference to the EOSC Interoperability Framework (IF) Guidelines")
    private EoscInteroperabilityFramework eoscIF;

    public EoscInteroperabilityFramework getEoscIF() {
        return eoscIF;
    }

    public void setEoscIF(EoscInteroperabilityFramework eoscIF) {
        this.eoscIF = eoscIF;
    }
}
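
Taken together, the two deleted model classes let an EOSC dump record carry a single EOSC-IF reference on top of the regular GraphResult payload. A minimal sketch of how such a record was populated (the factory call mirrors the ResultMapper code removed further below; the sample values are the ones asserted by the deleted testEOSCDump test):

// illustrative only: filling the EOSC-IF block of an EoscResult
EoscResult out = new EoscResult();
out
    .setEoscIF(
        EoscInteroperabilityFramework
            .newInstance(
                "EOSC::Twitter Data", // code
                "EOSC::Twitter Data", // label
                "", // url (empty in the test fixture)
                "compliesWith")); // semanticRelation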

View File

@@ -9,7 +9,6 @@ import com.github.imifou.jsonschema.module.addon.AddonModule;
 import com.github.victools.jsonschema.generator.*;
 import eu.dnetlib.dhp.ExecCreateSchemas;
-import eu.dnetlib.dhp.eosc.model.EoscResult;
 import eu.dnetlib.dhp.oa.model.graph.GraphResult;
 //@Disabled
@@ -46,22 +45,7 @@ class GenerateJsonSchema {
         System.out.println(jsonSchema.toString());
     }
-    @Test
-    void generateSchema3() throws JsonProcessingException {
-        ObjectMapper objectMapper = new ObjectMapper();
-        AddonModule module = new AddonModule();
-        SchemaGeneratorConfigBuilder configBuilder = new SchemaGeneratorConfigBuilder(objectMapper,
-            SchemaVersion.DRAFT_7, OptionPreset.PLAIN_JSON)
-                .with(module)
-                .with(Option.SCHEMA_VERSION_INDICATOR)
-                .without(Option.NONPUBLIC_NONSTATIC_FIELDS_WITHOUT_GETTERS);
-        SchemaGeneratorConfig config = configBuilder.build();
-        SchemaGenerator generator = new SchemaGenerator(config);
-        JsonNode jsonSchema = generator.generateSchema(EoscResult.class);
-        System.out.println(new ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(jsonSchema));
-    }
     @Test
     void generateJsonSchema3() throws IOException {
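
The deleted generateSchema3 test used the same victools generator configuration as the schema tests that remain in this class; a minimal sketch of that pattern, pointed at GraphResult, which stays in the module:

// sketch: same configuration as the removed test, targeting GraphResult instead of EoscResult
ObjectMapper objectMapper = new ObjectMapper();
SchemaGeneratorConfigBuilder configBuilder = new SchemaGeneratorConfigBuilder(objectMapper,
    SchemaVersion.DRAFT_7, OptionPreset.PLAIN_JSON)
        .with(new AddonModule())
        .with(Option.SCHEMA_VERSION_INDICATOR)
        .without(Option.NONPUBLIC_NONSTATIC_FIELDS_WITHOUT_GETTERS);
SchemaGenerator generator = new SchemaGenerator(configBuilder.build());
JsonNode jsonSchema = generator.generateSchema(GraphResult.class);
System.out.println(objectMapper.writerWithDefaultPrettyPrinter().writeValueAsString(jsonSchema));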

View File

@@ -9,12 +9,6 @@ import org.apache.commons.lang3.StringUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-import com.fasterxml.jackson.core.JsonProcessingException;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import eu.dnetlib.dhp.eosc.model.EoscInteroperabilityFramework;
-import eu.dnetlib.dhp.eosc.model.EoscResult;
-import eu.dnetlib.dhp.oa.graph.dump.eosc.SelectEoscResultsJobStep1;
 import eu.dnetlib.dhp.oa.graph.dump.exceptions.CardinalityTooHighException;
 import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
 import eu.dnetlib.dhp.oa.model.*;
@@ -44,8 +38,6 @@ public class ResultMapper implements Serializable {
         Result out;
         if (Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) {
             out = new GraphResult();
-        } else if (Constants.DUMPTYPE.EOSC.getType().equals(dumpType)) {
-            out = new EoscResult();
         } else {
             out = new CommunityResult();
         }
@@ -158,10 +150,6 @@ public class ResultMapper implements Serializable {
                 ((GraphResult) out)
                     .setInstance(
                         oInst.get().stream().map(ResultMapper::getGraphInstance).collect(Collectors.toList()));
-            } else if (Constants.DUMPTYPE.EOSC.getType().equals(dumpType)) {
-                ((EoscResult) out)
-                    .setInstance(
-                        oInst.get().stream().map(ResultMapper::getGraphInstance).collect(Collectors.toList()));
             } else {
                 ((CommunityResult) out)
                     .setInstance(
@@ -260,26 +248,7 @@
             out.setIndicators(indicators);
         }
-        if (Constants.DUMPTYPE.EOSC.getType().equals(dumpType)) {
-            if (Optional.ofNullable(input.getEoscifguidelines()).isPresent()) {
-                List<EoscIfGuidelines> gei = input.getEoscifguidelines();
-                if (gei.size() > 1) {
-                    throw new CardinalityTooHighException(
-                        "EOSC IF in the result has cardinality greater than one. Change dump!");
-                }
-                if (gei.size() == 1) {
-                    EoscIfGuidelines ifra = gei.get(0);
-                    ((EoscResult) out)
-                        .setEoscIF(
-                            EoscInteroperabilityFramework
                                .newInstance(
-                                    ifra.getCode(), ifra.getLabel(), ifra.getUrl(),
-                                    ifra.getSemanticRelation()));
-                }
-            }
-        } else if (!Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) {
+        if (!Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) {
             ((CommunityResult) out)
                 .setCollectedfrom(
                     input
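
Net effect of the three hunks above: ResultMapper loses its EOSC branch, so the dump type now selects between two output shapes only, and the EOSC-IF guidelines (with their cardinality check) are no longer mapped. A condensed sketch of the dispatch that remains (surrounding mapping logic omitted):

// simplified view of ResultMapper.map(...) after this commit
Result out;
if (Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) {
    out = new GraphResult(); // complete graph dump
} else {
    out = new CommunityResult(); // community dumps
}
// ... common field mapping ...
if (!Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) {
    // community-specific enrichment, e.g. setCollectedfrom(...)
}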

View File

@@ -1,83 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.eosc;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

import java.io.Serializable;
import java.util.Optional;

import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
import eu.dnetlib.dhp.schema.oaf.Relation;

/**
 * @author miriam.baglioni
 * @Date 27/07/22
 */
public class SelectEoscRelationsStep2 implements Serializable {
    private static final Logger log = LoggerFactory.getLogger(SelectEoscRelationsStep2.class);

    public static void main(String[] args) throws Exception {
        String jsonConfiguration = IOUtils
            .toString(
                SelectEoscRelationsStep2.class
                    .getResourceAsStream(
                        "/eu/dnetlib/dhp/oa/graph/dump/reletion_selection_parameters.json"));

        final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
        parser.parseArgument(args);

        Boolean isSparkSessionManaged = Optional
            .ofNullable(parser.get("isSparkSessionManaged"))
            .map(Boolean::valueOf)
            .orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        final String inputPath = parser.get("sourcePath");
        log.info("inputPath: {}", inputPath);

        final String resultPath = parser.get("resultPath");
        log.info("resultPath: {}", resultPath);

        SparkConf conf = new SparkConf();

        runWithSparkSession(
            conf,
            isSparkSessionManaged,
            spark -> {
                Utils.removeOutputDir(spark, resultPath + "/relation");
                selectRelations(spark, inputPath, resultPath + "/relation", resultPath);
            });
    }

    private static void selectRelations(SparkSession spark, String inputPath, String outputPath, String resultPath) {
        Dataset<GraphResult> results = Utils
            .readPath(spark, resultPath + "/publication", GraphResult.class)
            .union(
                Utils
                    .readPath(spark, resultPath + "/dataset", GraphResult.class))
            .union(
                Utils
                    .readPath(spark, resultPath + "/software", GraphResult.class))
            .union(
                Utils
                    .readPath(spark, resultPath + "/otherresearchproduct", GraphResult.class));

        Dataset<Relation> relations = Utils
            .readPath(spark, inputPath + "/relation", Relation.class)
            .filter(
                (FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
                    !r.getDataInfo().getInvisible());
    }
}

View File

@@ -1,89 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.eosc;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

import java.io.Serializable;
import java.util.Optional;

import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.eosc.model.EoscResult;
import eu.dnetlib.dhp.oa.graph.dump.Constants;
import eu.dnetlib.dhp.oa.graph.dump.ResultMapper;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
import eu.dnetlib.dhp.schema.oaf.Result;

/**
 * @author miriam.baglioni
 * @Date 27/07/22
 */
public class SelectEoscResultsJobStep1 implements Serializable {
    private static final Logger log = LoggerFactory.getLogger(SelectEoscResultsJobStep1.class);

    public static void main(String[] args) throws Exception {
        String jsonConfiguration = IOUtils
            .toString(
                SelectEoscResultsJobStep1.class
                    .getResourceAsStream(
                        "/eu/dnetlib/dhp/oa/graph/dump/eosc_select_result_parameters.json"));

        final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
        parser.parseArgument(args);

        Boolean isSparkSessionManaged = Optional
            .ofNullable(parser.get("isSparkSessionManaged"))
            .map(Boolean::valueOf)
            .orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        final String inputPath = parser.get("sourcePath");
        log.info("inputPath: {}", inputPath);

        final String outputPath = parser.get("outputPath");
        log.info("outputPath: {}", outputPath);

        final String resultClassName = parser.get("resultTableName");
        log.info("resultTableName: {}", resultClassName);

        Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);

        SparkConf conf = new SparkConf();

        runWithSparkSession(
            conf,
            isSparkSessionManaged,
            spark -> {
                Utils.removeOutputDir(spark, outputPath);
                selectEoscResults(spark, inputPath, outputPath, inputClazz);
            });
    }

    private static <R extends Result> void selectEoscResults(SparkSession spark, String inputPath, String outputPath,
        Class<R> inputClazz) {
        Utils
            .readPath(spark, inputPath, inputClazz)
            .filter(
                (FilterFunction<R>) r -> !r.getDataInfo().getDeletedbyinference() && !r.getDataInfo().getInvisible()
                    && r.getContext().stream().anyMatch(c -> c.getId().equals("eosc")))
            .map(
                (MapFunction<R, EoscResult>) r -> (EoscResult) ResultMapper
                    .map(r, null, Constants.DUMPTYPE.EOSC.getType()),
                Encoders.bean(EoscResult.class))
            .write()
            .mode(SaveMode.Overwrite)
            .option("compression", "gzip")
            .json(outputPath);
    }
}
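
In the workflow removed further below, this job was launched once per result type with the corresponding graph subpath and OAF class. A sketch of the equivalent standalone invocation, following the pattern of the deleted testEOSCDump test (the /tmp paths are illustrative placeholders):

// illustrative invocation; paths are placeholders
SelectEoscResultsJobStep1
    .main(
        new String[] {
            "-isSparkSessionManaged", Boolean.FALSE.toString(),
            "-sourcePath", "/tmp/graph/publication",
            "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
            "-outputPath", "/tmp/eosc_dump/publication"
        });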

View File

@@ -1,30 +0,0 @@
[
    {
        "paramName":"s",
        "paramLongName":"sourcePath",
        "paramDescription": "the path of the sequencial file to read",
        "paramRequired": true
    },
    {
        "paramName": "out",
        "paramLongName": "outputPath",
        "paramDescription": "the path used to store temporary output files",
        "paramRequired": true
    },
    {
        "paramName": "ssm",
        "paramLongName": "isSparkSessionManaged",
        "paramDescription": "true if the spark session is managed, false otherwise",
        "paramRequired": false
    },
    {
        "paramName":"tn",
        "paramLongName":"resultTableName",
        "paramDescription": "the name of the result table we are currently working on",
        "paramRequired": true
    }
]
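
These four parameters were resolved by the deleted SelectEoscResultsJobStep1 through ArgumentApplicationParser using the long names declared above; a minimal sketch of that lookup:

// sketch: how the deleted job read the parameters defined in this file
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
final String inputPath = parser.get("sourcePath");
final String outputPath = parser.get("outputPath");
final String resultClassName = parser.get("resultTableName");
Boolean isSparkSessionManaged = Optional
    .ofNullable(parser.get("isSparkSessionManaged")) // optional, defaults to true
    .map(Boolean::valueOf)
    .orElse(Boolean.TRUE);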

View File

@@ -1,30 +0,0 @@
<configuration>
    <property>
        <name>jobTracker</name>
        <value>yarnRM</value>
    </property>
    <property>
        <name>nameNode</name>
        <value>hdfs://nameservice1</value>
    </property>
    <property>
        <name>oozie.use.system.libpath</name>
        <value>true</value>
    </property>
    <property>
        <name>hiveMetastoreUris</name>
        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
    </property>
    <property>
        <name>hiveJdbcUrl</name>
        <value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
    </property>
    <property>
        <name>hiveDbName</name>
        <value>openaire</value>
    </property>
    <property>
        <name>oozie.launcher.mapreduce.user.classpath.first</name>
        <value>true</value>
    </property>
</configuration>

View File

@@ -1,231 +0,0 @@
<workflow-app name="dump_graph" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>sourcePath</name>
            <description>the source path</description>
        </property>
        <property>
            <name>outputPath</name>
            <description>the output path</description>
        </property>
        <property>
            <name>accessToken</name>
            <description>the access token used for the deposition in Zenodo</description>
        </property>
        <property>
            <name>connectionUrl</name>
            <description>the connection url for Zenodo</description>
        </property>
        <property>
            <name>metadata</name>
            <description> the metadata associated to the deposition</description>
        </property>
        <property>
            <name>depositionType</name>
            <description>the type of deposition we want to perform. "new" for brand new deposition, "version" for a new version of a published deposition (in this case the concept record id must be provided), "upload" to upload content to an open deposition for which we already have the deposition id (in this case the deposition id should be provided)</description>
        </property>
        <property>
            <name>conceptRecordId</name>
            <description>for new version, the id of the record for the old deposition</description>
        </property>
        <property>
            <name>depositionId</name>
            <description>the depositionId of a deposition open that has to be added content</description>
        </property>
        <property>
            <name>sparkDriverMemory</name>
            <description>memory for driver process</description>
        </property>
        <property>
            <name>sparkExecutorMemory</name>
            <description>memory for individual executor</description>
        </property>
        <property>
            <name>sparkExecutorCores</name>
            <description>number of cores used by single executor</description>
        </property>
        <property>
            <name>oozieActionShareLibForSpark2</name>
            <description>oozie action sharelib for spark 2.*</description>
        </property>
        <property>
            <name>spark2ExtraListeners</name>
            <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
            <description>spark 2.* extra listeners classname</description>
        </property>
        <property>
            <name>spark2SqlQueryExecutionListeners</name>
            <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
            <description>spark 2.* sql query execution listeners classname</description>
        </property>
        <property>
            <name>spark2YarnHistoryServerAddress</name>
            <description>spark 2.* yarn history server address</description>
        </property>
        <property>
            <name>spark2EventLogDir</name>
            <description>spark 2.* event log dir location</description>
        </property>
    </parameters>
    <global>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>mapreduce.job.queuename</name>
                <value>${queueName}</value>
            </property>
            <property>
                <name>oozie.launcher.mapred.job.queue.name</name>
                <value>${oozieLauncherQueueName}</value>
            </property>
            <property>
                <name>oozie.action.sharelib.for.spark</name>
                <value>${oozieActionShareLibForSpark2}</value>
            </property>
        </configuration>
    </global>
    <start to="fork_dump_eosc_result"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <fork name="fork_dump_eosc_result">
        <path start="dump_eosc_publication"/>
        <path start="dump_eosc_dataset"/>
        <path start="dump_eosc_orp"/>
        <path start="dump_eosc_software"/>
    </fork>
    <action name="dump_eosc_publication">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump Publication For EOSC </name>
            <class>eu.dnetlib.dhp.oa.graph.dump.eosc.SelectEoscResultsJobStep1</class>
            <jar>dump-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
            <arg>--outputPath</arg><arg>${workingDir}/tar/publication</arg>
        </spark>
        <ok to="wait_eosc_dump"/>
        <error to="Kill"/>
    </action>
    <action name="dump_eosc_dataset">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump Dataset For EOSC </name>
            <class>eu.dnetlib.dhp.oa.graph.dump.eosc.SelectEoscResultsJobStep1</class>
            <jar>dump-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
            <arg>--outputPath</arg><arg>${workingDir}/tar/dataset</arg>
        </spark>
        <ok to="wait_eosc_dump"/>
        <error to="Kill"/>
    </action>
    <action name="dump_eosc_orp">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump ORP For EOSC </name>
            <class>eu.dnetlib.dhp.oa.graph.dump.eosc.SelectEoscResultsJobStep1</class>
            <jar>dump-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
            <arg>--outputPath</arg><arg>${workingDir}/tar/otherresearchproduct</arg>
        </spark>
        <ok to="wait_eosc_dump"/>
        <error to="Kill"/>
    </action>
    <action name="dump_eosc_software">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump Software For EOSC </name>
            <class>eu.dnetlib.dhp.oa.graph.dump.eosc.SelectEoscResultsJobStep1</class>
            <jar>dump-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
            <arg>--outputPath</arg><arg>${workingDir}/tar/software</arg>
        </spark>
        <ok to="wait_eosc_dump"/>
        <error to="Kill"/>
    </action>
    <join name="wait_eosc_dump" to="make_archive"/>
    <action name="make_archive">
        <java>
            <main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>
            <arg>--hdfsPath</arg><arg>${outputPath}</arg>
            <arg>--nameNode</arg><arg>${nameNode}</arg>
            <arg>--sourcePath</arg><arg>${workingDir}/tar</arg>
        </java>
        <ok to="send_zenodo"/>
        <error to="Kill"/>
    </action>
    <action name="send_zenodo">
        <java>
            <main-class>eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS</main-class>
            <arg>--hdfsPath</arg><arg>${outputPath}</arg>
            <arg>--nameNode</arg><arg>${nameNode}</arg>
            <arg>--accessToken</arg><arg>${accessToken}</arg>
            <arg>--connectionUrl</arg><arg>${connectionUrl}</arg>
            <arg>--metadata</arg><arg>${metadata}</arg>
            <arg>--conceptRecordId</arg><arg>${conceptRecordId}</arg>
            <arg>--depositionType</arg><arg>${depositionType}</arg>
            <arg>--depositionId</arg><arg>${depositionId}</arg>
        </java>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
</workflow-app>

View File

@@ -25,9 +25,9 @@ import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.gson.Gson;
-import eu.dnetlib.dhp.eosc.model.EoscResult;
 import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
-import eu.dnetlib.dhp.oa.graph.dump.eosc.SelectEoscResultsJobStep1;
 import eu.dnetlib.dhp.oa.model.Instance;
 import eu.dnetlib.dhp.oa.model.OpenAccessRoute;
 import eu.dnetlib.dhp.oa.model.community.CommunityResult;
@@ -1096,46 +1096,7 @@ public class DumpJobTest {
     }
-    @Test
-    public void testEOSCDump() throws Exception {
-        final String sourcePath = getClass()
-            .getResource("/eu/dnetlib/dhp/oa/graph/dump/eosc/eosctag.json")
-            .getPath();
-        final String communityMapPath = getClass()
-            .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
-            .getPath();
-        SelectEoscResultsJobStep1
-            .main(
-                new String[] {
-                    "-isSparkSessionManaged", Boolean.FALSE.toString(),
-                    "-sourcePath",
-                    sourcePath,
-                    "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
-                    "-outputPath", workingDir.toString() + "/working"
-                });
-        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
-        JavaRDD<EoscResult> tmp = sc
-            .textFile(workingDir.toString() + "/working")
-            .map(item -> OBJECT_MAPPER.readValue(item, EoscResult.class));
-        org.apache.spark.sql.Dataset<EoscResult> verificationDataset = spark
-            .createDataset(tmp.rdd(), Encoders.bean(EoscResult.class));
-        Assertions.assertEquals(1, verificationDataset.count());
-        Assertions.assertEquals(1, verificationDataset.filter("type = 'dataset'").count());
-        Assertions.assertEquals(1, tmp.filter(d -> d.getEoscIF().getCode().equals("EOSC::Twitter Data")).count());
-        Assertions.assertEquals(1, tmp.filter(d -> d.getEoscIF().getLabel().equals("EOSC::Twitter Data")).count());
-        Assertions.assertEquals(1, tmp.filter(d -> d.getEoscIF().getUrl().equals("")).count());
-        Assertions.assertEquals(1, tmp.filter(d -> d.getEoscIF().getSemanticRelation().equals("compliesWith")).count());
-    }
     @Test
     public void testArticlePCA() {

File diff suppressed because one or more lines are too long