Compare commits
6 Commits
master
...
promote_ac
Author | SHA1 | Date |
---|---|---|
Claudio Atzori | fd17c1f17c | |
Claudio Atzori | 009dcf6aea | |
Claudio Atzori | bb82052c40 | |
Claudio Atzori | 42f5506306 | |
Alessia Bardi | f2a08d8cc2 | |
Miriam Baglioni | a5995ab557 |
|
@ -312,7 +312,8 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
|||
}
|
||||
|
||||
if (value instanceof Datasource) {
|
||||
// nothing to evaluate here
|
||||
final Datasource d = (Datasource) value;
|
||||
return Objects.nonNull(d.getOfficialname()) && StringUtils.isNotBlank(d.getOfficialname().getValue());
|
||||
} else if (value instanceof Project) {
|
||||
final Project p = (Project) value;
|
||||
return Objects.nonNull(p.getCode()) && StringUtils.isNotBlank(p.getCode().getValue());
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
/*
|
||||
* Copyright (c) 2024.
|
||||
* SPDX-FileCopyrightText: © 2023 Consiglio Nazionale delle Ricerche
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*/
|
||||
|
||||
package eu.dnetlib.dhp.actionmanager.promote;
|
||||
|
||||
/** Encodes the Actionset promotion strategies */
|
||||
public class PromoteAction {
|
||||
|
||||
/** The supported actionset promotion strategies
|
||||
*
|
||||
* ENRICH: promotes only records in the actionset matching another record in the
|
||||
* graph and enriches them applying the given MergeAndGet strategy
|
||||
* UPSERT: promotes all the records in an actionset, matching records are updated
|
||||
* using the given MergeAndGet strategy, the non-matching record as inserted as they are.
|
||||
*/
|
||||
public enum Strategy {
|
||||
ENRICH, UPSERT
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the string representation of the join type implementing the given PromoteAction.
|
||||
*
|
||||
* @param strategy the strategy to be used to promote the Actionset contents
|
||||
* @return the join type used to implement the promotion strategy
|
||||
*/
|
||||
public static String joinTypeForStrategy(PromoteAction.Strategy strategy) {
|
||||
switch (strategy) {
|
||||
case ENRICH:
|
||||
return "left_outer";
|
||||
case UPSERT:
|
||||
return "full_outer";
|
||||
default:
|
||||
throw new IllegalStateException("unsupported PromoteAction: " + strategy.toString());
|
||||
}
|
||||
}
|
||||
}
|
|
@ -67,8 +67,9 @@ public class PromoteActionPayloadForGraphTableJob {
|
|||
String outputGraphTablePath = parser.get("outputGraphTablePath");
|
||||
logger.info("outputGraphTablePath: {}", outputGraphTablePath);
|
||||
|
||||
MergeAndGet.Strategy strategy = MergeAndGet.Strategy.valueOf(parser.get("mergeAndGetStrategy").toUpperCase());
|
||||
logger.info("strategy: {}", strategy);
|
||||
MergeAndGet.Strategy mergeAndGetStrategy = MergeAndGet.Strategy
|
||||
.valueOf(parser.get("mergeAndGetStrategy").toUpperCase());
|
||||
logger.info("mergeAndGetStrategy: {}", mergeAndGetStrategy);
|
||||
|
||||
Boolean shouldGroupById = Optional
|
||||
.ofNullable(parser.get("shouldGroupById"))
|
||||
|
@ -76,6 +77,12 @@ public class PromoteActionPayloadForGraphTableJob {
|
|||
.orElse(true);
|
||||
logger.info("shouldGroupById: {}", shouldGroupById);
|
||||
|
||||
PromoteAction.Strategy promoteActionStrategy = Optional
|
||||
.ofNullable(parser.get("promoteActionStrategy"))
|
||||
.map(PromoteAction.Strategy::valueOf)
|
||||
.orElse(PromoteAction.Strategy.UPSERT);
|
||||
logger.info("promoteActionStrategy: {}", promoteActionStrategy);
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
Class<? extends Oaf> rowClazz = (Class<? extends Oaf>) Class.forName(graphTableClassName);
|
||||
@SuppressWarnings("unchecked")
|
||||
|
@ -97,7 +104,8 @@ public class PromoteActionPayloadForGraphTableJob {
|
|||
inputGraphTablePath,
|
||||
inputActionPayloadPath,
|
||||
outputGraphTablePath,
|
||||
strategy,
|
||||
mergeAndGetStrategy,
|
||||
promoteActionStrategy,
|
||||
rowClazz,
|
||||
actionPayloadClazz,
|
||||
shouldGroupById);
|
||||
|
@ -124,14 +132,16 @@ public class PromoteActionPayloadForGraphTableJob {
|
|||
String inputGraphTablePath,
|
||||
String inputActionPayloadPath,
|
||||
String outputGraphTablePath,
|
||||
MergeAndGet.Strategy strategy,
|
||||
MergeAndGet.Strategy mergeAndGetStrategy,
|
||||
PromoteAction.Strategy promoteActionStrategy,
|
||||
Class<G> rowClazz,
|
||||
Class<A> actionPayloadClazz, Boolean shouldGroupById) {
|
||||
Dataset<G> rowDS = readGraphTable(spark, inputGraphTablePath, rowClazz);
|
||||
Dataset<A> actionPayloadDS = readActionPayload(spark, inputActionPayloadPath, actionPayloadClazz);
|
||||
|
||||
Dataset<G> result = promoteActionPayloadForGraphTable(
|
||||
rowDS, actionPayloadDS, strategy, rowClazz, actionPayloadClazz, shouldGroupById)
|
||||
rowDS, actionPayloadDS, mergeAndGetStrategy, promoteActionStrategy, rowClazz, actionPayloadClazz,
|
||||
shouldGroupById)
|
||||
.map((MapFunction<G, G>) value -> value, Encoders.bean(rowClazz));
|
||||
|
||||
saveGraphTable(result, outputGraphTablePath);
|
||||
|
@ -183,7 +193,8 @@ public class PromoteActionPayloadForGraphTableJob {
|
|||
private static <G extends Oaf, A extends Oaf> Dataset<G> promoteActionPayloadForGraphTable(
|
||||
Dataset<G> rowDS,
|
||||
Dataset<A> actionPayloadDS,
|
||||
MergeAndGet.Strategy strategy,
|
||||
MergeAndGet.Strategy mergeAndGetStrategy,
|
||||
PromoteAction.Strategy promoteActionStrategy,
|
||||
Class<G> rowClazz,
|
||||
Class<A> actionPayloadClazz,
|
||||
Boolean shouldGroupById) {
|
||||
|
@ -195,8 +206,9 @@ public class PromoteActionPayloadForGraphTableJob {
|
|||
|
||||
SerializableSupplier<Function<G, String>> rowIdFn = ModelSupport::idFn;
|
||||
SerializableSupplier<Function<A, String>> actionPayloadIdFn = ModelSupport::idFn;
|
||||
SerializableSupplier<BiFunction<G, A, G>> mergeRowWithActionPayloadAndGetFn = MergeAndGet.functionFor(strategy);
|
||||
SerializableSupplier<BiFunction<G, G, G>> mergeRowsAndGetFn = MergeAndGet.functionFor(strategy);
|
||||
SerializableSupplier<BiFunction<G, A, G>> mergeRowWithActionPayloadAndGetFn = MergeAndGet
|
||||
.functionFor(mergeAndGetStrategy);
|
||||
SerializableSupplier<BiFunction<G, G, G>> mergeRowsAndGetFn = MergeAndGet.functionFor(mergeAndGetStrategy);
|
||||
SerializableSupplier<G> zeroFn = zeroFn(rowClazz);
|
||||
SerializableSupplier<Function<G, Boolean>> isNotZeroFn = PromoteActionPayloadForGraphTableJob::isNotZeroFnUsingIdOrSourceAndTarget;
|
||||
|
||||
|
@ -207,6 +219,7 @@ public class PromoteActionPayloadForGraphTableJob {
|
|||
rowIdFn,
|
||||
actionPayloadIdFn,
|
||||
mergeRowWithActionPayloadAndGetFn,
|
||||
promoteActionStrategy,
|
||||
rowClazz,
|
||||
actionPayloadClazz);
|
||||
|
||||
|
|
|
@ -34,6 +34,7 @@ public class PromoteActionPayloadFunctions {
|
|||
* @param rowIdFn Function used to get the id of graph table row
|
||||
* @param actionPayloadIdFn Function used to get id of action payload instance
|
||||
* @param mergeAndGetFn Function used to merge graph table row and action payload instance
|
||||
* @param promoteActionStrategy the Actionset promotion strategy
|
||||
* @param rowClazz Class of graph table
|
||||
* @param actionPayloadClazz Class of action payload
|
||||
* @param <G> Type of graph table row
|
||||
|
@ -46,6 +47,7 @@ public class PromoteActionPayloadFunctions {
|
|||
SerializableSupplier<Function<G, String>> rowIdFn,
|
||||
SerializableSupplier<Function<A, String>> actionPayloadIdFn,
|
||||
SerializableSupplier<BiFunction<G, A, G>> mergeAndGetFn,
|
||||
PromoteAction.Strategy promoteActionStrategy,
|
||||
Class<G> rowClazz,
|
||||
Class<A> actionPayloadClazz) {
|
||||
if (!isSubClass(rowClazz, actionPayloadClazz)) {
|
||||
|
@ -61,7 +63,7 @@ public class PromoteActionPayloadFunctions {
|
|||
.joinWith(
|
||||
actionPayloadWithIdDS,
|
||||
rowWithIdDS.col("_1").equalTo(actionPayloadWithIdDS.col("_1")),
|
||||
"full_outer")
|
||||
PromoteAction.joinTypeForStrategy(promoteActionStrategy))
|
||||
.map(
|
||||
(MapFunction<Tuple2<Tuple2<String, G>, Tuple2<String, A>>, G>) value -> {
|
||||
Optional<G> rowOpt = Optional.ofNullable(value._1()).map(Tuple2::_2);
|
||||
|
|
|
@ -41,6 +41,12 @@
|
|||
"paramDescription": "strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "pas",
|
||||
"paramLongName": "promoteActionStrategy",
|
||||
"paramDescription": "strategy for promoting the actionset contents into the graph tables, ENRICH or UPSERT (default)",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName": "sgid",
|
||||
"paramLongName": "shouldGroupById",
|
||||
|
|
|
@ -115,6 +115,7 @@
|
|||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
<arg>--outputGraphTablePath</arg><arg>${workingDir}/dataset</arg>
|
||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||
</spark>
|
||||
<ok to="DecisionPromoteResultActionPayloadForDatasetTable"/>
|
||||
|
@ -167,6 +168,7 @@
|
|||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
|
||||
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/dataset</arg>
|
||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
|
|
|
@ -106,6 +106,7 @@
|
|||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg>
|
||||
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/datasource</arg>
|
||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
|
|
|
@ -106,6 +106,7 @@
|
|||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg>
|
||||
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/organization</arg>
|
||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
|
|
|
@ -114,6 +114,7 @@
|
|||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
<arg>--outputGraphTablePath</arg><arg>${workingDir}/otherresearchproduct</arg>
|
||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||
</spark>
|
||||
<ok to="DecisionPromoteResultActionPayloadForOtherResearchProductTable"/>
|
||||
|
@ -166,6 +167,7 @@
|
|||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
|
||||
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/otherresearchproduct</arg>
|
||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
|
|
|
@ -106,6 +106,7 @@
|
|||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
|
||||
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/project</arg>
|
||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
|
|
|
@ -115,6 +115,7 @@
|
|||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
<arg>--outputGraphTablePath</arg><arg>${workingDir}/publication</arg>
|
||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||
</spark>
|
||||
<ok to="DecisionPromoteResultActionPayloadForPublicationTable"/>
|
||||
|
@ -167,6 +168,7 @@
|
|||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
|
||||
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/publication</arg>
|
||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
|
|
|
@ -107,6 +107,7 @@
|
|||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg>
|
||||
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/relation</arg>
|
||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
|
|
|
@ -114,6 +114,7 @@
|
|||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--outputGraphTablePath</arg><arg>${workingDir}/software</arg>
|
||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||
</spark>
|
||||
<ok to="DecisionPromoteResultActionPayloadForSoftwareTable"/>
|
||||
|
@ -166,6 +167,7 @@
|
|||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
|
||||
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/software</arg>
|
||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
|
|
|
@ -54,7 +54,7 @@ public class PromoteActionPayloadFunctionsTest {
|
|||
RuntimeException.class,
|
||||
() -> PromoteActionPayloadFunctions
|
||||
.joinGraphTableWithActionPayloadAndMerge(
|
||||
null, null, null, null, null, OafImplSubSub.class, OafImpl.class));
|
||||
null, null, null, null, null, null, OafImplSubSub.class, OafImpl.class));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -104,6 +104,7 @@ public class PromoteActionPayloadFunctionsTest {
|
|||
rowIdFn,
|
||||
actionPayloadIdFn,
|
||||
mergeAndGetFn,
|
||||
PromoteAction.Strategy.UPSERT,
|
||||
OafImplSubSub.class,
|
||||
OafImplSubSub.class)
|
||||
.collectAsList();
|
||||
|
@ -183,6 +184,7 @@ public class PromoteActionPayloadFunctionsTest {
|
|||
rowIdFn,
|
||||
actionPayloadIdFn,
|
||||
mergeAndGetFn,
|
||||
PromoteAction.Strategy.UPSERT,
|
||||
OafImplSubSub.class,
|
||||
OafImplSub.class)
|
||||
.collectAsList();
|
||||
|
|
|
@ -114,7 +114,7 @@
|
|||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/orcid/preparedInfo/targetOrcidAssoc</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/orcid/targetOrcidAssoc</arg>
|
||||
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
|
||||
</spark>
|
||||
<ok to="wait"/>
|
||||
|
@ -142,7 +142,7 @@
|
|||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/orcid/preparedInfo/targetOrcidAssoc</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/orcid/targetOrcidAssoc</arg>
|
||||
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
|
||||
</spark>
|
||||
<ok to="wait"/>
|
||||
|
@ -170,7 +170,7 @@
|
|||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/orcid/preparedInfo/targetOrcidAssoc</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/orcid/targetOrcidAssoc</arg>
|
||||
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
|
||||
</spark>
|
||||
<ok to="wait"/>
|
||||
|
@ -198,7 +198,7 @@
|
|||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/orcid/preparedInfo/targetOrcidAssoc</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/orcid/targetOrcidAssoc</arg>
|
||||
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
|
||||
</spark>
|
||||
<ok to="wait"/>
|
||||
|
@ -225,8 +225,8 @@
|
|||
--conf spark.dynamicAllocation.enabled=true
|
||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/orcid/orcidprop</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/orcid/orcidprop/mergedOrcidAssoc</arg>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/orcid/targetOrcidAssoc</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/orcid/mergedOrcidAssoc</arg>
|
||||
</spark>
|
||||
<ok to="fork-join-exec-propagation"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -247,9 +247,10 @@
|
|||
<class>eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob</class>
|
||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=4
|
||||
--executor-memory=4G
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.executor.memoryOverhead=5G
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
|
@ -259,9 +260,9 @@
|
|||
--conf spark.speculation=false
|
||||
--conf spark.hadoop.mapreduce.map.speculative=false
|
||||
--conf spark.hadoop.mapreduce.reduce.speculative=false
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
--conf spark.sql.shuffle.partitions=15000
|
||||
</spark-opts>
|
||||
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/orcidprop/mergedOrcidAssoc</arg>
|
||||
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/mergedOrcidAssoc</arg>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
|
||||
|
@ -291,7 +292,7 @@
|
|||
--conf spark.hadoop.mapreduce.map.speculative=false
|
||||
--conf spark.hadoop.mapreduce.reduce.speculative=false
|
||||
</spark-opts>
|
||||
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/orcidprop/mergedOrcidAssoc</arg>
|
||||
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/mergedOrcidAssoc</arg>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
|
||||
|
@ -321,7 +322,7 @@
|
|||
--conf spark.hadoop.mapreduce.map.speculative=false
|
||||
--conf spark.hadoop.mapreduce.reduce.speculative=false
|
||||
</spark-opts>
|
||||
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/orcidprop/mergedOrcidAssoc</arg>
|
||||
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/mergedOrcidAssoc</arg>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
|
||||
|
@ -351,7 +352,7 @@
|
|||
--conf spark.hadoop.mapreduce.map.speculative=false
|
||||
--conf spark.hadoop.mapreduce.reduce.speculative=false
|
||||
</spark-opts>
|
||||
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/orcidprop/mergedOrcidAssoc</arg>
|
||||
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/mergedOrcidAssoc</arg>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/software</arg>
|
||||
|
|
|
@ -49,10 +49,10 @@
|
|||
|
||||
<action name="reset_outputpath">
|
||||
<fs>
|
||||
<delete path="${graphPath}/datasource"/>
|
||||
<delete path="${graphPath}/organization"/>
|
||||
<delete path="${graphPath}/project"/>
|
||||
<delete path="${graphPath}/relation"/>
|
||||
<delete path="${targetPath}/datasource"/>
|
||||
<delete path="${targetPath}/organization"/>
|
||||
<delete path="${targetPath}/project"/>
|
||||
<delete path="${targetPath}/relation"/>
|
||||
</fs>
|
||||
<ok to="copy_datasource"/>
|
||||
<error to="Kill"/>
|
||||
|
|
|
@ -1250,6 +1250,21 @@ class MappersTest {
|
|||
System.out.println("***************");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testIRISPub() throws IOException, DocumentException {
|
||||
final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("iris-odf.xml")));
|
||||
final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
|
||||
System.out.println("***************");
|
||||
System.out.println(new ObjectMapper().writeValueAsString(list));
|
||||
System.out.println("***************");
|
||||
final Publication p = (Publication) list.get(0);
|
||||
assertNotNull(p.getInstance().get(0).getUrl().get(0));
|
||||
assertValidId(p.getId());
|
||||
System.out.println(p.getInstance().get(0).getUrl());
|
||||
p.getPid().forEach(x -> System.out.println(x.getValue()));
|
||||
p.getInstance().get(0).getAlternateIdentifier().forEach(x -> System.out.println(x.getValue()));
|
||||
|
||||
}
|
||||
@Test
|
||||
void testNotWellFormed() throws IOException {
|
||||
final String xml = IOUtils
|
||||
|
|
|
@ -0,0 +1,215 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<record xmlns:datacite="http://datacite.org/schema/kernel-4"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
|
||||
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
|
||||
xmlns:oaf="http://namespace.openaire.eu/oaf"
|
||||
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
|
||||
xmlns:oaire="http://namespace.openaire.eu/schema/oaire/"
|
||||
xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
||||
<oai:header xmlns="http://namespace.openaire.eu/" xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance">
|
||||
<identifier>oai:air.unimi.it:2434/907506</identifier>
|
||||
<datestamp>2024-01-04T12:42:51Z</datestamp>
|
||||
<setSpec>com_2434_73555</setSpec>
|
||||
<setSpec>col_2434_73557</setSpec>
|
||||
<setSpec>openaire</setSpec>
|
||||
<dr:dateOfTransformation>2024-01-29T16:56:50.632Z</dr:dateOfTransformation>
|
||||
|
||||
<dri:objIdentifier>od______1261::ff2d9e058e7bea90a27f41c31078e601</dri:objIdentifier>
|
||||
<dri:recordIdentifier>oai:air.unimi.it:2434/907506</dri:recordIdentifier>
|
||||
<dri:dateOfCollection/>
|
||||
<dri:mdFormat/>
|
||||
<dri:mdFormatInterpretation/>
|
||||
<dri:repositoryId/>
|
||||
<oaf:datasourceprefix> od______1261</oaf:datasourceprefix>
|
||||
</oai:header>
|
||||
<metadata>
|
||||
<oaire:resource xmlns:oaire="http://namespace.openaire.eu/schema/oaire/"
|
||||
xmlns:exslt="http://exslt.org/common"
|
||||
xmlns:xs="http://www.w3.org/2001/XMLSchema"
|
||||
xmlns:rdf="http://www.w3.org/TR/rdf-concepts/"
|
||||
xmlns:doc="http://www.lyncode.com/xoai"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:datacite="http://datacite.org/schema/kernel-4"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xmlns:vc="http://www.w3.org/2007/XMLSchema-versioning"
|
||||
xmlns="http://www.openarchives.org/OAI/2.0/"
|
||||
xsi:schemaLocation="http://namespace.openaire.eu/schema/oaire/ https://www.openaire.eu/schema/repo-lit/4.0/openaire.xsd">
|
||||
<datacite:titles>
|
||||
<datacite:title xml:lang="en">Ensuring tests of conservation interventions build on existing literature</datacite:title>
|
||||
</datacite:titles>
|
||||
<datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>W.J. Sutherland</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>S.T. Alvarez-Castaneda</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>T. Amano</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>R. Ambrosini</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>P. Atkinson</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>J.M. Baxter</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>A.L. Bond</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>P.J. Boon</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>K.L. Buchanan</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>J. Barlow</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>G. Bogliani</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>O.M. Bragg</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>M. Burgman</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>M.W. Cadotte</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>M. Calver</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>S.J. Cooke</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>R.T. Corlett</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>V. Devictor</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>J.G. Ewen</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>M. Fisher</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>G. Freeman</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>E. Game</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>B.J. Godley</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>C. Gortazar</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>I.R. Hartley</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>D.L. Hawksworth</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>K.A. Hobson</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>M.-. Lu</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>B. Martin-Lopez</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>K. Ma</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>A. Machado</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>D. Mae</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>M. Mangiacotti</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>D.J. Mccafferty</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>V. Melfi</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>S. Molur</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>A.J. Moore</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>S.D. Murphy</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>D. Norri</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>A.P.E. van Oudenhoven</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>J. Power</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>E.C. Ree</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>M.W. Schwartz</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>I. Storch</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>C. Wordley</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
</datacite:creator>
|
||||
<datacite:relatedIdentifiers>
|
||||
</datacite:relatedIdentifiers>
|
||||
<datacite:dates>
|
||||
<datacite:date dateType="Accepted">2020</datacite:date>
|
||||
<datacite:date dateType="Issued">2020</datacite:date>
|
||||
<datacite:date dateType="Available">2022-06-20</datacite:date>
|
||||
</datacite:dates>
|
||||
<dc:language>eng</dc:language>
|
||||
<dc:publisher>Wiley Blackwell Publishing</dc:publisher>
|
||||
<oaire:resourceType resourceTypeGeneral="literature"
|
||||
uri="http://purl.org/coar/resource_type/c_6501">journal article</oaire:resourceType>
|
||||
<dc:format>application/pdf</dc:format>
|
||||
<datacite:identifier xmlns:datacite="http://datacite.org/schema/kernel-3"
|
||||
identifierType="Handle">2434/907506</datacite:identifier>
|
||||
<datacite:rights rightsURI="http://purl.org/coar/access_right/c_abf2">open access</datacite:rights>
|
||||
<datacite:subjects>
|
||||
<datacite:subject>Conservation of Natural Resources</datacite:subject>
|
||||
</datacite:subjects>
|
||||
<datacite:sizes/>
|
||||
<datacite:sizes/>
|
||||
<datacite:sizes>
|
||||
<datacite:size>191802 bytes</datacite:size>
|
||||
</datacite:sizes>
|
||||
<oaire:file accessRightsURI="" mimeType="application/pdf" objectType="fulltext">https://air.unimi.it/bitstream/2434/907506/4/Full%20manuscript%20resubmitted.pdf</oaire:file>
|
||||
</oaire:resource>
|
||||
<oaf:identifier identifierType="DOI">10.1111/cobi.13555</oaf:identifier>
|
||||
<oaf:identifier identifierType="PMID">32779884</oaf:identifier>
|
||||
<oaf:fulltext>https://air.unimi.it/bitstream/2434/907506/4/Full%20manuscript%20resubmitted.pdf</oaf:fulltext>
|
||||
<dr:CobjCategory type="publication">0001</dr:CobjCategory>
|
||||
<oaf:dateAccepted>2020-01-01</oaf:dateAccepted>
|
||||
<oaf:accessrights>OPEN</oaf:accessrights>
|
||||
<oaf:language>eng</oaf:language>
|
||||
<oaf:hostedBy name="Archivio Istituzionale della Ricerca dell'Università degli Studi di Milano"
|
||||
id="opendoar____::1261"/>
|
||||
<oaf:collectedFrom name="Archivio Istituzionale della Ricerca dell'Università degli Studi di Milano"
|
||||
id="opendoar____::1261"/>
|
||||
</metadata>
|
||||
</record>
|
|
@ -244,4 +244,27 @@ public class XmlRecordFactoryTest {
|
|||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIrisGuidelines4() throws DocumentException, IOException {
|
||||
final ContextMapper contextMapper = new ContextMapper();
|
||||
|
||||
final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
|
||||
XmlConverterJob.schemaLocation);
|
||||
|
||||
final Publication p = OBJECT_MAPPER
|
||||
.readValue(
|
||||
IOUtils.toString(getClass().getResourceAsStream("iris-odf-4.json")),
|
||||
Publication.class);
|
||||
|
||||
final String xml = xmlRecordFactory.build(new JoinedEntity<>(p));
|
||||
|
||||
assertNotNull(xml);
|
||||
|
||||
final Document doc = new SAXReader().read(new StringReader(xml));
|
||||
|
||||
assertNotNull(doc);
|
||||
System.out.println(doc.asXML());
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue