forked from D-Net/dnet-hadoop
Merge branch 'beta' into import_orps_fix
This commit is contained in:
commit
a63b091bae
|
@ -23,15 +23,18 @@ public class InstanceTypeMatch extends AbstractListComparator {
|
||||||
|
|
||||||
// jolly types
|
// jolly types
|
||||||
translationMap.put("Conference object", "*");
|
translationMap.put("Conference object", "*");
|
||||||
|
translationMap.put("Research", "*");
|
||||||
translationMap.put("Other literature type", "*");
|
translationMap.put("Other literature type", "*");
|
||||||
translationMap.put("Unknown", "*");
|
translationMap.put("Unknown", "*");
|
||||||
translationMap.put("UNKNOWN", "*");
|
translationMap.put("UNKNOWN", "*");
|
||||||
|
|
||||||
// article types
|
// article types
|
||||||
translationMap.put("Article", "Article");
|
translationMap.put("Article", "Article");
|
||||||
|
translationMap.put("Journal", "Article");
|
||||||
translationMap.put("Data Paper", "Article");
|
translationMap.put("Data Paper", "Article");
|
||||||
translationMap.put("Software Paper", "Article");
|
translationMap.put("Software Paper", "Article");
|
||||||
translationMap.put("Preprint", "Article");
|
translationMap.put("Preprint", "Article");
|
||||||
|
translationMap.put("Part of book or chapter of book", "Article");
|
||||||
|
|
||||||
// thesis types
|
// thesis types
|
||||||
translationMap.put("Thesis", "Thesis");
|
translationMap.put("Thesis", "Thesis");
|
||||||
|
|
|
@ -0,0 +1,39 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2024.
|
||||||
|
* SPDX-FileCopyrightText: © 2023 Consiglio Nazionale delle Ricerche
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
*/
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.actionmanager.promote;
|
||||||
|
|
||||||
|
/** Encodes the Actionset promotion strategies */
|
||||||
|
public class PromoteAction {
|
||||||
|
|
||||||
|
/** The supported actionset promotion strategies
|
||||||
|
*
|
||||||
|
* ENRICH: promotes only records in the actionset matching another record in the
|
||||||
|
* graph and enriches them applying the given MergeAndGet strategy
|
||||||
|
* UPSERT: promotes all the records in an actionset, matching records are updated
|
||||||
|
* using the given MergeAndGet strategy, the non-matching record as inserted as they are.
|
||||||
|
*/
|
||||||
|
public enum Strategy {
|
||||||
|
ENRICH, UPSERT
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the string representation of the join type implementing the given PromoteAction.
|
||||||
|
*
|
||||||
|
* @param strategy the strategy to be used to promote the Actionset contents
|
||||||
|
* @return the join type used to implement the promotion strategy
|
||||||
|
*/
|
||||||
|
public static String joinTypeForStrategy(PromoteAction.Strategy strategy) {
|
||||||
|
switch (strategy) {
|
||||||
|
case ENRICH:
|
||||||
|
return "left_outer";
|
||||||
|
case UPSERT:
|
||||||
|
return "full_outer";
|
||||||
|
default:
|
||||||
|
throw new IllegalStateException("unsupported PromoteAction: " + strategy.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -67,8 +67,9 @@ public class PromoteActionPayloadForGraphTableJob {
|
||||||
String outputGraphTablePath = parser.get("outputGraphTablePath");
|
String outputGraphTablePath = parser.get("outputGraphTablePath");
|
||||||
logger.info("outputGraphTablePath: {}", outputGraphTablePath);
|
logger.info("outputGraphTablePath: {}", outputGraphTablePath);
|
||||||
|
|
||||||
MergeAndGet.Strategy strategy = MergeAndGet.Strategy.valueOf(parser.get("mergeAndGetStrategy").toUpperCase());
|
MergeAndGet.Strategy mergeAndGetStrategy = MergeAndGet.Strategy
|
||||||
logger.info("strategy: {}", strategy);
|
.valueOf(parser.get("mergeAndGetStrategy").toUpperCase());
|
||||||
|
logger.info("mergeAndGetStrategy: {}", mergeAndGetStrategy);
|
||||||
|
|
||||||
Boolean shouldGroupById = Optional
|
Boolean shouldGroupById = Optional
|
||||||
.ofNullable(parser.get("shouldGroupById"))
|
.ofNullable(parser.get("shouldGroupById"))
|
||||||
|
@ -76,6 +77,12 @@ public class PromoteActionPayloadForGraphTableJob {
|
||||||
.orElse(true);
|
.orElse(true);
|
||||||
logger.info("shouldGroupById: {}", shouldGroupById);
|
logger.info("shouldGroupById: {}", shouldGroupById);
|
||||||
|
|
||||||
|
PromoteAction.Strategy promoteActionStrategy = Optional
|
||||||
|
.ofNullable(parser.get("promoteActionStrategy"))
|
||||||
|
.map(PromoteAction.Strategy::valueOf)
|
||||||
|
.orElse(PromoteAction.Strategy.UPSERT);
|
||||||
|
logger.info("promoteActionStrategy: {}", promoteActionStrategy);
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
Class<? extends Oaf> rowClazz = (Class<? extends Oaf>) Class.forName(graphTableClassName);
|
Class<? extends Oaf> rowClazz = (Class<? extends Oaf>) Class.forName(graphTableClassName);
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
|
@ -97,7 +104,8 @@ public class PromoteActionPayloadForGraphTableJob {
|
||||||
inputGraphTablePath,
|
inputGraphTablePath,
|
||||||
inputActionPayloadPath,
|
inputActionPayloadPath,
|
||||||
outputGraphTablePath,
|
outputGraphTablePath,
|
||||||
strategy,
|
mergeAndGetStrategy,
|
||||||
|
promoteActionStrategy,
|
||||||
rowClazz,
|
rowClazz,
|
||||||
actionPayloadClazz,
|
actionPayloadClazz,
|
||||||
shouldGroupById);
|
shouldGroupById);
|
||||||
|
@ -124,14 +132,16 @@ public class PromoteActionPayloadForGraphTableJob {
|
||||||
String inputGraphTablePath,
|
String inputGraphTablePath,
|
||||||
String inputActionPayloadPath,
|
String inputActionPayloadPath,
|
||||||
String outputGraphTablePath,
|
String outputGraphTablePath,
|
||||||
MergeAndGet.Strategy strategy,
|
MergeAndGet.Strategy mergeAndGetStrategy,
|
||||||
|
PromoteAction.Strategy promoteActionStrategy,
|
||||||
Class<G> rowClazz,
|
Class<G> rowClazz,
|
||||||
Class<A> actionPayloadClazz, Boolean shouldGroupById) {
|
Class<A> actionPayloadClazz, Boolean shouldGroupById) {
|
||||||
Dataset<G> rowDS = readGraphTable(spark, inputGraphTablePath, rowClazz);
|
Dataset<G> rowDS = readGraphTable(spark, inputGraphTablePath, rowClazz);
|
||||||
Dataset<A> actionPayloadDS = readActionPayload(spark, inputActionPayloadPath, actionPayloadClazz);
|
Dataset<A> actionPayloadDS = readActionPayload(spark, inputActionPayloadPath, actionPayloadClazz);
|
||||||
|
|
||||||
Dataset<G> result = promoteActionPayloadForGraphTable(
|
Dataset<G> result = promoteActionPayloadForGraphTable(
|
||||||
rowDS, actionPayloadDS, strategy, rowClazz, actionPayloadClazz, shouldGroupById)
|
rowDS, actionPayloadDS, mergeAndGetStrategy, promoteActionStrategy, rowClazz, actionPayloadClazz,
|
||||||
|
shouldGroupById)
|
||||||
.map((MapFunction<G, G>) value -> value, Encoders.bean(rowClazz));
|
.map((MapFunction<G, G>) value -> value, Encoders.bean(rowClazz));
|
||||||
|
|
||||||
saveGraphTable(result, outputGraphTablePath);
|
saveGraphTable(result, outputGraphTablePath);
|
||||||
|
@ -183,7 +193,8 @@ public class PromoteActionPayloadForGraphTableJob {
|
||||||
private static <G extends Oaf, A extends Oaf> Dataset<G> promoteActionPayloadForGraphTable(
|
private static <G extends Oaf, A extends Oaf> Dataset<G> promoteActionPayloadForGraphTable(
|
||||||
Dataset<G> rowDS,
|
Dataset<G> rowDS,
|
||||||
Dataset<A> actionPayloadDS,
|
Dataset<A> actionPayloadDS,
|
||||||
MergeAndGet.Strategy strategy,
|
MergeAndGet.Strategy mergeAndGetStrategy,
|
||||||
|
PromoteAction.Strategy promoteActionStrategy,
|
||||||
Class<G> rowClazz,
|
Class<G> rowClazz,
|
||||||
Class<A> actionPayloadClazz,
|
Class<A> actionPayloadClazz,
|
||||||
Boolean shouldGroupById) {
|
Boolean shouldGroupById) {
|
||||||
|
@ -195,8 +206,9 @@ public class PromoteActionPayloadForGraphTableJob {
|
||||||
|
|
||||||
SerializableSupplier<Function<G, String>> rowIdFn = ModelSupport::idFn;
|
SerializableSupplier<Function<G, String>> rowIdFn = ModelSupport::idFn;
|
||||||
SerializableSupplier<Function<A, String>> actionPayloadIdFn = ModelSupport::idFn;
|
SerializableSupplier<Function<A, String>> actionPayloadIdFn = ModelSupport::idFn;
|
||||||
SerializableSupplier<BiFunction<G, A, G>> mergeRowWithActionPayloadAndGetFn = MergeAndGet.functionFor(strategy);
|
SerializableSupplier<BiFunction<G, A, G>> mergeRowWithActionPayloadAndGetFn = MergeAndGet
|
||||||
SerializableSupplier<BiFunction<G, G, G>> mergeRowsAndGetFn = MergeAndGet.functionFor(strategy);
|
.functionFor(mergeAndGetStrategy);
|
||||||
|
SerializableSupplier<BiFunction<G, G, G>> mergeRowsAndGetFn = MergeAndGet.functionFor(mergeAndGetStrategy);
|
||||||
SerializableSupplier<G> zeroFn = zeroFn(rowClazz);
|
SerializableSupplier<G> zeroFn = zeroFn(rowClazz);
|
||||||
SerializableSupplier<Function<G, Boolean>> isNotZeroFn = PromoteActionPayloadForGraphTableJob::isNotZeroFnUsingIdOrSourceAndTarget;
|
SerializableSupplier<Function<G, Boolean>> isNotZeroFn = PromoteActionPayloadForGraphTableJob::isNotZeroFnUsingIdOrSourceAndTarget;
|
||||||
|
|
||||||
|
@ -207,6 +219,7 @@ public class PromoteActionPayloadForGraphTableJob {
|
||||||
rowIdFn,
|
rowIdFn,
|
||||||
actionPayloadIdFn,
|
actionPayloadIdFn,
|
||||||
mergeRowWithActionPayloadAndGetFn,
|
mergeRowWithActionPayloadAndGetFn,
|
||||||
|
promoteActionStrategy,
|
||||||
rowClazz,
|
rowClazz,
|
||||||
actionPayloadClazz);
|
actionPayloadClazz);
|
||||||
|
|
||||||
|
|
|
@ -34,6 +34,7 @@ public class PromoteActionPayloadFunctions {
|
||||||
* @param rowIdFn Function used to get the id of graph table row
|
* @param rowIdFn Function used to get the id of graph table row
|
||||||
* @param actionPayloadIdFn Function used to get id of action payload instance
|
* @param actionPayloadIdFn Function used to get id of action payload instance
|
||||||
* @param mergeAndGetFn Function used to merge graph table row and action payload instance
|
* @param mergeAndGetFn Function used to merge graph table row and action payload instance
|
||||||
|
* @param promoteActionStrategy the Actionset promotion strategy
|
||||||
* @param rowClazz Class of graph table
|
* @param rowClazz Class of graph table
|
||||||
* @param actionPayloadClazz Class of action payload
|
* @param actionPayloadClazz Class of action payload
|
||||||
* @param <G> Type of graph table row
|
* @param <G> Type of graph table row
|
||||||
|
@ -46,6 +47,7 @@ public class PromoteActionPayloadFunctions {
|
||||||
SerializableSupplier<Function<G, String>> rowIdFn,
|
SerializableSupplier<Function<G, String>> rowIdFn,
|
||||||
SerializableSupplier<Function<A, String>> actionPayloadIdFn,
|
SerializableSupplier<Function<A, String>> actionPayloadIdFn,
|
||||||
SerializableSupplier<BiFunction<G, A, G>> mergeAndGetFn,
|
SerializableSupplier<BiFunction<G, A, G>> mergeAndGetFn,
|
||||||
|
PromoteAction.Strategy promoteActionStrategy,
|
||||||
Class<G> rowClazz,
|
Class<G> rowClazz,
|
||||||
Class<A> actionPayloadClazz) {
|
Class<A> actionPayloadClazz) {
|
||||||
if (!isSubClass(rowClazz, actionPayloadClazz)) {
|
if (!isSubClass(rowClazz, actionPayloadClazz)) {
|
||||||
|
@ -61,7 +63,7 @@ public class PromoteActionPayloadFunctions {
|
||||||
.joinWith(
|
.joinWith(
|
||||||
actionPayloadWithIdDS,
|
actionPayloadWithIdDS,
|
||||||
rowWithIdDS.col("_1").equalTo(actionPayloadWithIdDS.col("_1")),
|
rowWithIdDS.col("_1").equalTo(actionPayloadWithIdDS.col("_1")),
|
||||||
"full_outer")
|
PromoteAction.joinTypeForStrategy(promoteActionStrategy))
|
||||||
.map(
|
.map(
|
||||||
(MapFunction<Tuple2<Tuple2<String, G>, Tuple2<String, A>>, G>) value -> {
|
(MapFunction<Tuple2<Tuple2<String, G>, Tuple2<String, A>>, G>) value -> {
|
||||||
Optional<G> rowOpt = Optional.ofNullable(value._1()).map(Tuple2::_2);
|
Optional<G> rowOpt = Optional.ofNullable(value._1()).map(Tuple2::_2);
|
||||||
|
|
|
@ -41,6 +41,12 @@
|
||||||
"paramDescription": "strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET",
|
"paramDescription": "strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET",
|
||||||
"paramRequired": true
|
"paramRequired": true
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"paramName": "pas",
|
||||||
|
"paramLongName": "promoteActionStrategy",
|
||||||
|
"paramDescription": "strategy for promoting the actionset contents into the graph tables, ENRICH or UPSERT (default)",
|
||||||
|
"paramRequired": false
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"paramName": "sgid",
|
"paramName": "sgid",
|
||||||
"paramLongName": "shouldGroupById",
|
"paramLongName": "shouldGroupById",
|
||||||
|
|
|
@ -115,6 +115,7 @@
|
||||||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||||
<arg>--outputGraphTablePath</arg><arg>${workingDir}/dataset</arg>
|
<arg>--outputGraphTablePath</arg><arg>${workingDir}/dataset</arg>
|
||||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||||
|
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||||
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="DecisionPromoteResultActionPayloadForDatasetTable"/>
|
<ok to="DecisionPromoteResultActionPayloadForDatasetTable"/>
|
||||||
|
@ -167,6 +168,7 @@
|
||||||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
|
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
|
||||||
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/dataset</arg>
|
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/dataset</arg>
|
||||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||||
|
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||||
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
|
|
|
@ -106,6 +106,7 @@
|
||||||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg>
|
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg>
|
||||||
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/datasource</arg>
|
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/datasource</arg>
|
||||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||||
|
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
|
|
@ -106,6 +106,7 @@
|
||||||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg>
|
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg>
|
||||||
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/organization</arg>
|
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/organization</arg>
|
||||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||||
|
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
|
|
@ -114,6 +114,7 @@
|
||||||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||||
<arg>--outputGraphTablePath</arg><arg>${workingDir}/otherresearchproduct</arg>
|
<arg>--outputGraphTablePath</arg><arg>${workingDir}/otherresearchproduct</arg>
|
||||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||||
|
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||||
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="DecisionPromoteResultActionPayloadForOtherResearchProductTable"/>
|
<ok to="DecisionPromoteResultActionPayloadForOtherResearchProductTable"/>
|
||||||
|
@ -166,6 +167,7 @@
|
||||||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
|
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
|
||||||
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/otherresearchproduct</arg>
|
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/otherresearchproduct</arg>
|
||||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||||
|
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||||
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
|
|
|
@ -106,6 +106,7 @@
|
||||||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
|
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
|
||||||
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/project</arg>
|
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/project</arg>
|
||||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||||
|
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
|
|
@ -115,6 +115,7 @@
|
||||||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||||
<arg>--outputGraphTablePath</arg><arg>${workingDir}/publication</arg>
|
<arg>--outputGraphTablePath</arg><arg>${workingDir}/publication</arg>
|
||||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||||
|
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||||
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="DecisionPromoteResultActionPayloadForPublicationTable"/>
|
<ok to="DecisionPromoteResultActionPayloadForPublicationTable"/>
|
||||||
|
@ -167,6 +168,7 @@
|
||||||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
|
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
|
||||||
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/publication</arg>
|
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/publication</arg>
|
||||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||||
|
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||||
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
|
|
|
@ -107,6 +107,7 @@
|
||||||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg>
|
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg>
|
||||||
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/relation</arg>
|
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/relation</arg>
|
||||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||||
|
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
|
|
@ -114,6 +114,7 @@
|
||||||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||||
<arg>--outputGraphTablePath</arg><arg>${workingDir}/software</arg>
|
<arg>--outputGraphTablePath</arg><arg>${workingDir}/software</arg>
|
||||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||||
|
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||||
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="DecisionPromoteResultActionPayloadForSoftwareTable"/>
|
<ok to="DecisionPromoteResultActionPayloadForSoftwareTable"/>
|
||||||
|
@ -166,6 +167,7 @@
|
||||||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
|
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
|
||||||
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/software</arg>
|
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/software</arg>
|
||||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||||
|
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||||
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
|
|
|
@ -54,7 +54,7 @@ public class PromoteActionPayloadFunctionsTest {
|
||||||
RuntimeException.class,
|
RuntimeException.class,
|
||||||
() -> PromoteActionPayloadFunctions
|
() -> PromoteActionPayloadFunctions
|
||||||
.joinGraphTableWithActionPayloadAndMerge(
|
.joinGraphTableWithActionPayloadAndMerge(
|
||||||
null, null, null, null, null, OafImplSubSub.class, OafImpl.class));
|
null, null, null, null, null, null, OafImplSubSub.class, OafImpl.class));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -104,6 +104,7 @@ public class PromoteActionPayloadFunctionsTest {
|
||||||
rowIdFn,
|
rowIdFn,
|
||||||
actionPayloadIdFn,
|
actionPayloadIdFn,
|
||||||
mergeAndGetFn,
|
mergeAndGetFn,
|
||||||
|
PromoteAction.Strategy.UPSERT,
|
||||||
OafImplSubSub.class,
|
OafImplSubSub.class,
|
||||||
OafImplSubSub.class)
|
OafImplSubSub.class)
|
||||||
.collectAsList();
|
.collectAsList();
|
||||||
|
@ -183,6 +184,7 @@ public class PromoteActionPayloadFunctionsTest {
|
||||||
rowIdFn,
|
rowIdFn,
|
||||||
actionPayloadIdFn,
|
actionPayloadIdFn,
|
||||||
mergeAndGetFn,
|
mergeAndGetFn,
|
||||||
|
PromoteAction.Strategy.UPSERT,
|
||||||
OafImplSubSub.class,
|
OafImplSubSub.class,
|
||||||
OafImplSub.class)
|
OafImplSub.class)
|
||||||
.collectAsList();
|
.collectAsList();
|
||||||
|
|
|
@ -122,22 +122,41 @@ public class DedupRecordFactory {
|
||||||
}
|
}
|
||||||
|
|
||||||
return Stream
|
return Stream
|
||||||
.concat(Stream.of(agg.getDedupId()), agg.aliases.stream())
|
.concat(
|
||||||
.map(id -> {
|
Stream
|
||||||
try {
|
.of(agg.getDedupId())
|
||||||
OafEntity res = (OafEntity) BeanUtils.cloneBean(agg.entity);
|
.map(id -> createDedupOafEntity(id, agg.entity, dataInfo, ts)),
|
||||||
res.setId(id);
|
agg.aliases
|
||||||
res.setDataInfo(dataInfo);
|
.stream()
|
||||||
res.setLastupdatetimestamp(ts);
|
.map(id -> createMergedDedupAliasOafEntity(id, agg.entity, dataInfo, ts)))
|
||||||
return res;
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.iterator();
|
.iterator();
|
||||||
}, beanEncoder);
|
}, beanEncoder);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static OafEntity createDedupOafEntity(String id, OafEntity base, DataInfo dataInfo, long ts) {
|
||||||
|
try {
|
||||||
|
OafEntity res = (OafEntity) BeanUtils.cloneBean(base);
|
||||||
|
res.setId(id);
|
||||||
|
res.setDataInfo(dataInfo);
|
||||||
|
res.setLastupdatetimestamp(ts);
|
||||||
|
return res;
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static OafEntity createMergedDedupAliasOafEntity(String id, OafEntity base, DataInfo dataInfo, long ts) {
|
||||||
|
try {
|
||||||
|
OafEntity res = createDedupOafEntity(id, base, dataInfo, ts);
|
||||||
|
DataInfo ds = (DataInfo) BeanUtils.cloneBean(dataInfo);
|
||||||
|
ds.setDeletedbyinference(true);
|
||||||
|
res.setDataInfo(ds);
|
||||||
|
return res;
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static OafEntity reduceEntity(OafEntity entity, OafEntity duplicate) {
|
private static OafEntity reduceEntity(OafEntity entity, OafEntity duplicate) {
|
||||||
|
|
||||||
if (duplicate == null) {
|
if (duplicate == null) {
|
||||||
|
|
|
@ -238,11 +238,23 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
(Element) doc
|
(Element) doc
|
||||||
.selectSingleNode(
|
.selectSingleNode(
|
||||||
"//*[local-name()='metadata']/*[local-name() = 'resource']/*[local-name() = 'resourceType']"))
|
"//*[local-name()='metadata']/*[local-name() = 'resource']/*[local-name() = 'resourceType']"))
|
||||||
.map(element -> {
|
.map(e -> {
|
||||||
final String resourceTypeURI = element.attributeValue("uri");
|
final String resourceTypeURI = Optional
|
||||||
final String resourceTypeAnyURI = element.attributeValue("anyURI");
|
.ofNullable(e.attributeValue("uri"))
|
||||||
final String resourceTypeTxt = element.getText();
|
.filter(StringUtils::isNotBlank)
|
||||||
final String resourceTypeGeneral = element.attributeValue("resourceTypeGeneral");
|
.orElse(null);
|
||||||
|
final String resourceTypeAnyURI = Optional
|
||||||
|
.ofNullable(e.attributeValue("anyURI"))
|
||||||
|
.filter(StringUtils::isNotBlank)
|
||||||
|
.orElse(null);
|
||||||
|
final String resourceTypeTxt = Optional
|
||||||
|
.ofNullable(e.getText())
|
||||||
|
.filter(StringUtils::isNotBlank)
|
||||||
|
.orElse(null);
|
||||||
|
final String resourceTypeGeneral = Optional
|
||||||
|
.ofNullable(e.attributeValue("resourceTypeGeneral"))
|
||||||
|
.filter(StringUtils::isNotBlank)
|
||||||
|
.orElse(null);
|
||||||
|
|
||||||
return ObjectUtils
|
return ObjectUtils
|
||||||
.firstNonNull(resourceTypeURI, resourceTypeAnyURI, resourceTypeTxt, resourceTypeGeneral);
|
.firstNonNull(resourceTypeURI, resourceTypeAnyURI, resourceTypeTxt, resourceTypeGeneral);
|
||||||
|
|
|
@ -1171,6 +1171,34 @@ class MappersTest {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void test_Zenodo2() throws IOException {
|
||||||
|
final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_zenodo2.xml")));
|
||||||
|
final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
|
||||||
|
|
||||||
|
assertEquals(3, list.size());
|
||||||
|
Publication p = cleanup((Publication) list.get(0), vocs);
|
||||||
|
|
||||||
|
assertNotNull(p.getInstance());
|
||||||
|
assertEquals(1, p.getInstance().size());
|
||||||
|
|
||||||
|
final Instance instance = p.getInstance().get(0);
|
||||||
|
|
||||||
|
assertNotNull(instance.getInstanceTypeMapping());
|
||||||
|
assertEquals(1, instance.getInstanceTypeMapping().size());
|
||||||
|
|
||||||
|
Optional<InstanceTypeMapping> coarType = instance
|
||||||
|
.getInstanceTypeMapping()
|
||||||
|
.stream()
|
||||||
|
.filter(itm -> ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1.equals(itm.getVocabularyName()))
|
||||||
|
.findFirst();
|
||||||
|
|
||||||
|
assertTrue(coarType.isPresent());
|
||||||
|
assertNotNull(coarType.get().getOriginalType());
|
||||||
|
assertNull(coarType.get().getTypeCode());
|
||||||
|
assertNull(coarType.get().getTypeLabel());
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void testROHub2() throws IOException {
|
void testROHub2() throws IOException {
|
||||||
final String xml = IOUtils
|
final String xml = IOUtils
|
||||||
|
@ -1229,7 +1257,7 @@ class MappersTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testD4ScienceTraining() throws IOException {
|
void testD4ScienceTraining() throws IOException {
|
||||||
final String xml = IOUtils
|
final String xml = IOUtils
|
||||||
.toString(Objects.requireNonNull(getClass().getResourceAsStream("d4science-1-training.xml")));
|
.toString(Objects.requireNonNull(getClass().getResourceAsStream("d4science-1-training.xml")));
|
||||||
final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
|
final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
|
||||||
|
@ -1240,7 +1268,7 @@ class MappersTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testD4ScienceDataset() throws IOException {
|
void testD4ScienceDataset() throws IOException {
|
||||||
final String xml = IOUtils
|
final String xml = IOUtils
|
||||||
.toString(Objects.requireNonNull(getClass().getResourceAsStream("d4science-2-dataset.xml")));
|
.toString(Objects.requireNonNull(getClass().getResourceAsStream("d4science-2-dataset.xml")));
|
||||||
final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
|
final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
|
||||||
|
|
|
@ -0,0 +1,59 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<record xmlns:oaf="http://namespace.openaire.eu/oaf"
|
||||||
|
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
|
||||||
|
xmlns:datacite="http://datacite.org/schema/kernel-3"
|
||||||
|
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
|
||||||
|
xmlns:dri="http://www.driver-repository.eu/namespace/dri">
|
||||||
|
<header xmlns="http://www.openarchives.org/OAI/2.0/">
|
||||||
|
<identifier>oai:zenodo.org:1596086</identifier>
|
||||||
|
<datestamp>2020-01-20T13:50:28Z</datestamp>
|
||||||
|
<setSpec>openaire</setSpec>
|
||||||
|
<dr:dateOfTransformation>2024-02-08T11:03:10.994Z</dr:dateOfTransformation>
|
||||||
|
<dri:objIdentifier>od______2659::036d5555a6688ed00c8d0da97bdece3b</dri:objIdentifier>
|
||||||
|
<dri:dateOfCollection>2024-02-08T11:03:10.994Z</dri:dateOfCollection>
|
||||||
|
<dri:dateOfTransformation>2024-02-08T11:03:10.994Z</dri:dateOfTransformation>
|
||||||
|
</header>
|
||||||
|
<metadata>
|
||||||
|
<resource xmlns="http://datacite.org/schema/kernel-4"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.1/metadata.xsd">
|
||||||
|
<identifier identifierType="URL">https://zenodo.org/record/1596086</identifier>
|
||||||
|
<alternateIdentifiers xmlns="http://datacite.org/schema/kernel-3"/>
|
||||||
|
<creators>
|
||||||
|
<creator>
|
||||||
|
<creatorName>Bonney, T. G.</creatorName>
|
||||||
|
<givenName>T. G.</givenName>
|
||||||
|
<familyName>Bonney</familyName>
|
||||||
|
</creator>
|
||||||
|
</creators>
|
||||||
|
<titles>
|
||||||
|
<title>Ice Blocks on a Moraine</title>
|
||||||
|
</titles>
|
||||||
|
<publisher>Zenodo</publisher>
|
||||||
|
<publicationYear>1889</publicationYear>
|
||||||
|
<dates>
|
||||||
|
<date dateType="Issued">1889-08-22</date>
|
||||||
|
</dates>
|
||||||
|
<resourceType resourceTypeGeneral="JournalArticle"/>
|
||||||
|
<relatedIdentifiers>
|
||||||
|
<relatedIdentifier relatedIdentifierType="DOI" relationType="IsIdenticalTo"
|
||||||
|
>10.1038/040391a0</relatedIdentifier>
|
||||||
|
</relatedIdentifiers>
|
||||||
|
<rightsList>
|
||||||
|
<rights rightsURI="https://creativecommons.org/publicdomain/zero/1.0/legalcode"
|
||||||
|
>Creative Commons Zero v1.0 Universal</rights>
|
||||||
|
<rights rightsURI="info:eu-repo/semantics/openAccess">Open Access</rights>
|
||||||
|
</rightsList>
|
||||||
|
<descriptions>
|
||||||
|
<description descriptionType="Abstract">n/a</description>
|
||||||
|
</descriptions>
|
||||||
|
</resource>
|
||||||
|
<dr:CobjCategory type="publication">0001</dr:CobjCategory>
|
||||||
|
<oaf:dateAccepted>1889-08-22</oaf:dateAccepted>
|
||||||
|
<oaf:accessrights>OPEN</oaf:accessrights>
|
||||||
|
<oaf:license>http://creativecommons.org/publicdomain/zero/1.0/legalcode</oaf:license>
|
||||||
|
<oaf:language/>
|
||||||
|
<oaf:hostedBy name="ZENODO" id="opendoar____::2659"/>
|
||||||
|
<oaf:collectedFrom name="ZENODO" id="opendoar____::2659"/>
|
||||||
|
</metadata>
|
||||||
|
</record>
|
Loading…
Reference in New Issue