Compare commits
40 Commits
master
...
9559_DBLP_
Author | SHA1 | Date |
---|---|---|
Serafeim Chatzopoulos | 842b309922 | |
Serafeim Chatzopoulos | b6e4d58817 | |
Michele Artini | cb29b9773c | |
Michele Artini | 85b844d57e | |
Michele Artini | 455f2e1e07 | |
Michele Artini | 88fef367b9 | |
Giambattista Bloisi | 3cd5590f3b | |
Giambattista Bloisi | 56dd05f85c | |
Sandro La Bruzzo | 7d806a434c | |
Sandro La Bruzzo | e468e99100 | |
Sandro La Bruzzo | b63994dcc4 | |
Sandro La Bruzzo | 915a76a796 | |
Giambattista Bloisi | 773e856550 | |
Sandro La Bruzzo | a712df1e1d | |
Sandro La Bruzzo | b32a9d1994 | |
Michele Artini | 3268570b2c | |
Claudio Atzori | 753c2a72bd | |
Claudio Atzori | a63b091bae | |
Giambattista Bloisi | 85aeff72f1 | |
Giambattista Bloisi | d65285da7f | |
Giambattista Bloisi | 29194472a7 | |
Claudio Atzori | d85d2df6ad | |
Giambattista Bloisi | b19643f6eb | |
Claudio Atzori | e6bdee86d1 | |
Claudio Atzori | 38c9001147 | |
Claudio Atzori | fd17c1f17c | |
Claudio Atzori | 009dcf6aea | |
Claudio Atzori | bb82052c40 | |
Claudio Atzori | 42f5506306 | |
Alessia Bardi | f2a08d8cc2 | |
Miriam Baglioni | a5995ab557 | |
Sandro La Bruzzo | 0386f36385 | |
Sandro La Bruzzo | 43e0bba7ed | |
Sandro La Bruzzo | e0753f19da | |
sandro.labruzzo | e328bc0ade | |
Sandro La Bruzzo | 859babf722 | |
Sandro La Bruzzo | 39ebb60b38 | |
Sandro La Bruzzo | 9d5a7c3b22 | |
Sandro La Bruzzo | 8f61063201 | |
Sandro La Bruzzo | 1a42a5c10d |
|
@ -312,7 +312,8 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
|||
}
|
||||
|
||||
if (value instanceof Datasource) {
|
||||
// nothing to evaluate here
|
||||
final Datasource d = (Datasource) value;
|
||||
return Objects.nonNull(d.getOfficialname()) && StringUtils.isNotBlank(d.getOfficialname().getValue());
|
||||
} else if (value instanceof Project) {
|
||||
final Project p = (Project) value;
|
||||
return Objects.nonNull(p.getCode()) && StringUtils.isNotBlank(p.getCode().getValue());
|
||||
|
|
|
@ -23,15 +23,18 @@ public class InstanceTypeMatch extends AbstractListComparator {
|
|||
|
||||
// jolly types
|
||||
translationMap.put("Conference object", "*");
|
||||
translationMap.put("Research", "*");
|
||||
translationMap.put("Other literature type", "*");
|
||||
translationMap.put("Unknown", "*");
|
||||
translationMap.put("UNKNOWN", "*");
|
||||
|
||||
// article types
|
||||
translationMap.put("Article", "Article");
|
||||
translationMap.put("Journal", "Article");
|
||||
translationMap.put("Data Paper", "Article");
|
||||
translationMap.put("Software Paper", "Article");
|
||||
translationMap.put("Preprint", "Article");
|
||||
translationMap.put("Part of book or chapter of book", "Article");
|
||||
|
||||
// thesis types
|
||||
translationMap.put("Thesis", "Thesis");
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
/*
|
||||
* Copyright (c) 2024.
|
||||
* SPDX-FileCopyrightText: © 2023 Consiglio Nazionale delle Ricerche
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*/
|
||||
|
||||
package eu.dnetlib.dhp.actionmanager.promote;
|
||||
|
||||
/** Encodes the Actionset promotion strategies */
public class PromoteAction {

	/**
	 * The supported actionset promotion strategies.
	 *
	 * ENRICH: promotes only records in the actionset matching another record in the
	 * graph and enriches them applying the given MergeAndGet strategy.
	 * UPSERT: promotes all the records in an actionset, matching records are updated
	 * using the given MergeAndGet strategy, the non-matching records are inserted as they are.
	 */
	public enum Strategy {
		ENRICH, UPSERT
	}

	/**
	 * Returns the string representation of the join type implementing the given PromoteAction.
	 *
	 * ENRICH only needs graph-side rows (left outer join), while UPSERT must keep
	 * rows coming from either side (full outer join).
	 *
	 * @param strategy the strategy to be used to promote the Actionset contents
	 * @return the join type used to implement the promotion strategy
	 * @throws IllegalStateException when the strategy is not one of the supported values
	 */
	public static String joinTypeForStrategy(PromoteAction.Strategy strategy) {
		switch (strategy) {
			case ENRICH:
				return "left_outer";
			case UPSERT:
				return "full_outer";
			default:
				// plain concatenation renders the value null-safely; the explicit
				// toString() call it replaces could itself throw an NPE
				throw new IllegalStateException("unsupported PromoteAction: " + strategy);
		}
	}
}
|
|
@ -67,8 +67,9 @@ public class PromoteActionPayloadForGraphTableJob {
|
|||
String outputGraphTablePath = parser.get("outputGraphTablePath");
|
||||
logger.info("outputGraphTablePath: {}", outputGraphTablePath);
|
||||
|
||||
MergeAndGet.Strategy strategy = MergeAndGet.Strategy.valueOf(parser.get("mergeAndGetStrategy").toUpperCase());
|
||||
logger.info("strategy: {}", strategy);
|
||||
MergeAndGet.Strategy mergeAndGetStrategy = MergeAndGet.Strategy
|
||||
.valueOf(parser.get("mergeAndGetStrategy").toUpperCase());
|
||||
logger.info("mergeAndGetStrategy: {}", mergeAndGetStrategy);
|
||||
|
||||
Boolean shouldGroupById = Optional
|
||||
.ofNullable(parser.get("shouldGroupById"))
|
||||
|
@ -76,6 +77,12 @@ public class PromoteActionPayloadForGraphTableJob {
|
|||
.orElse(true);
|
||||
logger.info("shouldGroupById: {}", shouldGroupById);
|
||||
|
||||
PromoteAction.Strategy promoteActionStrategy = Optional
|
||||
.ofNullable(parser.get("promoteActionStrategy"))
|
||||
.map(PromoteAction.Strategy::valueOf)
|
||||
.orElse(PromoteAction.Strategy.UPSERT);
|
||||
logger.info("promoteActionStrategy: {}", promoteActionStrategy);
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
Class<? extends Oaf> rowClazz = (Class<? extends Oaf>) Class.forName(graphTableClassName);
|
||||
@SuppressWarnings("unchecked")
|
||||
|
@ -97,7 +104,8 @@ public class PromoteActionPayloadForGraphTableJob {
|
|||
inputGraphTablePath,
|
||||
inputActionPayloadPath,
|
||||
outputGraphTablePath,
|
||||
strategy,
|
||||
mergeAndGetStrategy,
|
||||
promoteActionStrategy,
|
||||
rowClazz,
|
||||
actionPayloadClazz,
|
||||
shouldGroupById);
|
||||
|
@ -124,14 +132,16 @@ public class PromoteActionPayloadForGraphTableJob {
|
|||
String inputGraphTablePath,
|
||||
String inputActionPayloadPath,
|
||||
String outputGraphTablePath,
|
||||
MergeAndGet.Strategy strategy,
|
||||
MergeAndGet.Strategy mergeAndGetStrategy,
|
||||
PromoteAction.Strategy promoteActionStrategy,
|
||||
Class<G> rowClazz,
|
||||
Class<A> actionPayloadClazz, Boolean shouldGroupById) {
|
||||
Dataset<G> rowDS = readGraphTable(spark, inputGraphTablePath, rowClazz);
|
||||
Dataset<A> actionPayloadDS = readActionPayload(spark, inputActionPayloadPath, actionPayloadClazz);
|
||||
|
||||
Dataset<G> result = promoteActionPayloadForGraphTable(
|
||||
rowDS, actionPayloadDS, strategy, rowClazz, actionPayloadClazz, shouldGroupById)
|
||||
rowDS, actionPayloadDS, mergeAndGetStrategy, promoteActionStrategy, rowClazz, actionPayloadClazz,
|
||||
shouldGroupById)
|
||||
.map((MapFunction<G, G>) value -> value, Encoders.bean(rowClazz));
|
||||
|
||||
saveGraphTable(result, outputGraphTablePath);
|
||||
|
@ -183,7 +193,8 @@ public class PromoteActionPayloadForGraphTableJob {
|
|||
private static <G extends Oaf, A extends Oaf> Dataset<G> promoteActionPayloadForGraphTable(
|
||||
Dataset<G> rowDS,
|
||||
Dataset<A> actionPayloadDS,
|
||||
MergeAndGet.Strategy strategy,
|
||||
MergeAndGet.Strategy mergeAndGetStrategy,
|
||||
PromoteAction.Strategy promoteActionStrategy,
|
||||
Class<G> rowClazz,
|
||||
Class<A> actionPayloadClazz,
|
||||
Boolean shouldGroupById) {
|
||||
|
@ -195,8 +206,9 @@ public class PromoteActionPayloadForGraphTableJob {
|
|||
|
||||
SerializableSupplier<Function<G, String>> rowIdFn = ModelSupport::idFn;
|
||||
SerializableSupplier<Function<A, String>> actionPayloadIdFn = ModelSupport::idFn;
|
||||
SerializableSupplier<BiFunction<G, A, G>> mergeRowWithActionPayloadAndGetFn = MergeAndGet.functionFor(strategy);
|
||||
SerializableSupplier<BiFunction<G, G, G>> mergeRowsAndGetFn = MergeAndGet.functionFor(strategy);
|
||||
SerializableSupplier<BiFunction<G, A, G>> mergeRowWithActionPayloadAndGetFn = MergeAndGet
|
||||
.functionFor(mergeAndGetStrategy);
|
||||
SerializableSupplier<BiFunction<G, G, G>> mergeRowsAndGetFn = MergeAndGet.functionFor(mergeAndGetStrategy);
|
||||
SerializableSupplier<G> zeroFn = zeroFn(rowClazz);
|
||||
SerializableSupplier<Function<G, Boolean>> isNotZeroFn = PromoteActionPayloadForGraphTableJob::isNotZeroFnUsingIdOrSourceAndTarget;
|
||||
|
||||
|
@ -207,6 +219,7 @@ public class PromoteActionPayloadForGraphTableJob {
|
|||
rowIdFn,
|
||||
actionPayloadIdFn,
|
||||
mergeRowWithActionPayloadAndGetFn,
|
||||
promoteActionStrategy,
|
||||
rowClazz,
|
||||
actionPayloadClazz);
|
||||
|
||||
|
|
|
@ -34,6 +34,7 @@ public class PromoteActionPayloadFunctions {
|
|||
* @param rowIdFn Function used to get the id of graph table row
|
||||
* @param actionPayloadIdFn Function used to get id of action payload instance
|
||||
* @param mergeAndGetFn Function used to merge graph table row and action payload instance
|
||||
* @param promoteActionStrategy the Actionset promotion strategy
|
||||
* @param rowClazz Class of graph table
|
||||
* @param actionPayloadClazz Class of action payload
|
||||
* @param <G> Type of graph table row
|
||||
|
@ -46,6 +47,7 @@ public class PromoteActionPayloadFunctions {
|
|||
SerializableSupplier<Function<G, String>> rowIdFn,
|
||||
SerializableSupplier<Function<A, String>> actionPayloadIdFn,
|
||||
SerializableSupplier<BiFunction<G, A, G>> mergeAndGetFn,
|
||||
PromoteAction.Strategy promoteActionStrategy,
|
||||
Class<G> rowClazz,
|
||||
Class<A> actionPayloadClazz) {
|
||||
if (!isSubClass(rowClazz, actionPayloadClazz)) {
|
||||
|
@ -61,7 +63,7 @@ public class PromoteActionPayloadFunctions {
|
|||
.joinWith(
|
||||
actionPayloadWithIdDS,
|
||||
rowWithIdDS.col("_1").equalTo(actionPayloadWithIdDS.col("_1")),
|
||||
"full_outer")
|
||||
PromoteAction.joinTypeForStrategy(promoteActionStrategy))
|
||||
.map(
|
||||
(MapFunction<Tuple2<Tuple2<String, G>, Tuple2<String, A>>, G>) value -> {
|
||||
Optional<G> rowOpt = Optional.ofNullable(value._1()).map(Tuple2::_2);
|
||||
|
|
|
@ -41,6 +41,12 @@
|
|||
"paramDescription": "strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "pas",
|
||||
"paramLongName": "promoteActionStrategy",
|
||||
"paramDescription": "strategy for promoting the actionset contents into the graph tables, ENRICH or UPSERT (default)",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName": "sgid",
|
||||
"paramLongName": "shouldGroupById",
|
||||
|
|
|
@ -115,6 +115,7 @@
|
|||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
<arg>--outputGraphTablePath</arg><arg>${workingDir}/dataset</arg>
|
||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||
</spark>
|
||||
<ok to="DecisionPromoteResultActionPayloadForDatasetTable"/>
|
||||
|
@ -167,6 +168,7 @@
|
|||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
|
||||
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/dataset</arg>
|
||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
|
|
|
@ -106,6 +106,7 @@
|
|||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg>
|
||||
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/datasource</arg>
|
||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
|
|
|
@ -106,6 +106,7 @@
|
|||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg>
|
||||
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/organization</arg>
|
||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
|
|
|
@ -114,6 +114,7 @@
|
|||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
<arg>--outputGraphTablePath</arg><arg>${workingDir}/otherresearchproduct</arg>
|
||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||
</spark>
|
||||
<ok to="DecisionPromoteResultActionPayloadForOtherResearchProductTable"/>
|
||||
|
@ -166,6 +167,7 @@
|
|||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
|
||||
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/otherresearchproduct</arg>
|
||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
|
|
|
@ -106,6 +106,7 @@
|
|||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
|
||||
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/project</arg>
|
||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
|
|
|
@ -115,6 +115,7 @@
|
|||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
<arg>--outputGraphTablePath</arg><arg>${workingDir}/publication</arg>
|
||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||
</spark>
|
||||
<ok to="DecisionPromoteResultActionPayloadForPublicationTable"/>
|
||||
|
@ -167,6 +168,7 @@
|
|||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
|
||||
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/publication</arg>
|
||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
|
|
|
@ -107,6 +107,7 @@
|
|||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg>
|
||||
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/relation</arg>
|
||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
|
|
|
@ -114,6 +114,7 @@
|
|||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--outputGraphTablePath</arg><arg>${workingDir}/software</arg>
|
||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||
</spark>
|
||||
<ok to="DecisionPromoteResultActionPayloadForSoftwareTable"/>
|
||||
|
@ -166,6 +167,7 @@
|
|||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
|
||||
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/software</arg>
|
||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||
<arg>--promoteActionStrategy</arg><arg>${promoteActionStrategy}</arg>
|
||||
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
|
|
|
@ -54,7 +54,7 @@ public class PromoteActionPayloadFunctionsTest {
|
|||
RuntimeException.class,
|
||||
() -> PromoteActionPayloadFunctions
|
||||
.joinGraphTableWithActionPayloadAndMerge(
|
||||
null, null, null, null, null, OafImplSubSub.class, OafImpl.class));
|
||||
null, null, null, null, null, null, OafImplSubSub.class, OafImpl.class));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -104,6 +104,7 @@ public class PromoteActionPayloadFunctionsTest {
|
|||
rowIdFn,
|
||||
actionPayloadIdFn,
|
||||
mergeAndGetFn,
|
||||
PromoteAction.Strategy.UPSERT,
|
||||
OafImplSubSub.class,
|
||||
OafImplSubSub.class)
|
||||
.collectAsList();
|
||||
|
@ -183,6 +184,7 @@ public class PromoteActionPayloadFunctionsTest {
|
|||
rowIdFn,
|
||||
actionPayloadIdFn,
|
||||
mergeAndGetFn,
|
||||
PromoteAction.Strategy.UPSERT,
|
||||
OafImplSubSub.class,
|
||||
OafImplSub.class)
|
||||
.collectAsList();
|
||||
|
|
|
@ -0,0 +1,113 @@
|
|||
|
||||
package eu.dnetlib.dhp.actionmanager.dblp;
|
||||
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.io.compress.GzipCodec;
|
||||
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaPairRDD;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.*;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.actionmanager.Constants;
|
||||
import eu.dnetlib.dhp.actionmanager.bipaffiliations.PrepareAffiliationRelations;
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||
import scala.Tuple2;
|
||||
|
||||
/**
 * Creates action sets for DBLP data.
 *
 * Reads DBLP input (JSON, see schema below) with Spark, maps each row to an OAF
 * {@link Result} wrapped in an {@link AtomicAction}, and writes the pairs to a
 * Hadoop sequence file at the configured output path. The row-to-Result mapping
 * is still a stub (see the TODOs inside).
 */
public class PrepareDblpActionSets implements Serializable {

	private static final Logger log = LoggerFactory.getLogger(PrepareDblpActionSets.class);
	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

	// prefix for OpenAIRE result identifiers minted from DOIs
	private static final String ID_PREFIX = "50|doi_________::";

	// NOTE(review): the three BIP_* constants below look copy-pasted from the
	// bipaffiliations job (see the PrepareAffiliationRelations import) — their
	// names/values reference BIP and crossref, not DBLP. Confirm whether they are
	// actually needed here or should be renamed/removed before this class ships.
	public static final String BIP_AFFILIATIONS_CLASSID = "result:organization:bipinference";
	public static final String BIP_AFFILIATIONS_CLASSNAME = "Affiliation relation inferred by BIP!";
	public static final String BIP_INFERENCE_PROVENANCE = "bip:affiliation:crossref";

	/**
	 * Job entry point.
	 *
	 * Expected arguments (see input_actionset_parameter.json): dblpInputPath,
	 * outputPath, plus the standard isSparkSessionManaged flag.
	 *
	 * @param args parsed via {@link ArgumentApplicationParser}
	 * @throws Exception on argument-parsing or Spark failures
	 */
	public static <I extends Result> void main(String[] args) throws Exception {

		String jsonConfiguration = IOUtils
			.toString(
				PrepareDblpActionSets.class
					.getResourceAsStream(
						"/eu/dnetlib/dhp/actionmanager/dblp/input_actionset_parameter.json"));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		Boolean isSparkSessionManaged = Constants.isSparkSessionManaged(parser);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		final String dblpInputPath = parser.get("dblpInputPath");
		log.info("dblpInputPath: {}", dblpInputPath);

		final String outputPath = parser.get("outputPath");
		log.info("outputPath: {}", outputPath);

		SparkConf conf = new SparkConf();

		runWithSparkSession(
			conf,
			isSparkSessionManaged,
			spark -> {
				// output dir is recreated from scratch on every run
				Constants.removeOutputDir(spark, outputPath);

				// TODO: add DBLP ID in ModelConstants
				// NOTE(review): CROSSREF_ID is used as a placeholder datasource id
				// for the "DBLP" collectedFrom until a DBLP constant exists
				List<KeyValue> collectedFromDBLP = OafMapperUtils
					.listKeyValues(ModelConstants.CROSSREF_ID, "DBLP");
				JavaPairRDD<Text, Text> dblpData = prepareDblpData(
					spark, dblpInputPath, collectedFromDBLP);

				// key = payload class canonical name, value = serialized AtomicAction
				dblpData
					.saveAsHadoopFile(
						outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);

			});
	}

	/**
	 * Loads the DBLP input and turns each row into a sequence-file-ready
	 * (class name, serialized AtomicAction) pair.
	 *
	 * @param spark         the active Spark session
	 * @param inputPath     path of the JSON input
	 * @param collectedFrom collectedFrom key/values to stamp on the results (currently unused by the stub mapping)
	 * @return pairs of (payload class canonical name, JSON-serialized AtomicAction)
	 */
	private static <I extends Result> JavaPairRDD<Text, Text> prepareDblpData(SparkSession spark,
		String inputPath,
		List<KeyValue> collectedFrom) {

		log.info("Reading DBLP XML data");
		//
		// TODO: load DBLP data into a Dataset
		// NOTE(review): this schema (DOI + RORid/Confidence matchings) looks copied
		// from the affiliation-inference input, not a DBLP record layout — confirm
		// the intended DBLP schema before implementing the mapping below
		Dataset<Row> df = spark
			.read()
			.schema("`DOI` STRING, `Matchings` ARRAY<STRUCT<`RORid`:STRING,`Confidence`:DOUBLE>>")
			.json(inputPath);

		return df.map((MapFunction<Row, Result>) bs -> {
			Result result = new Result();

			// TODO: map DBLP data to Result objects

			return result;

		}, Encoders.bean(Result.class))
			.toJavaRDD()
			.map(p -> new AtomicAction(Result.class, p))
			.mapToPair(
				aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
					new Text(OBJECT_MAPPER.writeValueAsString(aa))));
	}
}
|
|
@ -19,6 +19,7 @@ import org.slf4j.LoggerFactory;
|
|||
import eu.dnetlib.dhp.aggregation.common.ReporterCallback;
|
||||
import eu.dnetlib.dhp.aggregation.common.ReportingJob;
|
||||
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
|
||||
import eu.dnetlib.dhp.collection.plugin.base.BaseCollectorPlugin;
|
||||
import eu.dnetlib.dhp.collection.plugin.file.FileCollectorPlugin;
|
||||
import eu.dnetlib.dhp.collection.plugin.file.FileGZipCollectorPlugin;
|
||||
import eu.dnetlib.dhp.collection.plugin.mongodb.MDStoreCollectorPlugin;
|
||||
|
@ -120,6 +121,8 @@ public class CollectorWorker extends ReportingJob {
|
|||
return new FileCollectorPlugin(fileSystem);
|
||||
case fileGzip:
|
||||
return new FileGZipCollectorPlugin(fileSystem);
|
||||
case baseDump:
|
||||
return new BaseCollectorPlugin(this.fileSystem);
|
||||
case other:
|
||||
final CollectorPlugin.NAME.OTHER_NAME plugin = Optional
|
||||
.ofNullable(api.getParams().get("other_plugin_type"))
|
||||
|
|
|
@ -0,0 +1,244 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.orcid;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.net.HttpURLConnection;
|
||||
import java.net.URL;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
|
||||
import javax.swing.*;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.hadoop.io.SequenceFile;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.http.HttpHeaders;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.common.collection.HttpClientParams;
|
||||
|
||||
public class ORCIDWorker extends Thread {
|
||||
|
||||
final static Logger log = LoggerFactory.getLogger(ORCIDWorker.class);
|
||||
|
||||
public static String JOB_COMPLETE = "JOB_COMPLETE";
|
||||
|
||||
private static final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)";
|
||||
|
||||
private final BlockingQueue<String> queue;
|
||||
|
||||
private boolean hasComplete = false;
|
||||
|
||||
private final SequenceFile.Writer employments;
|
||||
|
||||
private final SequenceFile.Writer summary;
|
||||
private final SequenceFile.Writer works;
|
||||
|
||||
private final String token;
|
||||
|
||||
private final String id;
|
||||
|
||||
public static ORCIDWorkerBuilder builder() {
|
||||
return new ORCIDWorkerBuilder();
|
||||
}
|
||||
|
||||
public ORCIDWorker(String id, BlockingQueue<String> myqueue, SequenceFile.Writer employments,
|
||||
SequenceFile.Writer summary, SequenceFile.Writer works, String token) {
|
||||
this.id = id;
|
||||
this.queue = myqueue;
|
||||
this.employments = employments;
|
||||
this.summary = summary;
|
||||
this.works = works;
|
||||
this.token = token;
|
||||
}
|
||||
|
||||
public static String retrieveURL(final String id, final String apiUrl, String token) {
|
||||
try {
|
||||
final HttpURLConnection urlConn = getHttpURLConnection(apiUrl, token);
|
||||
if (urlConn.getResponseCode() > 199 && urlConn.getResponseCode() < 300) {
|
||||
InputStream input = urlConn.getInputStream();
|
||||
return IOUtils.toString(input);
|
||||
} else {
|
||||
log
|
||||
.error(
|
||||
"Thread {} UNABLE TO DOWNLOAD FROM THIS URL {} , status code {}", id, apiUrl,
|
||||
urlConn.getResponseCode());
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("Thread {} Error on retrieving URL {} {}", id, apiUrl, e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@NotNull
|
||||
private static HttpURLConnection getHttpURLConnection(String apiUrl, String token) throws IOException {
|
||||
final HttpURLConnection urlConn = (HttpURLConnection) new URL(apiUrl).openConnection();
|
||||
final HttpClientParams clientParams = new HttpClientParams();
|
||||
urlConn.setInstanceFollowRedirects(false);
|
||||
urlConn.setReadTimeout(clientParams.getReadTimeOut() * 1000);
|
||||
urlConn.setConnectTimeout(clientParams.getConnectTimeOut() * 1000);
|
||||
urlConn.addRequestProperty(HttpHeaders.USER_AGENT, userAgent);
|
||||
urlConn.addRequestProperty(HttpHeaders.AUTHORIZATION, String.format("Bearer %s", token));
|
||||
return urlConn;
|
||||
}
|
||||
|
||||
private static String generateSummaryURL(final String orcidId) {
|
||||
return "https://api.orcid.org/v3.0/" + orcidId + "/record";
|
||||
}
|
||||
|
||||
private static String generateWorksURL(final String orcidId) {
|
||||
return "https://api.orcid.org/v3.0/" + orcidId + "/works";
|
||||
}
|
||||
|
||||
private static String generateEmploymentsURL(final String orcidId) {
|
||||
return "https://api.orcid.org/v3.0/" + orcidId + "/employments";
|
||||
}
|
||||
|
||||
private static void writeResultToSequenceFile(String id, String url, String token, String orcidId,
|
||||
SequenceFile.Writer file) throws IOException {
|
||||
final String response = retrieveURL(id, url, token);
|
||||
if (response != null) {
|
||||
if (orcidId == null) {
|
||||
log.error("Thread {} {} {}", id, orcidId, response);
|
||||
throw new RuntimeException("null items ");
|
||||
}
|
||||
|
||||
if (file == null) {
|
||||
log.error("Thread {} file is null for {} URL:{}", id, url, orcidId);
|
||||
} else {
|
||||
file.append(new Text(orcidId), new Text(response));
|
||||
file.hflush();
|
||||
}
|
||||
|
||||
} else
|
||||
log.error("Thread {} response is null for {} URL:{}", id, url, orcidId);
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
final Text key = new Text();
|
||||
final Text value = new Text();
|
||||
long start;
|
||||
long total_time;
|
||||
String orcidId = "";
|
||||
int requests = 0;
|
||||
if (summary == null || employments == null || works == null)
|
||||
throw new RuntimeException("Null files");
|
||||
|
||||
while (!hasComplete) {
|
||||
try {
|
||||
|
||||
orcidId = queue.take();
|
||||
|
||||
if (orcidId.equalsIgnoreCase(JOB_COMPLETE)) {
|
||||
hasComplete = true;
|
||||
} else {
|
||||
start = System.currentTimeMillis();
|
||||
writeResultToSequenceFile(id, generateSummaryURL(orcidId), token, orcidId, summary);
|
||||
total_time = System.currentTimeMillis() - start;
|
||||
requests++;
|
||||
if (total_time < 1000) {
|
||||
// I know making a sleep on a thread is bad, but we need to stay to 24 requests per seconds,
|
||||
// hence
|
||||
// the time between two http request in a thread must be 1 second
|
||||
Thread.sleep(1000L - total_time);
|
||||
}
|
||||
start = System.currentTimeMillis();
|
||||
writeResultToSequenceFile(id, generateWorksURL(orcidId), token, orcidId, works);
|
||||
total_time = System.currentTimeMillis() - start;
|
||||
requests++;
|
||||
if (total_time < 1000) {
|
||||
// I know making a sleep on a thread is bad, but we need to stay to 24 requests per seconds,
|
||||
// hence
|
||||
// the time between two http request in a thread must be 1 second
|
||||
Thread.sleep(1000L - total_time);
|
||||
}
|
||||
start = System.currentTimeMillis();
|
||||
writeResultToSequenceFile(id, generateEmploymentsURL(orcidId), token, orcidId, employments);
|
||||
total_time = System.currentTimeMillis() - start;
|
||||
requests++;
|
||||
if (total_time < 1000) {
|
||||
// I know making a sleep on a thread is bad, but we need to stay to 24 requests per seconds,
|
||||
// hence
|
||||
// the time between two http request in a thread must be 1 second
|
||||
Thread.sleep(1000L - total_time);
|
||||
}
|
||||
if (requests % 30 == 0) {
|
||||
log.info("Thread {} Downloaded {}", id, requests);
|
||||
}
|
||||
}
|
||||
|
||||
} catch (Throwable e) {
|
||||
|
||||
log.error("Thread {} Unable to save ORICD: {} item error", id, orcidId, e);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
try {
|
||||
works.close();
|
||||
summary.close();
|
||||
employments.close();
|
||||
} catch (Throwable e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
log.info("Thread {} COMPLETE ", id);
|
||||
log.info("Thread {} Downloaded {}", id, requests);
|
||||
|
||||
}
|
||||
|
||||
public static class ORCIDWorkerBuilder {
|
||||
|
||||
private String id;
|
||||
private SequenceFile.Writer employments;
|
||||
private SequenceFile.Writer summary;
|
||||
private SequenceFile.Writer works;
|
||||
private BlockingQueue<String> queue;
|
||||
|
||||
private String token;
|
||||
|
||||
public ORCIDWorkerBuilder withId(final String id) {
|
||||
this.id = id;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ORCIDWorkerBuilder withEmployments(final SequenceFile.Writer sequenceFile) {
|
||||
this.employments = sequenceFile;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ORCIDWorkerBuilder withSummary(final SequenceFile.Writer sequenceFile) {
|
||||
this.summary = sequenceFile;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ORCIDWorkerBuilder withWorks(final SequenceFile.Writer sequenceFile) {
|
||||
this.works = sequenceFile;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ORCIDWorkerBuilder withAccessToken(final String accessToken) {
|
||||
this.token = accessToken;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ORCIDWorkerBuilder withBlockingQueue(final BlockingQueue<String> queue) {
|
||||
this.queue = queue;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ORCIDWorker build() {
|
||||
if (this.summary == null || this.works == null || this.employments == null || StringUtils.isEmpty(token)
|
||||
|| queue == null)
|
||||
throw new RuntimeException("Unable to build missing required params");
|
||||
return new ORCIDWorker(id, queue, employments, summary, works, token);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,171 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.orcid;
|
||||
|
||||
import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration;
|
||||
|
||||
import java.io.*;
|
||||
import java.net.HttpURLConnection;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.ArrayBlockingQueue;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
|
||||
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
|
||||
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
|
||||
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.io.SequenceFile;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.common.collection.HttpClientParams;
|
||||
|
||||
public class OrcidGetUpdatesFile {
|
||||
|
||||
private static Logger log = LoggerFactory.getLogger(OrcidGetUpdatesFile.class);
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
|
||||
ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
Objects
|
||||
.requireNonNull(
|
||||
OrcidGetUpdatesFile.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/collection/orcid/download_orcid_update_parameter.json")))
|
||||
|
||||
);
|
||||
parser.parseArgument(args);
|
||||
|
||||
final String namenode = parser.get("namenode");
|
||||
log.info("got variable namenode: {}", namenode);
|
||||
|
||||
final String master = parser.get("master");
|
||||
log.info("got variable master: {}", master);
|
||||
|
||||
final String targetPath = parser.get("targetPath");
|
||||
log.info("got variable targetPath: {}", targetPath);
|
||||
|
||||
final String apiURL = parser.get("apiURL");
|
||||
log.info("got variable apiURL: {}", apiURL);
|
||||
|
||||
final String accessToken = parser.get("accessToken");
|
||||
log.info("got variable accessToken: {}", accessToken);
|
||||
|
||||
final String graphPath = parser.get("graphPath");
|
||||
log.info("got variable graphPath: {}", graphPath);
|
||||
|
||||
final SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName(OrcidGetUpdatesFile.class.getName())
|
||||
.master(master)
|
||||
.getOrCreate();
|
||||
|
||||
final String latestDate = spark
|
||||
.read()
|
||||
.load(graphPath + "/Authors")
|
||||
.selectExpr("max(lastModifiedDate)")
|
||||
.first()
|
||||
.getString(0);
|
||||
|
||||
log.info("latest date is {}", latestDate);
|
||||
|
||||
final FileSystem fileSystem = FileSystem.get(getHadoopConfiguration(namenode));
|
||||
|
||||
new OrcidGetUpdatesFile().readTar(fileSystem, accessToken, apiURL, targetPath, latestDate);
|
||||
|
||||
}
|
||||
|
||||
private SequenceFile.Writer createFile(Path aPath, FileSystem fileSystem) throws IOException {
|
||||
return SequenceFile
|
||||
.createWriter(
|
||||
fileSystem.getConf(),
|
||||
SequenceFile.Writer.file(aPath),
|
||||
SequenceFile.Writer.keyClass(Text.class),
|
||||
SequenceFile.Writer.valueClass(Text.class));
|
||||
}
|
||||
|
||||
private ORCIDWorker createWorker(final String id, final String targetPath, final BlockingQueue<String> queue,
|
||||
final String accessToken, FileSystem fileSystem) throws Exception {
|
||||
return ORCIDWorker
|
||||
.builder()
|
||||
.withId(id)
|
||||
.withEmployments(createFile(new Path(String.format("%s/employments_%s", targetPath, id)), fileSystem))
|
||||
.withSummary(createFile(new Path(String.format("%s/summary_%s", targetPath, id)), fileSystem))
|
||||
.withWorks(createFile(new Path(String.format("%s/works_%s", targetPath, id)), fileSystem))
|
||||
.withAccessToken(accessToken)
|
||||
.withBlockingQueue(queue)
|
||||
.build();
|
||||
}
|
||||
|
||||
public void readTar(FileSystem fileSystem, final String accessToken, final String apiURL, final String targetPath,
|
||||
final String startDate) throws Exception {
|
||||
final HttpURLConnection urlConn = (HttpURLConnection) new URL(apiURL).openConnection();
|
||||
final HttpClientParams clientParams = new HttpClientParams();
|
||||
urlConn.setInstanceFollowRedirects(false);
|
||||
urlConn.setReadTimeout(clientParams.getReadTimeOut() * 1000);
|
||||
urlConn.setConnectTimeout(clientParams.getConnectTimeOut() * 1000);
|
||||
if (urlConn.getResponseCode() > 199 && urlConn.getResponseCode() < 300) {
|
||||
InputStream input = urlConn.getInputStream();
|
||||
|
||||
Path hdfsWritePath = new Path("/tmp/orcid_updates.tar.gz");
|
||||
final FSDataOutputStream fsDataOutputStream = fileSystem.create(hdfsWritePath, true);
|
||||
IOUtils.copy(input, fsDataOutputStream);
|
||||
fsDataOutputStream.flush();
|
||||
fsDataOutputStream.close();
|
||||
FSDataInputStream updateFile = fileSystem.open(hdfsWritePath);
|
||||
TarArchiveInputStream tais = new TarArchiveInputStream(new GzipCompressorInputStream(
|
||||
new BufferedInputStream(
|
||||
updateFile.getWrappedStream())));
|
||||
TarArchiveEntry entry;
|
||||
|
||||
BlockingQueue<String> queue = new ArrayBlockingQueue<String>(3000);
|
||||
final List<ORCIDWorker> workers = new ArrayList<>();
|
||||
for (int i = 0; i < 22; i++) {
|
||||
workers.add(createWorker("" + i, targetPath, queue, accessToken, fileSystem));
|
||||
}
|
||||
workers.forEach(Thread::start);
|
||||
|
||||
while ((entry = tais.getNextTarEntry()) != null) {
|
||||
|
||||
if (entry.isFile()) {
|
||||
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(tais));
|
||||
System.out.println(br.readLine());
|
||||
br
|
||||
.lines()
|
||||
.map(l -> l.split(","))
|
||||
.filter(s -> StringUtils.compare(s[3].substring(0, 10), startDate) > 0)
|
||||
.map(s -> s[0])
|
||||
.forEach(s -> {
|
||||
try {
|
||||
queue.put(s);
|
||||
} catch (InterruptedException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < 22; i++) {
|
||||
queue.put(ORCIDWorker.JOB_COMPLETE);
|
||||
}
|
||||
for (ORCIDWorker worker : workers) {
|
||||
worker.join();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
|
@ -1,11 +1,15 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.orcid;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentFactory;
|
||||
import org.dom4j.DocumentHelper;
|
||||
import org.dom4j.Node;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
@ -40,8 +44,8 @@ public class OrcidParser {
|
|||
private static final String NS_ERROR = "error";
|
||||
private static final String NS_HISTORY = "history";
|
||||
private static final String NS_HISTORY_URL = "http://www.orcid.org/ns/history";
|
||||
private static final String NS_BULK_URL = "http://www.orcid.org/ns/bulk";
|
||||
private static final String NS_BULK = "bulk";
|
||||
private static final String NS_EMPLOYMENT = "employment";
|
||||
private static final String NS_EMPLOYMENT_URL = "http://www.orcid.org/ns/employment";
|
||||
private static final String NS_EXTERNAL = "external-identifier";
|
||||
private static final String NS_EXTERNAL_URL = "http://www.orcid.org/ns/external-identifier";
|
||||
|
||||
|
@ -61,6 +65,7 @@ public class OrcidParser {
|
|||
ap.declareXPathNameSpace(NS_WORK, NS_WORK_URL);
|
||||
ap.declareXPathNameSpace(NS_EXTERNAL, NS_EXTERNAL_URL);
|
||||
ap.declareXPathNameSpace(NS_ACTIVITIES, NS_ACTIVITIES_URL);
|
||||
ap.declareXPathNameSpace(NS_EMPLOYMENT, NS_EMPLOYMENT_URL);
|
||||
}
|
||||
|
||||
public Author parseSummary(final String xml) {
|
||||
|
@ -70,13 +75,15 @@ public class OrcidParser {
|
|||
generateParsedDocument(xml);
|
||||
List<VtdUtilityParser.Node> recordNodes = VtdUtilityParser
|
||||
.getTextValuesWithAttributes(
|
||||
ap, vn, "//record:record", Arrays.asList("path"));
|
||||
ap, vn, "//record:record", Collections.singletonList("path"));
|
||||
if (!recordNodes.isEmpty()) {
|
||||
final String oid = (recordNodes.get(0).getAttributes().get("path")).substring(1);
|
||||
author.setOrcid(oid);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
final String ltm = VtdUtilityParser.getSingleValue(ap, vn, "//common:last-modified-date");
|
||||
author.setLastModifiedDate(ltm);
|
||||
List<VtdUtilityParser.Node> personNodes = VtdUtilityParser
|
||||
.getTextValuesWithAttributes(
|
||||
ap, vn, "//person:name", Arrays.asList("visibility"));
|
||||
|
@ -129,6 +136,64 @@ public class OrcidParser {
|
|||
}
|
||||
}
|
||||
|
||||
public List<Work> parseWorks(final String xml) {
|
||||
|
||||
try {
|
||||
String oid;
|
||||
|
||||
generateParsedDocument(xml);
|
||||
List<VtdUtilityParser.Node> workNodes = VtdUtilityParser
|
||||
.getTextValuesWithAttributes(ap, vn, "//activities:works", Arrays.asList("path", "visibility"));
|
||||
if (!workNodes.isEmpty()) {
|
||||
oid = (workNodes.get(0).getAttributes().get("path")).split("/")[1];
|
||||
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
final List<Work> works = new ArrayList<>();
|
||||
ap.selectXPath("//work:work-summary");
|
||||
|
||||
while (ap.evalXPath() != -1) {
|
||||
final Work work = new Work();
|
||||
work.setOrcid(oid);
|
||||
final AutoPilot ap1 = new AutoPilot(ap.getNav());
|
||||
ap1.selectXPath("./work:title/common:title");
|
||||
while (ap1.evalXPath() != -1) {
|
||||
int it = vn.getText();
|
||||
work.setTitle(vn.toNormalizedString(it));
|
||||
}
|
||||
ap1.selectXPath(".//common:external-id");
|
||||
while (ap1.evalXPath() != -1) {
|
||||
final Pid pid = new Pid();
|
||||
|
||||
final AutoPilot ap2 = new AutoPilot(ap1.getNav());
|
||||
|
||||
ap2.selectXPath("./common:external-id-type");
|
||||
while (ap2.evalXPath() != -1) {
|
||||
int it = vn.getText();
|
||||
pid.setSchema(vn.toNormalizedString(it));
|
||||
}
|
||||
ap2.selectXPath("./common:external-id-value");
|
||||
while (ap2.evalXPath() != -1) {
|
||||
int it = vn.getText();
|
||||
pid.setValue(vn.toNormalizedString(it));
|
||||
}
|
||||
|
||||
work.addPid(pid);
|
||||
}
|
||||
|
||||
works.add(work);
|
||||
}
|
||||
return works;
|
||||
|
||||
} catch (Throwable e) {
|
||||
log.error("Error on parsing {}", xml);
|
||||
log.error(e.getMessage());
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public Work parseWork(final String xml) {
|
||||
|
||||
try {
|
||||
|
@ -176,11 +241,15 @@ public class OrcidParser {
|
|||
}
|
||||
|
||||
private String extractEmploymentDate(final String xpath) throws Exception {
|
||||
return extractEmploymentDate(xpath, ap);
|
||||
}
|
||||
|
||||
ap.selectXPath(xpath);
|
||||
private String extractEmploymentDate(final String xpath, AutoPilot pp) throws Exception {
|
||||
|
||||
pp.selectXPath(xpath);
|
||||
StringBuilder sb = new StringBuilder();
|
||||
while (ap.evalXPath() != -1) {
|
||||
final AutoPilot ap1 = new AutoPilot(ap.getNav());
|
||||
while (pp.evalXPath() != -1) {
|
||||
final AutoPilot ap1 = new AutoPilot(pp.getNav());
|
||||
ap1.selectXPath("./common:year");
|
||||
while (ap1.evalXPath() != -1) {
|
||||
int it = vn.getText();
|
||||
|
@ -203,6 +272,104 @@ public class OrcidParser {
|
|||
|
||||
}
|
||||
|
||||
public List<Employment> parseEmployments(final String xml) {
|
||||
try {
|
||||
String oid;
|
||||
Map<String, String> nsContext = getNameSpaceMap();
|
||||
DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext);
|
||||
Document doc = DocumentHelper.parseText(xml);
|
||||
oid = doc.valueOf("//activities:employments/@path");
|
||||
if (oid == null || StringUtils.isEmpty(oid))
|
||||
return null;
|
||||
final String orcid = oid.split("/")[1];
|
||||
|
||||
List<Node> nodes = doc.selectNodes("//employment:employment-summary");
|
||||
return nodes.stream().map(n -> {
|
||||
final Employment e = new Employment();
|
||||
e.setOrcid(orcid);
|
||||
|
||||
final String depName = n.valueOf(".//common:department-name");
|
||||
if (StringUtils.isNotBlank(depName))
|
||||
e.setDepartmentName(depName);
|
||||
final String roleTitle = n.valueOf(".//common:role-title");
|
||||
e.setRoleTitle(roleTitle);
|
||||
final String organizationName = n.valueOf(".//common:organization/common:name");
|
||||
if (StringUtils.isEmpty(e.getDepartmentName()))
|
||||
e.setDepartmentName(organizationName);
|
||||
final Pid p = new Pid();
|
||||
final String pid = n
|
||||
.valueOf(
|
||||
"./common:organization/common:disambiguated-organization/common:disambiguated-organization-identifier");
|
||||
p.setValue(pid);
|
||||
final String pidType = n
|
||||
.valueOf("./common:organization/common:disambiguated-organization/common:disambiguation-source");
|
||||
p.setSchema(pidType);
|
||||
e.setAffiliationId(p);
|
||||
|
||||
final StringBuilder aDate = new StringBuilder();
|
||||
final String sy = n.valueOf("./common:start-date/common:year");
|
||||
if (StringUtils.isNotBlank(sy)) {
|
||||
aDate.append(sy);
|
||||
final String sm = n.valueOf("./common:start-date/common:month");
|
||||
final String sd = n.valueOf("./common:start-date/common:day");
|
||||
aDate.append("-");
|
||||
if (StringUtils.isNotBlank(sm))
|
||||
aDate.append(sm);
|
||||
else
|
||||
aDate.append("01");
|
||||
aDate.append("-");
|
||||
if (StringUtils.isNotBlank(sd))
|
||||
aDate.append(sd);
|
||||
else
|
||||
aDate.append("01");
|
||||
e.setEndDate(aDate.toString());
|
||||
}
|
||||
|
||||
final String ey = n.valueOf("./common:end-date/common:year");
|
||||
if (StringUtils.isNotBlank(ey)) {
|
||||
aDate.append(ey);
|
||||
final String em = n.valueOf("./common:end-date/common:month");
|
||||
final String ed = n.valueOf("./common:end-date/common:day");
|
||||
aDate.append("-");
|
||||
if (StringUtils.isNotBlank(em))
|
||||
aDate.append(em);
|
||||
else
|
||||
aDate.append("01");
|
||||
aDate.append("-");
|
||||
if (StringUtils.isNotBlank(ed))
|
||||
aDate.append(ed);
|
||||
else
|
||||
aDate.append("01");
|
||||
e.setEndDate(aDate.toString());
|
||||
}
|
||||
|
||||
return e;
|
||||
|
||||
}).collect(Collectors.toList());
|
||||
} catch (Throwable e) {
|
||||
log.error("Error on parsing {}", xml);
|
||||
log.error(e.getMessage());
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@NotNull
|
||||
private static Map<String, String> getNameSpaceMap() {
|
||||
Map<String, String> nsContext = new HashMap<>();
|
||||
nsContext.put(NS_COMMON, NS_COMMON_URL);
|
||||
nsContext.put(NS_PERSON, NS_PERSON_URL);
|
||||
nsContext.put(NS_DETAILS, NS_DETAILS_URL);
|
||||
nsContext.put(NS_OTHER, NS_OTHER_URL);
|
||||
nsContext.put(NS_RECORD, NS_RECORD_URL);
|
||||
nsContext.put(NS_ERROR, NS_ERROR_URL);
|
||||
nsContext.put(NS_HISTORY, NS_HISTORY_URL);
|
||||
nsContext.put(NS_WORK, NS_WORK_URL);
|
||||
nsContext.put(NS_EXTERNAL, NS_EXTERNAL_URL);
|
||||
nsContext.put(NS_ACTIVITIES, NS_ACTIVITIES_URL);
|
||||
nsContext.put(NS_EMPLOYMENT, NS_EMPLOYMENT_URL);
|
||||
return nsContext;
|
||||
}
|
||||
|
||||
public Employment parseEmployment(final String xml) {
|
||||
try {
|
||||
final Employment employment = new Employment();
|
||||
|
|
|
@ -18,6 +18,8 @@ public class Author extends ORCIDItem {
|
|||
|
||||
private String biography;
|
||||
|
||||
private String lastModifiedDate;
|
||||
|
||||
public String getBiography() {
|
||||
return biography;
|
||||
}
|
||||
|
@ -74,6 +76,14 @@ public class Author extends ORCIDItem {
|
|||
this.otherPids = otherPids;
|
||||
}
|
||||
|
||||
public String getLastModifiedDate() {
|
||||
return lastModifiedDate;
|
||||
}
|
||||
|
||||
public void setLastModifiedDate(String lastModifiedDate) {
|
||||
this.lastModifiedDate = lastModifiedDate;
|
||||
}
|
||||
|
||||
public void addOtherPid(final Pid pid) {
|
||||
|
||||
if (otherPids == null)
|
||||
|
|
|
@ -10,7 +10,8 @@ import eu.dnetlib.dhp.common.collection.CollectorException;
|
|||
public interface CollectorPlugin {
|
||||
|
||||
enum NAME {
|
||||
oai, other, rest_json2xml, file, fileGzip;
|
||||
|
||||
oai, other, rest_json2xml, file, fileGzip, baseDump;
|
||||
|
||||
public enum OTHER_NAME {
|
||||
mdstore_mongodb_dump, mdstore_mongodb
|
||||
|
|
|
@ -0,0 +1,171 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.plugin.base;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.io.StringWriter;
|
||||
import java.util.Iterator;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
import java.util.concurrent.LinkedBlockingQueue;
|
||||
|
||||
import javax.xml.stream.XMLEventReader;
|
||||
import javax.xml.stream.XMLEventWriter;
|
||||
import javax.xml.stream.XMLInputFactory;
|
||||
import javax.xml.stream.XMLOutputFactory;
|
||||
import javax.xml.stream.events.EndElement;
|
||||
import javax.xml.stream.events.StartElement;
|
||||
import javax.xml.stream.events.XMLEvent;
|
||||
|
||||
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
|
||||
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
|
||||
import org.apache.commons.compress.compressors.CompressorInputStream;
|
||||
import org.apache.commons.compress.compressors.CompressorStreamFactory;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
||||
|
||||
public class BaseCollectorIterator implements Iterator<String> {
|
||||
|
||||
private String nextElement;
|
||||
|
||||
private final BlockingQueue<String> queue = new LinkedBlockingQueue<>(100);
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(BaseCollectorIterator.class);
|
||||
|
||||
private static final String END_ELEM = "__END__";
|
||||
|
||||
public BaseCollectorIterator(final FileSystem fs, final Path filePath, final AggregatorReport report) {
|
||||
new Thread(() -> importHadoopFile(fs, filePath, report)).start();
|
||||
try {
|
||||
this.nextElement = this.queue.take();
|
||||
} catch (final InterruptedException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
protected BaseCollectorIterator(final String resourcePath, final AggregatorReport report) {
|
||||
new Thread(() -> importTestFile(resourcePath, report)).start();
|
||||
try {
|
||||
this.nextElement = this.queue.take();
|
||||
} catch (final InterruptedException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized boolean hasNext() {
|
||||
return (this.nextElement != null) & !END_ELEM.equals(this.nextElement);
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized String next() {
|
||||
try {
|
||||
return END_ELEM.equals(this.nextElement) ? null : this.nextElement;
|
||||
} finally {
|
||||
try {
|
||||
this.nextElement = this.queue.take();
|
||||
} catch (final InterruptedException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void importHadoopFile(final FileSystem fs, final Path filePath, final AggregatorReport report) {
|
||||
log.info("I start to read the TAR stream");
|
||||
|
||||
try (InputStream origInputStream = fs.open(filePath);
|
||||
final TarArchiveInputStream tarInputStream = new TarArchiveInputStream(origInputStream)) {
|
||||
importTarStream(tarInputStream, report);
|
||||
} catch (final Throwable e) {
|
||||
throw new RuntimeException("Error processing BASE records", e);
|
||||
}
|
||||
}
|
||||
|
||||
private void importTestFile(final String resourcePath, final AggregatorReport report) {
|
||||
try (final InputStream origInputStream = BaseCollectorIterator.class.getResourceAsStream(resourcePath);
|
||||
final TarArchiveInputStream tarInputStream = new TarArchiveInputStream(origInputStream)) {
|
||||
importTarStream(tarInputStream, report);
|
||||
} catch (final Throwable e) {
|
||||
throw new RuntimeException("Error processing BASE records", e);
|
||||
}
|
||||
}
|
||||
|
||||
private void importTarStream(final TarArchiveInputStream tarInputStream, final AggregatorReport report) {
|
||||
long count = 0;
|
||||
|
||||
final XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance();
|
||||
final XMLOutputFactory xmlOutputFactory = XMLOutputFactory.newInstance();
|
||||
|
||||
try {
|
||||
TarArchiveEntry entry;
|
||||
while ((entry = (TarArchiveEntry) tarInputStream.getNextEntry()) != null) {
|
||||
final String name = entry.getName();
|
||||
|
||||
if (!entry.isDirectory() && name.contains("ListRecords") && name.endsWith(".bz2")) {
|
||||
|
||||
log.info("Processing file (BZIP): " + name);
|
||||
|
||||
final byte[] bzipData = new byte[(int) entry.getSize()];
|
||||
IOUtils.readFully(tarInputStream, bzipData);
|
||||
|
||||
try (InputStream bzipIs = new ByteArrayInputStream(bzipData);
|
||||
final BufferedInputStream bzipBis = new BufferedInputStream(bzipIs);
|
||||
final CompressorInputStream bzipInput = new CompressorStreamFactory()
|
||||
.createCompressorInputStream(bzipBis)) {
|
||||
|
||||
final XMLEventReader reader = xmlInputFactory.createXMLEventReader(bzipInput);
|
||||
|
||||
XMLEventWriter eventWriter = null;
|
||||
StringWriter xmlWriter = null;
|
||||
|
||||
while (reader.hasNext()) {
|
||||
final XMLEvent nextEvent = reader.nextEvent();
|
||||
|
||||
if (nextEvent.isStartElement()) {
|
||||
final StartElement startElement = nextEvent.asStartElement();
|
||||
if ("record".equals(startElement.getName().getLocalPart())) {
|
||||
xmlWriter = new StringWriter();
|
||||
eventWriter = xmlOutputFactory.createXMLEventWriter(xmlWriter);
|
||||
}
|
||||
}
|
||||
|
||||
if (eventWriter != null) {
|
||||
eventWriter.add(nextEvent);
|
||||
}
|
||||
|
||||
if (nextEvent.isEndElement()) {
|
||||
final EndElement endElement = nextEvent.asEndElement();
|
||||
if ("record".equals(endElement.getName().getLocalPart())) {
|
||||
eventWriter.flush();
|
||||
eventWriter.close();
|
||||
|
||||
this.queue.put(xmlWriter.toString());
|
||||
|
||||
eventWriter = null;
|
||||
xmlWriter = null;
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
this.queue.put(END_ELEM); // TO INDICATE THE END OF THE QUEUE
|
||||
} catch (final Throwable e) {
|
||||
log.error("Error processing BASE records", e);
|
||||
report.put(e.getClass().getName(), e.getMessage());
|
||||
throw new RuntimeException("Error processing BASE records", e);
|
||||
} finally {
|
||||
log.info("Total records (written in queue): " + count);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,159 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.plugin.base;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.sql.SQLException;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.Spliterator;
|
||||
import java.util.Spliterators;
|
||||
import java.util.stream.Stream;
|
||||
import java.util.stream.StreamSupport;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentException;
|
||||
import org.dom4j.DocumentHelper;
|
||||
import org.dom4j.Node;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.collection.ApiDescriptor;
|
||||
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
|
||||
import eu.dnetlib.dhp.collection.plugin.file.AbstractSplittedRecordPlugin;
|
||||
import eu.dnetlib.dhp.common.DbClient;
|
||||
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
||||
import eu.dnetlib.dhp.common.collection.CollectorException;
|
||||
|
||||
public class BaseCollectorPlugin implements CollectorPlugin {
|
||||
|
||||
private final FileSystem fs;
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(AbstractSplittedRecordPlugin.class);
|
||||
|
||||
// MAPPING AND FILTERING ARE DEFINED HERE:
|
||||
// https://docs.google.com/document/d/1Aj-ZAV11b44MCrAAUCPiS2TUlXb6PnJEu1utCMAcCOU/edit
|
||||
|
||||
public BaseCollectorPlugin(final FileSystem fs) {
|
||||
this.fs = fs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<String> collect(final ApiDescriptor api, final AggregatorReport report) throws CollectorException {
|
||||
// the path of the dump file on HDFS
|
||||
// http://oai.base-search.net/initial_load/base_oaipmh_dump-current.tar
|
||||
// it could be downloaded from iis-cdh5-test-gw.ocean.icm.edu.pl and then copied on HDFS
|
||||
final Path filePath = Optional
|
||||
.ofNullable(api.getBaseUrl())
|
||||
.map(Path::new)
|
||||
.orElseThrow(() -> new CollectorException("missing baseUrl"));
|
||||
|
||||
// get the parameters for the connection to the OpenAIRE database.
|
||||
// the database is used to obtain the list of the datasources that the plugin will collect
|
||||
final String dbUrl = api.getParams().get("dbUrl");
|
||||
final String dbUser = api.getParams().get("dbUser");
|
||||
final String dbPassword = api.getParams().get("dbPassword");
|
||||
|
||||
// the types(comma separated, empty value for all) that the plugin will collect,
|
||||
// the types should be expressed in the format of the normalized types of BASE (for example 1,121,...)
|
||||
final String acceptedNormTypesString = api.getParams().get("acceptedNormTypes");
|
||||
|
||||
log.info("baseUrl: {}", filePath);
|
||||
log.info("dbUrl: {}", dbUrl);
|
||||
log.info("dbUser: {}", dbUser);
|
||||
log.info("dbPassword: {}", "***");
|
||||
log.info("acceptedNormTypes: {}", acceptedNormTypesString);
|
||||
|
||||
try {
|
||||
if (!this.fs.exists(filePath)) {
|
||||
throw new CollectorException("path does not exist: " + filePath);
|
||||
}
|
||||
} catch (final Throwable e) {
|
||||
throw new CollectorException(e);
|
||||
}
|
||||
|
||||
final Set<String> acceptedOpendoarIds = findAcceptedOpendoarIds(dbUrl, dbUser, dbPassword);
|
||||
|
||||
final Set<String> acceptedNormTypes = new HashSet<>();
|
||||
if (StringUtils.isNotBlank(acceptedNormTypesString)) {
|
||||
for (final String s : StringUtils.split(acceptedNormTypesString, ",")) {
|
||||
if (StringUtils.isNotBlank(s)) {
|
||||
acceptedNormTypes.add(s.trim());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
final Iterator<String> iterator = new BaseCollectorIterator(this.fs, filePath, report);
|
||||
final Spliterator<String> spliterator = Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED);
|
||||
return StreamSupport
|
||||
.stream(spliterator, false)
|
||||
.filter(doc -> filterXml(doc, acceptedOpendoarIds, acceptedNormTypes));
|
||||
}
|
||||
|
||||
private Set<String> findAcceptedOpendoarIds(final String dbUrl, final String dbUser, final String dbPassword)
|
||||
throws CollectorException {
|
||||
final Set<String> accepted = new HashSet<>();
|
||||
|
||||
try (final DbClient dbClient = new DbClient(dbUrl, dbUser, dbPassword)) {
|
||||
|
||||
final String sql = IOUtils
|
||||
.toString(
|
||||
getClass().getResourceAsStream("/eu/dnetlib/dhp/collection/plugin/base/sql/opendoar-accepted.sql"));
|
||||
|
||||
dbClient.processResults(sql, row -> {
|
||||
try {
|
||||
final String dsId = row.getString("id");
|
||||
log.info("Accepted Datasource: " + dsId);
|
||||
accepted.add(dsId);
|
||||
} catch (final SQLException e) {
|
||||
log.error("Error in SQL", e);
|
||||
throw new RuntimeException("Error in SQL", e);
|
||||
}
|
||||
});
|
||||
|
||||
} catch (final IOException e) {
|
||||
log.error("Error accessong SQL", e);
|
||||
throw new CollectorException("Error accessong SQL", e);
|
||||
}
|
||||
|
||||
log.info("Accepted Datasources (TOTAL): " + accepted.size());
|
||||
|
||||
return accepted;
|
||||
}
|
||||
|
||||
protected static boolean filterXml(final String xml,
|
||||
final Set<String> acceptedOpendoarIds,
|
||||
final Set<String> acceptedNormTypes) {
|
||||
try {
|
||||
|
||||
final Document doc = DocumentHelper.parseText(xml);
|
||||
|
||||
final String id = doc.valueOf("//*[local-name()='collection']/@opendoar_id").trim();
|
||||
|
||||
if (StringUtils.isBlank(id) || !acceptedOpendoarIds.contains("opendoar____::" + id)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (acceptedNormTypes.isEmpty()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
for (final Object s : doc.selectNodes("//*[local-name()='typenorm']")) {
|
||||
if (acceptedNormTypes.contains(((Node) s).getText().trim())) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
} catch (final DocumentException e) {
|
||||
log.error("Error parsing document", e);
|
||||
throw new RuntimeException("Error parsing document", e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -52,8 +52,6 @@ public class RestIterator implements Iterator<String> {
|
|||
|
||||
private final String BASIC = "basic";
|
||||
|
||||
private final JsonUtils jsonUtils;
|
||||
|
||||
private final String baseUrl;
|
||||
private final String resumptionType;
|
||||
private final String resumptionParam;
|
||||
|
@ -106,7 +104,6 @@ public class RestIterator implements Iterator<String> {
|
|||
final String resultOutputFormat) {
|
||||
|
||||
this.clientParams = clientParams;
|
||||
this.jsonUtils = new JsonUtils();
|
||||
this.baseUrl = baseUrl;
|
||||
this.resumptionType = resumptionType;
|
||||
this.resumptionParam = resumptionParam;
|
||||
|
@ -126,6 +123,7 @@ public class RestIterator implements Iterator<String> {
|
|||
} catch (Exception e) {
|
||||
throw new IllegalStateException("xml transformation init failed: " + e.getMessage());
|
||||
}
|
||||
|
||||
initQueue();
|
||||
}
|
||||
|
||||
|
@ -190,7 +188,7 @@ public class RestIterator implements Iterator<String> {
|
|||
String resultJson;
|
||||
String resultXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
|
||||
String nextQuery = "";
|
||||
String emptyXml = resultXml + "<" + JsonUtils.wrapName + "></" + JsonUtils.wrapName + ">";
|
||||
String emptyXml = resultXml + "<" + JsonUtils.XML_WRAP_TAG + "></" + JsonUtils.XML_WRAP_TAG + ">";
|
||||
Node resultNode = null;
|
||||
NodeList nodeList = null;
|
||||
String qUrlArgument = "";
|
||||
|
@ -231,7 +229,7 @@ public class RestIterator implements Iterator<String> {
|
|||
resultStream = theHttpInputStream;
|
||||
if ("json".equals(resultOutputFormat)) {
|
||||
resultJson = IOUtils.toString(resultStream, StandardCharsets.UTF_8);
|
||||
resultXml = jsonUtils.convertToXML(resultJson);
|
||||
resultXml = JsonUtils.convertToXML(resultJson);
|
||||
resultStream = IOUtils.toInputStream(resultXml, UTF_8);
|
||||
}
|
||||
|
||||
|
|
|
@ -3,82 +3,142 @@ package eu.dnetlib.dhp.collection.plugin.utils;
|
|||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONObject;
|
||||
|
||||
public class JsonUtils {
|
||||
public static final String XML_WRAP_TAG = "recordWrap";
|
||||
private static final String XML_HEADER = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
|
||||
private static final String INVALID_XMLTAG_CHARS = "!\"#$%&'()*+,/;<=>?@[\\]^`{|}~,";
|
||||
|
||||
private static final Log log = LogFactory.getLog(JsonUtils.class);
|
||||
|
||||
public static final String wrapName = "recordWrap";
|
||||
|
||||
/**
|
||||
* convert in JSON-KeyName 'whitespace(s)' to '_' and '/' to '_', '(' and ')' to ''
|
||||
* cleanup in JSON-KeyName
|
||||
* check W3C XML syntax: https://www.w3.org/TR/2006/REC-xml11-20060816/#sec-starttags for valid tag names
|
||||
* and work-around for the JSON to XML converting of org.json.XML-package.
|
||||
*
|
||||
* known bugs: doesn't prevent "key name":" ["sexy name",": penari","erotic dance"],
|
||||
*
|
||||
* @param jsonInput
|
||||
* @return convertedJsonKeynameOutput
|
||||
* @param input
|
||||
* @return converted json object
|
||||
*/
|
||||
public String syntaxConvertJsonKeyNames(String jsonInput) {
|
||||
|
||||
log.trace("before convertJsonKeyNames: " + jsonInput);
|
||||
// pre-clean json - rid spaces of element names (misinterpreted as elements with attributes in xml)
|
||||
// replace ' 's in JSON Namens with '_'
|
||||
while (jsonInput.matches(".*\"([^\"]*)\\s+([^\"]*)\":.*")) {
|
||||
jsonInput = jsonInput.replaceAll("\"([^\"]*)\\s+([^\"]*)\":", "\"$1_$2\":");
|
||||
public static JSONObject cleanJsonObject(final JSONObject input) {
|
||||
if (null == input) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// replace forward-slash (sign '/' ) in JSON Names with '_'
|
||||
while (jsonInput.matches(".*\"([^\"]*)/([^\"]*)\":.*")) {
|
||||
jsonInput = jsonInput.replaceAll("\"([^\"]*)/([^\"]*)\":", "\"$1_$2\":");
|
||||
JSONObject result = new JSONObject();
|
||||
|
||||
for (String key : input.keySet()) {
|
||||
Object value = input.opt(key);
|
||||
if (value != null) {
|
||||
result.put(cleanKey(key), cleanValue(value));
|
||||
}
|
||||
}
|
||||
|
||||
// replace '(' in JSON Names with ''
|
||||
while (jsonInput.matches(".*\"([^\"]*)[(]([^\"]*)\":.*")) {
|
||||
jsonInput = jsonInput.replaceAll("\"([^\"]*)[(]([^\"]*)\":", "\"$1$2\":");
|
||||
}
|
||||
|
||||
// replace ')' in JSON Names with ''
|
||||
while (jsonInput.matches(".*\"([^\"]*)[)]([^\"]*)\":.*")) {
|
||||
jsonInput = jsonInput.replaceAll("\"([^\"]*)[)]([^\"]*)\":", "\"$1$2\":");
|
||||
}
|
||||
|
||||
// add prefix of startNumbers in JSON Keynames with 'n_'
|
||||
while (jsonInput.matches(".*\"([^\"][0-9])([^\"]*)\":.*")) {
|
||||
jsonInput = jsonInput.replaceAll("\"([^\"][0-9])([^\"]*)\":", "\"n_$1$2\":");
|
||||
}
|
||||
// add prefix of only numbers in JSON Keynames with 'm_'
|
||||
while (jsonInput.matches(".*\"([0-9]+)\":.*")) {
|
||||
jsonInput = jsonInput.replaceAll("\"([0-9]+)\":", "\"m_$1\":");
|
||||
}
|
||||
|
||||
// replace ':' between number like '2018-08-28T11:05:00Z' in JSON keynames with ''
|
||||
while (jsonInput.matches(".*\"([^\"]*[0-9]):([0-9][^\"]*)\":.*")) {
|
||||
jsonInput = jsonInput.replaceAll("\"([^\"]*[0-9]):([0-9][^\"]*)\":", "\"$1$2\":");
|
||||
}
|
||||
|
||||
// replace ',' in JSON Keynames with '.' to prevent , in xml tagnames.
|
||||
// while (jsonInput.matches(".*\"([^\"]*),([^\"]*)\":.*")) {
|
||||
// jsonInput = jsonInput.replaceAll("\"([^\"]*),([^\"]*)\":", "\"$1.$2\":");
|
||||
// }
|
||||
|
||||
// replace '=' in JSON Keynames with '-'
|
||||
while (jsonInput.matches(".*\"([^\"]*)=([^\"]*)\":.*")) {
|
||||
jsonInput = jsonInput.replaceAll("\"([^\"]*)=([^\"]*)\":", "\"$1-$2\":");
|
||||
}
|
||||
|
||||
log.trace("after syntaxConvertJsonKeyNames: " + jsonInput);
|
||||
return jsonInput;
|
||||
return result;
|
||||
}
|
||||
|
||||
public String convertToXML(final String jsonRecord) {
|
||||
String resultXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
|
||||
org.json.JSONObject jsonObject = new org.json.JSONObject(syntaxConvertJsonKeyNames(jsonRecord));
|
||||
resultXml += org.json.XML.toString(jsonObject, wrapName); // wrap xml in single root element
|
||||
log.trace("before inputStream: " + resultXml);
|
||||
resultXml = XmlCleaner.cleanAllEntities(resultXml);
|
||||
log.trace("after cleaning: " + resultXml);
|
||||
return resultXml;
|
||||
private static Object cleanValue(Object object) {
|
||||
if (object instanceof JSONObject) {
|
||||
return cleanJsonObject((JSONObject) object);
|
||||
} else if (object instanceof JSONArray) {
|
||||
JSONArray array = (JSONArray) object;
|
||||
JSONArray res = new JSONArray();
|
||||
|
||||
for (int i = array.length() - 1; i >= 0; i--) {
|
||||
res.put(i, cleanValue(array.opt(i)));
|
||||
}
|
||||
return res;
|
||||
} else if (object instanceof String) {
|
||||
String value = (String) object;
|
||||
|
||||
// XML 1.0 Allowed characters
|
||||
// Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
|
||||
|
||||
return value
|
||||
.codePoints()
|
||||
.filter(
|
||||
cp -> cp == 0x9 || cp == 0xA || cp == 0xD || (cp >= 0x20 && cp <= 0xD7FF)
|
||||
|| (cp >= 0xE000 && cp <= 0xFFFD)
|
||||
|| (cp >= 0x10000 && cp <= 0x10FFFF))
|
||||
.collect(
|
||||
StringBuilder::new,
|
||||
StringBuilder::appendCodePoint,
|
||||
StringBuilder::append)
|
||||
.toString();
|
||||
}
|
||||
|
||||
return object;
|
||||
}
|
||||
|
||||
private static String cleanKey(String key) {
|
||||
if (key == null || key.isEmpty()) {
|
||||
return key;
|
||||
}
|
||||
|
||||
// xml tag cannot begin with "-", ".", or a numeric digit.
|
||||
switch (key.charAt(0)) {
|
||||
case '-':
|
||||
case '.':
|
||||
key = "_" + key.substring(1);
|
||||
break;
|
||||
}
|
||||
|
||||
if (Character.isDigit(key.charAt(0))) {
|
||||
if (key.matches("^[0-9]+$")) {
|
||||
// add prefix of only numbers in JSON Keynames with 'm_'
|
||||
key = "m_" + key;
|
||||
} else {
|
||||
// add prefix of startNumbers in JSON Keynames with 'n_'
|
||||
key = "n_" + key;
|
||||
}
|
||||
}
|
||||
|
||||
StringBuilder res = new StringBuilder(key.length());
|
||||
for (int i = 0; i < key.length(); i++) {
|
||||
char c = key.charAt(i);
|
||||
|
||||
// sequence of whitespaces are rendered as a single '_'
|
||||
if (Character.isWhitespace(c)) {
|
||||
while (i + 1 < key.length() && Character.isWhitespace(key.charAt(i + 1))) {
|
||||
i++;
|
||||
}
|
||||
res.append('_');
|
||||
}
|
||||
// remove invalid chars for xml tags with the expception of '=' and '/'
|
||||
else if (INVALID_XMLTAG_CHARS.indexOf(c) >= 0) {
|
||||
switch (c) {
|
||||
case '=':
|
||||
res.append('-');
|
||||
break;
|
||||
case '/':
|
||||
res.append('_');
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
// nothing
|
||||
}
|
||||
// all other chars are kept
|
||||
else {
|
||||
res.append(c);
|
||||
}
|
||||
}
|
||||
|
||||
return res.toString();
|
||||
}
|
||||
|
||||
static public String convertToXML(final String jsonRecord) {
|
||||
if (log.isTraceEnabled()) {
|
||||
log.trace("input json: " + jsonRecord);
|
||||
}
|
||||
|
||||
JSONObject jsonObject = cleanJsonObject(new org.json.JSONObject(jsonRecord));
|
||||
String res = XML_HEADER + org.json.XML.toString(jsonObject, XML_WRAP_TAG); // wrap xml in single root element
|
||||
|
||||
if (log.isTraceEnabled()) {
|
||||
log.trace("outout xml: " + res);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
[
|
||||
{
|
||||
"paramName": "issm",
|
||||
"paramLongName": "isSparkSessionManaged",
|
||||
"paramDescription": "when true will stop SparkSession after job execution",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName": "dip",
|
||||
"paramLongName": "dblpInputPath",
|
||||
"paramDescription": "the path to get the input data from DBLP",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "o",
|
||||
"paramLongName": "outputPath",
|
||||
"paramDescription": "the path of the new ActionSet",
|
||||
"paramRequired": true
|
||||
}
|
||||
]
|
|
@ -0,0 +1,35 @@
|
|||
# --- You can override the following properties (if needed) coming from your ~/.dhp/application.properties ---
|
||||
# dhp.hadoop.frontend.temp.dir=/home/ilias.kanellos
|
||||
# dhp.hadoop.frontend.user.name=ilias.kanellos
|
||||
# dhp.hadoop.frontend.host.name=iis-cdh5-test-gw.ocean.icm.edu.pl
|
||||
# dhp.hadoop.frontend.port.ssh=22
|
||||
# oozieServiceLoc=http://iis-cdh5-test-m3:11000/oozie
|
||||
# jobTracker=yarnRM
|
||||
# nameNode=hdfs://nameservice1
|
||||
# oozie.execution.log.file.location = target/extract-and-run-on-remote-host.log
|
||||
# maven.executable=mvn
|
||||
|
||||
# Some memory and driver settings for more demanding tasks
|
||||
sparkDriverMemory=10G
|
||||
sparkExecutorMemory=10G
|
||||
sparkExecutorCores=4
|
||||
sparkShufflePartitions=7680
|
||||
|
||||
# The above is given differently in an example I found online
|
||||
oozie.action.sharelib.for.spark=spark2
|
||||
oozieActionShareLibForSpark2=spark2
|
||||
spark2YarnHistoryServerAddress=http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089
|
||||
spark2EventLogDir=/user/spark/spark2ApplicationHistory
|
||||
sparkSqlWarehouseDir=/user/hive/warehouse
|
||||
hiveMetastoreUris=thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083
|
||||
# This MAY avoid the no library used error
|
||||
oozie.use.system.libpath=true
|
||||
# Some stuff copied from openaire's jobs
|
||||
spark2ExtraListeners=com.cloudera.spark.lineage.NavigatorAppListener
|
||||
spark2SqlQueryExecutionListeners=com.cloudera.spark.lineage.NavigatorQueryListener
|
||||
|
||||
# The following is needed as a property of a workflow
|
||||
oozie.wf.application.path=${oozieTopWfApplicationPath}
|
||||
|
||||
dblpInputPath=/data/dblp/dblp.xml.gz
|
||||
outputPath=/tmp/dblp-actionsets
|
|
@ -0,0 +1,30 @@
|
|||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>yarnRM</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.use.system.libpath</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveMetastoreUris</name>
|
||||
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveJdbcUrl</name>
|
||||
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveDbName</name>
|
||||
<value>openaire</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
</configuration>
|
|
@ -0,0 +1,107 @@
|
|||
<workflow-app name="BipAffiliations" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
|
||||
<property>
|
||||
<name>dblpInputPath</name>
|
||||
<description>the path where to find the input data from DBLP</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>outputPath</name>
|
||||
<description>the path where to store the actionset</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkDriverMemory</name>
|
||||
<description>memory for driver process</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorMemory</name>
|
||||
<description>memory for individual executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorCores</name>
|
||||
<description>number of cores used by single executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozieActionShareLibForSpark2</name>
|
||||
<description>oozie action sharelib for spark 2.*</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2ExtraListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
|
||||
<description>spark 2.* extra listeners classname</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2SqlQueryExecutionListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
|
||||
<description>spark 2.* sql query execution listeners classname</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2YarnHistoryServerAddress</name>
|
||||
<description>spark 2.* yarn history server address</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2EventLogDir</name>
|
||||
<description>spark 2.* event log dir location</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<global>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>mapreduce.job.queuename</name>
|
||||
<value>${queueName}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.launcher.mapred.job.queue.name</name>
|
||||
<value>${oozieLauncherQueueName}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>${oozieActionShareLibForSpark2}</value>
|
||||
</property>
|
||||
|
||||
</configuration>
|
||||
</global>
|
||||
<start to="deleteoutputpath"/>
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
<action name="deleteoutputpath">
|
||||
<fs>
|
||||
<delete path="${outputPath}"/>
|
||||
<mkdir path="${outputPath}"/>
|
||||
<delete path="${workingDir}"/>
|
||||
<mkdir path="${workingDir}"/>
|
||||
</fs>
|
||||
<ok to="atomicactions"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="atomicactions">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Produces the atomic action sets with the DBLP data</name>
|
||||
<class>eu.dnetlib.dhp.actionmanager.dblp.PrepareDblpActionSets</class>
|
||||
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--dblpInputPath</arg><arg>${dblpInputPath}</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<end name="End"/>
|
||||
</workflow-app>
|
|
@ -0,0 +1,26 @@
|
|||
[
|
||||
{
|
||||
"paramName": "m",
|
||||
"paramLongName": "master",
|
||||
"paramDescription": "the master name",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "t",
|
||||
"paramLongName": "targetPath",
|
||||
"paramDescription": "the target PATH of the DF tables",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "g",
|
||||
"paramLongName": "graphPath",
|
||||
"paramDescription": "the PATH of the current graph path",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "u",
|
||||
"paramLongName": "updatePath",
|
||||
"paramDescription": "the PATH of the current graph update path",
|
||||
"paramRequired": true
|
||||
}
|
||||
]
|
|
@ -0,0 +1,37 @@
|
|||
[ {
|
||||
"paramName": "n",
|
||||
"paramLongName": "namenode",
|
||||
"paramDescription": "the Name Node URI",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "m",
|
||||
"paramLongName": "master",
|
||||
"paramDescription": "the master name",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "t",
|
||||
"paramLongName": "targetPath",
|
||||
"paramDescription": "the target PATH where download the files",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "a",
|
||||
"paramLongName": "apiURL",
|
||||
"paramDescription": "the URL to download the tar file",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "g",
|
||||
"paramLongName": "graphPath",
|
||||
"paramDescription": "the path of the input graph",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "at",
|
||||
"paramLongName": "accessToken",
|
||||
"paramDescription": "the accessToken to contact API",
|
||||
"paramRequired": true
|
||||
}
|
||||
]
|
|
@ -16,6 +16,12 @@
|
|||
"paramLongName": "sourcePath",
|
||||
"paramDescription": "the PATH of the ORCID sequence file",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "fu",
|
||||
"paramLongName": "fromUpdate",
|
||||
"paramDescription": "whether we have to generate table from dump or from update",
|
||||
"paramRequired": false
|
||||
}
|
||||
|
||||
]
|
|
@ -0,0 +1,23 @@
|
|||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>yarnRM</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.use.system.libpath</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>spark2</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
</configuration>
|
|
@ -0,0 +1,114 @@
|
|||
<workflow-app name="download_Update_ORCID" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
<property>
|
||||
<name>graphPath</name>
|
||||
<description>the path to store the original ORCID dump</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>targetPath</name>
|
||||
<description>the path to store the original ORCID dump</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>apiURL</name>
|
||||
<value>http://74804fb637bd8e2fba5b-e0a029c2f87486cddec3b416996a6057.r3.cf1.rackcdn.com/last_modified.csv.tar</value>
|
||||
<description>The URL of the update CSV list </description>
|
||||
</property>
|
||||
<property>
|
||||
<name>accessToken</name>
|
||||
<description>The access token</description>
|
||||
</property>
|
||||
|
||||
|
||||
</parameters>
|
||||
|
||||
<start to="startUpdate"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="startUpdate">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Check Latest Orcid and Download updates</name>
|
||||
<class>eu.dnetlib.dhp.collection.orcid.OrcidGetUpdatesFile</class>
|
||||
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.executor.memoryOverhead=2g
|
||||
--conf spark.sql.shuffle.partitions=3000
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--master</arg><arg>yarn</arg>
|
||||
<arg>--namenode</arg><arg>${nameNode}</arg>
|
||||
<arg>--graphPath</arg><arg>${graphPath}</arg>
|
||||
<arg>--targetPath</arg><arg>${targetPath}</arg>
|
||||
<arg>--apiURL</arg><arg>${apiURL}</arg>
|
||||
<arg>--accessToken</arg><arg>${accessToken}</arg>
|
||||
</spark>
|
||||
<ok to="generateTables"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
||||
<action name="generateTables">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Generate ORCID Tables</name>
|
||||
<class>eu.dnetlib.dhp.collection.orcid.SparkGenerateORCIDTable</class>
|
||||
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.executor.memoryOverhead=2g
|
||||
--conf spark.sql.shuffle.partitions=3000
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${targetPath}</arg>
|
||||
<arg>--targetPath</arg><arg>${targetPath}/updateTable</arg>
|
||||
<arg>--fromUpdate</arg><arg>true</arg>
|
||||
<arg>--master</arg><arg>yarn</arg>
|
||||
</spark>
|
||||
<ok to="updateTable"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="updateTable">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Update ORCID Tables</name>
|
||||
<class>eu.dnetlib.dhp.collection.orcid.SparkApplyUpdate</class>
|
||||
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.executor.memoryOverhead=2g
|
||||
--conf spark.sql.shuffle.partitions=3000
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--graphPath</arg><arg>${graphPath}</arg>
|
||||
<arg>--updatePath</arg><arg>${targetPath}/updateTable</arg>
|
||||
<arg>--targetPath</arg><arg>${targetPath}/newTable</arg>
|
||||
<arg>--master</arg><arg>yarn</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
<end name="End"/>
|
||||
</workflow-app>
|
|
@ -0,0 +1,114 @@
|
|||
BEGIN;
|
||||
|
||||
INSERT INTO dsm_services(
|
||||
_dnet_resource_identifier_,
|
||||
id,
|
||||
officialname,
|
||||
englishname,
|
||||
namespaceprefix,
|
||||
websiteurl,
|
||||
logourl,
|
||||
platform,
|
||||
contactemail,
|
||||
collectedfrom,
|
||||
provenanceaction,
|
||||
_typology_to_remove_,
|
||||
eosc_type,
|
||||
eosc_datasource_type,
|
||||
research_entity_types,
|
||||
thematic
|
||||
) VALUES (
|
||||
'openaire____::base_search',
|
||||
'openaire____::base_search',
|
||||
'Bielefeld Academic Search Engine (BASE)',
|
||||
'Bielefeld Academic Search Engine (BASE)',
|
||||
'base_search_',
|
||||
'https://www.base-search.net',
|
||||
'https://www.base-search.net/about/download/logo_224x57_white.gif',
|
||||
'BASE',
|
||||
'openaire-helpdesk@uni-bielefeld.de',
|
||||
'infrastruct_::openaire',
|
||||
'user:insert',
|
||||
'aggregator::pubsrepository::unknown',
|
||||
'Data Source',
|
||||
'Aggregator',
|
||||
ARRAY['Research Products'],
|
||||
false
|
||||
);
|
||||
|
||||
INSERT INTO dsm_service_organization(
|
||||
_dnet_resource_identifier_,
|
||||
organization,
|
||||
service
|
||||
) VALUES (
|
||||
'fairsharing_::org::214@@openaire____::base_search',
|
||||
'fairsharing_::org::214',
|
||||
'openaire____::base_search'
|
||||
);
|
||||
|
||||
INSERT INTO dsm_api(
|
||||
_dnet_resource_identifier_,
|
||||
id,
|
||||
service,
|
||||
protocol,
|
||||
baseurl,
|
||||
metadata_identifier_path
|
||||
) VALUES (
|
||||
'api_________::openaire____::base_search::dump',
|
||||
'api_________::openaire____::base_search::dump',
|
||||
'openaire____::base_search',
|
||||
'baseDump',
|
||||
'/user/michele.artini/base-import/base_oaipmh_dump-current.tar',
|
||||
'//*[local-name()=''header'']/*[local-name()=''identifier'']'
|
||||
);
|
||||
|
||||
|
||||
INSERT INTO dsm_apiparams(
|
||||
_dnet_resource_identifier_,
|
||||
api,
|
||||
param,
|
||||
value
|
||||
) VALUES (
|
||||
'api_________::openaire____::base_search::dump@@dbUrl',
|
||||
'api_________::openaire____::base_search::dump',
|
||||
'dbUrl',
|
||||
'jdbc:postgresql://postgresql.services.openaire.eu:5432/dnet_openaireplus'
|
||||
);
|
||||
|
||||
INSERT INTO dsm_apiparams(
|
||||
_dnet_resource_identifier_,
|
||||
api,
|
||||
param,
|
||||
value
|
||||
) VALUES (
|
||||
'api_________::openaire____::base_search::dump@@dbUser',
|
||||
'api_________::openaire____::base_search::dump',
|
||||
'dbUser',
|
||||
'dnet'
|
||||
);
|
||||
|
||||
INSERT INTO dsm_apiparams(
|
||||
_dnet_resource_identifier_,
|
||||
api,
|
||||
param,
|
||||
value
|
||||
) VALUES (
|
||||
'api_________::openaire____::base_search::dump@@dbPassword',
|
||||
'api_________::openaire____::base_search::dump',
|
||||
'dbPassword',
|
||||
'***'
|
||||
);
|
||||
|
||||
INSERT INTO dsm_apiparams(
|
||||
_dnet_resource_identifier_,
|
||||
api,
|
||||
param,
|
||||
value
|
||||
) VALUES (
|
||||
'api_________::openaire____::base_search::dump@@acceptedNormTypes',
|
||||
'api_________::openaire____::base_search::dump',
|
||||
'acceptedNormTypes',
|
||||
'1,11,111,121,14,15,18,181,182,183,1A,6,7'
|
||||
);
|
||||
|
||||
COMMIT;
|
|
@ -0,0 +1,9 @@
|
|||
select s.id as id
|
||||
from dsm_services s
|
||||
where collectedfrom = 'openaire____::opendoar'
|
||||
and jurisdiction = 'Institutional'
|
||||
and s.id in (
|
||||
select service from dsm_api where coalesce(compatibility_override, compatibility) = 'driver' or coalesce(compatibility_override, compatibility) = 'UNKNOWN'
|
||||
) and s.id not in (
|
||||
select service from dsm_api where coalesce(compatibility_override, compatibility) like '%openaire%'
|
||||
);
|
|
@ -0,0 +1,11 @@
|
|||
select
|
||||
s.id as id,
|
||||
s.jurisdiction as jurisdiction,
|
||||
array_remove(array_agg(a.id || ' (compliance: ' || coalesce(a.compatibility_override, a.compatibility, 'UNKNOWN') || ')@@@' || coalesce(a.last_collection_total, 0)), NULL) as aggregations
|
||||
from
|
||||
dsm_services s
|
||||
join dsm_api a on (s.id = a.service)
|
||||
where
|
||||
collectedfrom = 'openaire____::opendoar'
|
||||
group by
|
||||
s.id;
|
|
@ -0,0 +1,180 @@
|
|||
<RESOURCE_PROFILE>
|
||||
<HEADER>
|
||||
<RESOURCE_IDENTIFIER value="c67911d6-9988-4a3b-b965-7d39bdd4a31d_Vm9jYWJ1bGFyeURTUmVzb3VyY2VzL1ZvY2FidWxhcnlEU1Jlc291cmNlVHlwZQ==" />
|
||||
<RESOURCE_TYPE value="VocabularyDSResourceType" />
|
||||
<RESOURCE_KIND value="VocabularyDSResources" />
|
||||
<RESOURCE_URI value="" />
|
||||
<DATE_OF_CREATION value="2024-02-13T11:15:48+00:00" />
|
||||
</HEADER>
|
||||
<BODY>
|
||||
<CONFIGURATION>
|
||||
<VOCABULARY_NAME code="base:normalized_types">base:normalized_types</VOCABULARY_NAME>
|
||||
<VOCABULARY_DESCRIPTION>base:normalized_types</VOCABULARY_DESCRIPTION>
|
||||
<TERMS>
|
||||
<TERM native_name="Text" code="Text" english_name="Text" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="1" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Book" code="Book" english_name="Book" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="11" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Book part" code="Book part" english_name="Book part" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="111" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Journal/Newspaper" code="Journal/Newspaper" english_name="Journal/Newspaper" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="12" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Article contribution" code="Article contribution" english_name="Article contribution" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="121" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Other non-article" code="Other non-article" english_name="Other non-article" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="122" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Conference object" code="Conference object" english_name="Conference object" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="13" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Report" code="Report" english_name="Report" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="14" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Review" code="Review" english_name="Review" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="15" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Course material" code="Course material" english_name="Course material" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="16" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Lecture" code="Lecture" english_name="Lecture" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="17" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Thesis" code="Thesis" english_name="Thesis" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="18" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Bachelor's thesis" code="Bachelor's thesis" english_name="Bachelor's thesis" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="181" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Master's thesis" code="Master's thesis" english_name="Master's thesis" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="182" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Doctoral and postdoctoral thesis" code="Doctoral and postdoctoral thesis" english_name="Doctoral and postdoctoral thesis" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="183" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Manuscript" code="Manuscript" english_name="Manuscript" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="19" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Patent" code="Patent" english_name="Patent" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="1A" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Musical notation" code="Musical notation" english_name="Musical notation" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="2" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Map" code="Map" english_name="Map" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="3" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Audio" code="Audio" english_name="Audio" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="4" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Image/Video" code="Image/Video" english_name="Image/Video" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="5" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Still image" code="Still image" english_name="Still image" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="51" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Moving image/Video" code="Moving image/Video" english_name="Moving image/Video" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="52" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Software" code="Software" english_name="Software" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="6" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Dataset" code="Dataset" english_name="Dataset" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="7" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Unknown" code="Unknown" english_name="Unknown" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="F" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
|
||||
</TERMS>
|
||||
</CONFIGURATION>
|
||||
<STATUS>
|
||||
<LAST_UPDATE value="2013-11-18T10:46:36Z" />
|
||||
</STATUS>
|
||||
<SECURITY_PARAMETERS>String</SECURITY_PARAMETERS>
|
||||
</BODY>
|
||||
</RESOURCE_PROFILE>
|
||||
|
||||
|
|
@ -0,0 +1,430 @@
|
|||
<RESOURCE_PROFILE>
|
||||
<HEADER>
|
||||
<RESOURCE_IDENTIFIER value="" />
|
||||
<RESOURCE_TYPE value="TransformationRuleDSResourceType" />
|
||||
<RESOURCE_KIND value="TransformationRuleDSResources" />
|
||||
<RESOURCE_URI value="" />
|
||||
<DATE_OF_CREATION value="2024-03-05T11:23:00+00:00" />
|
||||
</HEADER>
|
||||
<BODY>
|
||||
<CONFIGURATION>
|
||||
<SOURCE_METADATA_FORMAT interpretation="cleaned" layout="store" name="dc" />
|
||||
<SINK_METADATA_FORMAT name="oaf_hbase" />
|
||||
<IMPORTED />
|
||||
<SCRIPT>
|
||||
<TITLE>xslt_base2oaf_hadoop</TITLE>
|
||||
<CODE>
|
||||
<xsl:stylesheet xmlns:oaire="http://namespace.openaire.eu/schema/oaire/" xmlns:dateCleaner="http://eu/dnetlib/transform/dateISO"
|
||||
xmlns:base_dc="http://oai.base-search.net/base_dc/"
|
||||
xmlns:datacite="http://datacite.org/schema/kernel-4" xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:vocabulary="http://eu/dnetlib/transform/clean" xmlns:oaf="http://namespace.openaire.eu/oaf"
|
||||
xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
exclude-result-prefixes="xsl vocabulary dateCleaner base_dc" version="2.0">
|
||||
<xsl:param name="varOfficialName" />
|
||||
<xsl:param name="varDataSourceId" />
|
||||
<xsl:param name="varFP7" select="'corda_______::'" />
|
||||
<xsl:param name="varH2020" select="'corda__h2020::'" />
|
||||
<xsl:param name="repoCode" select="substring-before(//*[local-name() = 'header']/*[local-name()='recordIdentifier'], ':')" />
|
||||
<xsl:param name="index" select="0" />
|
||||
<xsl:param name="transDate" select="current-dateTime()" />
|
||||
|
||||
<xsl:template name="terminate">
|
||||
<xsl:message terminate="yes">
|
||||
record is not compliant, transformation is interrupted.
|
||||
</xsl:message>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="/">
|
||||
<record>
|
||||
<xsl:apply-templates select="//*[local-name() = 'header']" />
|
||||
|
||||
|
||||
<!-- TO EVALUATE
|
||||
base_dc:authod_id
|
||||
base_dc:authod_id/base_dc:creator_id
|
||||
base_dc:authod_id/base_dc:creator_name
|
||||
|
||||
example:
|
||||
|
||||
<dc:creator>ALBU, Svetlana</dc:creator>
|
||||
|
||||
<base_dc:authod_id>
|
||||
<base_dc:creator_name>ALBU, Svetlana</base_dc:creator_name>
|
||||
<base_dc:creator_id>https://orcid.org/0000-0002-8648-950X</base_dc:creator_id>
|
||||
</base_dc:authod_id>
|
||||
-->
|
||||
|
||||
<!-- NOT USED
|
||||
base_dc:global_id (I used oai:identifier)
|
||||
base_dc:collection/text()
|
||||
|
||||
base_dc:continent
|
||||
base_dc:country
|
||||
base_dc:year (I used dc:date)
|
||||
dc:coverage
|
||||
dc:language (I used base_dc:lang)
|
||||
base_dc:link (I used dc:identifier)
|
||||
-->
|
||||
|
||||
<metadata>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:title" />
|
||||
<xsl:with-param name="targetElement" select="'dc:title'" />
|
||||
</xsl:call-template>
|
||||
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:creator/replace(., '^(.*)\|.*$', '$1')" />
|
||||
<xsl:with-param name="targetElement" select="'dc:creator'" />
|
||||
</xsl:call-template>
|
||||
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:contributor" />
|
||||
<xsl:with-param name="targetElement" select="'dc:contributor'" />
|
||||
</xsl:call-template>
|
||||
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:description" />
|
||||
<xsl:with-param name="targetElement" select="'dc:description'" />
|
||||
</xsl:call-template>
|
||||
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:subject" />
|
||||
<xsl:with-param name="targetElement" select="'dc:subject'" />
|
||||
</xsl:call-template>
|
||||
|
||||
<!-- TODO: I'm not sure if this is the correct encoding -->
|
||||
<xsl:for-each select="//base_dc:classcode|//base_dc:autoclasscode">
|
||||
<dc:subject><xsl:value-of select="concat(@type, ':', .)" /></dc:subject>
|
||||
</xsl:for-each>
|
||||
<!-- END TODO -->
|
||||
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:publisher" />
|
||||
<xsl:with-param name="targetElement" select="'dc:publisher'" />
|
||||
</xsl:call-template>
|
||||
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:format" />
|
||||
<xsl:with-param name="targetElement" select="'dc:format'" />
|
||||
</xsl:call-template>
|
||||
|
||||
|
||||
<xsl:for-each select="//base_dc:typenorm">
|
||||
<dc:type>
|
||||
<xsl:value-of select="vocabulary:clean(., 'base:normalized_types')" />
|
||||
</dc:type>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:type" />
|
||||
<xsl:with-param name="targetElement" select="'dc:type'" />
|
||||
</xsl:call-template>
|
||||
|
||||
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:source" />
|
||||
<xsl:with-param name="targetElement" select="'dc:source'" />
|
||||
</xsl:call-template>
|
||||
|
||||
<dc:language>
|
||||
<xsl:value-of select="vocabulary:clean( //base_dc:lang, 'dnet:languages')" />
|
||||
</dc:language>
|
||||
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:rights" />
|
||||
<xsl:with-param name="targetElement" select="'dc:rights'" />
|
||||
</xsl:call-template>
|
||||
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:relation" />
|
||||
<xsl:with-param name="targetElement" select="'dc:relation'" />
|
||||
</xsl:call-template>
|
||||
|
||||
<xsl:if test="not(//dc:identifier[starts-with(., 'http')])">
|
||||
<xsl:call-template name="terminate" />
|
||||
</xsl:if>
|
||||
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:identifier[starts-with(., 'http')]" />
|
||||
<xsl:with-param name="targetElement" select="'dc:identifier'" />
|
||||
</xsl:call-template>
|
||||
|
||||
<xsl:for-each select="//dc:relation">
|
||||
<xsl:if test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', 'i')">
|
||||
<oaf:projectid>
|
||||
<xsl:value-of select="concat($varFP7, replace(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', '$2', 'i'))" />
|
||||
</oaf:projectid>
|
||||
</xsl:if>
|
||||
<xsl:if test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/h2020/)(\d\d\d\d\d\d)(.*)', 'i')">
|
||||
<oaf:projectid>
|
||||
<xsl:value-of select="concat($varH2020, replace(normalize-space(.), '(info:eu-repo/grantagreement/ec/h2020/)(\d\d\d\d\d\d)(.*)', '$2', 'i'))" />
|
||||
</oaf:projectid>
|
||||
</xsl:if>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:choose>
|
||||
<!-- I used an inline mapping because the field typenorm could be repeated and I have to specify a list of priority -->
|
||||
|
||||
<!-- Book part -->
|
||||
<xsl:when test="//base_dc:typenorm = '111'">
|
||||
<dr:CobjCategory type="publication">0013</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Book -->
|
||||
<xsl:when test="//base_dc:typenorm = '11'">
|
||||
<dr:CobjCategory type="publication">0002</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Article contribution -->
|
||||
<xsl:when test="//base_dc:typenorm = '121'">
|
||||
<dr:CobjCategory type="publication">0001</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
|
||||
<!-- Journal/Newspaper -->
|
||||
<xsl:when test="//base_dc:typenorm = '12'">
|
||||
<dr:CobjCategory type="publication">0043</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Report -->
|
||||
<xsl:when test="//base_dc:typenorm = '14'">
|
||||
<dr:CobjCategory type="publication">0017</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Review -->
|
||||
<xsl:when test="//base_dc:typenorm = '15'">
|
||||
<dr:CobjCategory type="publication">0015</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Lecture -->
|
||||
<xsl:when test="//base_dc:typenorm = '17'">
|
||||
<dr:CobjCategory type="publication">0010</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Bachelor's thesis -->
|
||||
<xsl:when test="//base_dc:typenorm = '181'">
|
||||
<dr:CobjCategory type="publication">0008</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Master's thesis -->
|
||||
<xsl:when test="//base_dc:typenorm = '182'">
|
||||
<dr:CobjCategory type="publication">0007</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Doctoral and postdoctoral thesis -->
|
||||
<xsl:when test="//base_dc:typenorm = '183'">
|
||||
<dr:CobjCategory type="publication">0006</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Thesis -->
|
||||
<xsl:when test="//base_dc:typenorm = '18'">
|
||||
<dr:CobjCategory type="publication">0044</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Patent -->
|
||||
<xsl:when test="//base_dc:typenorm = '1A'">
|
||||
<dr:CobjCategory type="publication">0019</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Text -->
|
||||
<xsl:when test="//base_dc:typenorm = '1'">
|
||||
<dr:CobjCategory type="publication">0001</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Software -->
|
||||
<xsl:when test="//base_dc:typenorm = '6'">
|
||||
<dr:CobjCategory type="software">0029</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Dataset -->
|
||||
<xsl:when test="//base_dc:typenorm = '7'">
|
||||
<dr:CobjCategory type="dataset">0021</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Still image -->
|
||||
<xsl:when test="//base_dc:typenorm = '51'">
|
||||
<dr:CobjCategory type="other">0025</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Moving image/Video -->
|
||||
<xsl:when test="//base_dc:typenorm = '52'">
|
||||
<dr:CobjCategory type="other">0024</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Image/Video -->
|
||||
<xsl:when test="//base_dc:typenorm = '5'">
|
||||
<dr:CobjCategory type="other">0033</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Audio -->
|
||||
<xsl:when test="//base_dc:typenorm = '4'">
|
||||
<dr:CobjCategory type="other">0030</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Musical notation -->
|
||||
<xsl:when test="//base_dc:typenorm = '2'">
|
||||
<dr:CobjCategory type="other">0020</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Map -->
|
||||
<xsl:when test="//base_dc:typenorm = '3'">
|
||||
<dr:CobjCategory type="other">0020</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Other non-article -->
|
||||
<xsl:when test="//base_dc:typenorm = '122'">
|
||||
<dr:CobjCategory type="publication">0038</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Course material -->
|
||||
<xsl:when test="//base_dc:typenorm = '16'">
|
||||
<dr:CobjCategory type="publication">0038</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Manuscript -->
|
||||
<xsl:when test="//base_dc:typenorm = '19'">
|
||||
<dr:CobjCategory type="publication">0038</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Conference object -->
|
||||
<xsl:when test="//base_dc:typenorm = '13'">
|
||||
<dr:CobjCategory type="publication">0004</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Unknown -->
|
||||
<xsl:when test="//base_dc:typenorm = 'F'">
|
||||
<dr:CobjCategory type="other">0000</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<dr:CobjCategory type="other">0000</dr:CobjCategory>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
|
||||
|
||||
<oaf:accessrights>
|
||||
<xsl:choose>
|
||||
<xsl:when test="//base_dc:oa[.='1']">OPEN</xsl:when>
|
||||
<xsl:when test="//base_dc:rightsnorm">
|
||||
<xsl:value-of select="vocabulary:clean(//base_dc:rightsnorm, 'dnet:access_modes')" />
|
||||
</xsl:when>
|
||||
<xsl:when test="//dc:rights">
|
||||
<xsl:value-of select="vocabulary:clean( //dc:rights, 'dnet:access_modes')" />
|
||||
</xsl:when>
|
||||
<xsl:otherwise>UNKNOWN</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</oaf:accessrights>
|
||||
|
||||
<xsl:for-each select="//base_dc:doi">
|
||||
<oaf:identifier identifierType="doi">
|
||||
<xsl:value-of select="." />
|
||||
</oaf:identifier>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'http') and (not(contains(., '://dx.doi.org/') or contains(., '://doi.org/') or contains(., '://hdl.handle.net/')))])">
|
||||
<oaf:identifier identifierType="url">
|
||||
<xsl:value-of select="." />
|
||||
</oaf:identifier>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'http') and contains(., '://hdl.handle.net/')]/substring-after(., 'hdl.handle.net/'))">
|
||||
<oaf:identifier identifierType="handle">
|
||||
<xsl:value-of select="." />
|
||||
</oaf:identifier>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'urn:nbn:nl:') or starts-with(., 'URN:NBN:NL:')])">
|
||||
<oaf:identifier identifierType='urn'>
|
||||
<xsl:value-of select="." />
|
||||
</oaf:identifier>
|
||||
</xsl:for-each>
|
||||
|
||||
<oaf:identifier identifierType="oai-original">
|
||||
<xsl:value-of
|
||||
select="//*[local-name() = 'about']/*[local-name() = 'provenance']//*[local-name() = 'originDescription' and not(./*[local-name() = 'originDescription'])]/*[local-name() = 'identifier']" />
|
||||
</oaf:identifier>
|
||||
|
||||
<oaf:hostedBy>
|
||||
<xsl:attribute name="name">
|
||||
<xsl:value-of select="//base_dc:collname" />
|
||||
</xsl:attribute>
|
||||
<xsl:attribute name="id">
|
||||
<xsl:value-of select="concat('opendoar____::', //base_dc:collection/@opendoar_id)" />
|
||||
</xsl:attribute>
|
||||
</oaf:hostedBy>
|
||||
|
||||
<oaf:collectedFrom>
|
||||
<xsl:attribute name="name">
|
||||
<xsl:value-of select="$varOfficialName" />
|
||||
</xsl:attribute>
|
||||
<xsl:attribute name="id">
|
||||
<xsl:value-of select="$varDataSourceId" />
|
||||
</xsl:attribute>
|
||||
</oaf:collectedFrom>
|
||||
|
||||
<oaf:dateAccepted>
|
||||
<xsl:value-of select="dateCleaner:dateISO( //dc:date[1] )" />
|
||||
</oaf:dateAccepted>
|
||||
|
||||
<xsl:if test="//base_dc:oa[.='1']">
|
||||
<xsl:for-each select="//dc:relation[starts-with(., 'http')]">
|
||||
<oaf:fulltext>
|
||||
<xsl:value-of select="normalize-space(.)" />
|
||||
</oaf:fulltext>
|
||||
</xsl:for-each>
|
||||
</xsl:if>
|
||||
|
||||
<xsl:for-each select="//base_dc:collection/@ror_id">
|
||||
<oaf:relation relType="resultOrganization"
|
||||
subRelType="affiliation"
|
||||
relClass="hasAuthorInstitution"
|
||||
targetType="organization">
|
||||
<xsl:choose>
|
||||
<xsl:when test="contains(.,'https://ror.org/')">
|
||||
<xsl:value-of select="concat('ror_________::', normalize-space(.))" />
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<xsl:value-of select="concat('ror_________::https://ror.org/', normalize-space(.))" />
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</oaf:relation>
|
||||
</xsl:for-each>
|
||||
</metadata>
|
||||
<xsl:copy-of select="//*[local-name() = 'about']" />
|
||||
</record>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template name="allElements">
|
||||
<xsl:param name="sourceElement" />
|
||||
<xsl:param name="targetElement" />
|
||||
<xsl:for-each select="$sourceElement">
|
||||
<xsl:element name="{$targetElement}">
|
||||
<xsl:value-of select="normalize-space(.)" />
|
||||
</xsl:element>
|
||||
</xsl:for-each>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="//*[local-name() = 'header']">
|
||||
<xsl:if test="//oai:header/@status='deleted'">
|
||||
<xsl:call-template name="terminate" />
|
||||
</xsl:if>
|
||||
<xsl:copy>
|
||||
<xsl:apply-templates select="node()|@*" />
|
||||
<xsl:element name="dr:dateOfTransformation">
|
||||
<xsl:value-of select="$transDate" />
|
||||
</xsl:element>
|
||||
</xsl:copy>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="node()|@*">
|
||||
<xsl:copy>
|
||||
<xsl:apply-templates select="node()|@*" />
|
||||
</xsl:copy>
|
||||
</xsl:template>
|
||||
</xsl:stylesheet>
|
||||
</CODE>
|
||||
</SCRIPT>
|
||||
</CONFIGURATION>
|
||||
<STATUS />
|
||||
<SECURITY_PARAMETERS />
|
||||
</BODY>
|
||||
</RESOURCE_PROFILE>
|
|
@ -0,0 +1,451 @@
|
|||
<RESOURCE_PROFILE>
|
||||
<HEADER>
|
||||
<RESOURCE_IDENTIFIER value="2ad0cdd9-c96c-484c-8b0e-ed56d86891fe_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU=" />
|
||||
<RESOURCE_TYPE value="TransformationRuleDSResourceType" />
|
||||
<RESOURCE_KIND value="TransformationRuleDSResources" />
|
||||
<RESOURCE_URI value="" />
|
||||
<DATE_OF_CREATION value="2024-03-05T11:23:00+00:00" />
|
||||
</HEADER>
|
||||
<BODY>
|
||||
<CONFIGURATION>
|
||||
<SOURCE_METADATA_FORMAT interpretation="cleaned" layout="store" name="dc" />
|
||||
<SINK_METADATA_FORMAT name="odf_hbase" />
|
||||
<IMPORTED />
|
||||
<SCRIPT>
|
||||
<TITLE>xslt_base2odf_hadoop</TITLE>
|
||||
<CODE>
|
||||
<xsl:stylesheet xmlns:oaire="http://namespace.openaire.eu/schema/oaire/" xmlns:dateCleaner="http://eu/dnetlib/transform/dateISO" xmlns:base_dc="http://oai.base-search.net/base_dc/"
|
||||
xmlns:datacite="http://datacite.org/schema/kernel-4" xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:vocabulary="http://eu/dnetlib/transform/clean" xmlns:oaf="http://namespace.openaire.eu/oaf"
|
||||
xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
exclude-result-prefixes="xsl vocabulary dateCleaner base_dc" version="2.0">
|
||||
<xsl:param name="varOfficialName" />
|
||||
<xsl:param name="varDataSourceId" />
|
||||
<xsl:param name="varFP7" select="'corda_______::'" />
|
||||
<xsl:param name="varH2020" select="'corda__h2020::'" />
|
||||
<xsl:param name="repoCode" select="substring-before(//*[local-name() = 'header']/*[local-name()='recordIdentifier'], ':')" />
|
||||
<xsl:param name="index" select="0" />
|
||||
<xsl:param name="transDate" select="current-dateTime()" />
|
||||
|
||||
<xsl:template name="terminate">
|
||||
<xsl:message terminate="yes">
|
||||
record is not compliant, transformation is interrupted.
|
||||
</xsl:message>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="/">
|
||||
<record>
|
||||
<xsl:apply-templates select="//*[local-name() = 'header']" />
|
||||
|
||||
|
||||
<!-- NOT USED
|
||||
base_dc:global_id (I used oai:identifier)
|
||||
base_dc:collection/text()
|
||||
base_dc:continent
|
||||
base_dc:country
|
||||
dc:coverage
|
||||
dc:source
|
||||
dc:relation
|
||||
dc:type (I used //base_dc:typenorm)
|
||||
dc:language (I used base_dc:lang)
|
||||
base_dc:link (I used dc:identifier)
|
||||
-->
|
||||
|
||||
<metadata>
|
||||
<datacite:resource>
|
||||
|
||||
<xsl:for-each select="//base_dc:doi">
|
||||
<datacite:identifier identifierType="DOI">
|
||||
<xsl:value-of select="." />
|
||||
</datacite:identifier>
|
||||
</xsl:for-each>
|
||||
|
||||
<datacite:alternateIdentifiers>
|
||||
<xsl:for-each
|
||||
select="distinct-values(//dc:identifier[starts-with(., 'http') and (not(contains(., '://dx.doi.org/') or contains(., '://doi.org/') or contains(., '://hdl.handle.net/')))])">
|
||||
<datacite:identifier alternateIdentifierType="url">
|
||||
<xsl:value-of select="." />
|
||||
</datacite:identifier>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'http') and contains(., '://hdl.handle.net/')]/substring-after(., 'hdl.handle.net/'))">
|
||||
<datacite:identifier alternateIdentifierType="handle">
|
||||
<xsl:value-of select="." />
|
||||
</datacite:identifier>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'urn:nbn:nl:') or starts-with(., 'URN:NBN:NL:')])">
|
||||
<datacite:identifier alternateIdentifierType='urn'>
|
||||
<xsl:value-of select="." />
|
||||
</datacite:identifier>
|
||||
</xsl:for-each>
|
||||
|
||||
<datacite:identifier alternateIdentifierType="oai-original">
|
||||
<xsl:value-of
|
||||
select="//*[local-name() = 'about']/*[local-name() = 'provenance']//*[local-name() = 'originDescription' and not(./*[local-name() = 'originDescription'])]/*[local-name() = 'identifier']" />
|
||||
</datacite:identifier>
|
||||
</datacite:alternateIdentifiers>
|
||||
|
||||
<datacite:relatedIdentifiers />
|
||||
|
||||
|
||||
<xsl:for-each select="//base_dc:typenorm">
|
||||
<datacite:resourceType><xsl:value-of select="vocabulary:clean(., 'base:normalized_types')" /></datacite:resourceType>
|
||||
</xsl:for-each>
|
||||
|
||||
<datacite:titles>
|
||||
<xsl:for-each select="//dc:title">
|
||||
<datacite:title>
|
||||
<xsl:value-of select="normalize-space(.)" />
|
||||
</datacite:title>
|
||||
</xsl:for-each>
|
||||
</datacite:titles>
|
||||
|
||||
<datacite:creators>
|
||||
<xsl:for-each select="//dc:creator">
|
||||
<xsl:variable name="author" select="normalize-space(.)" />
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>
|
||||
<xsl:value-of select="$author" />
|
||||
</datacite:creatorName>
|
||||
<xsl:for-each select="//base_dc:authod_id[normalize-space(./base_dc:creator_name) = $author]/base_dc:creator_id ">
|
||||
<xsl:if test="contains(.,'https://orcid.org/')">
|
||||
<nameIdentifier schemeURI="https://orcid.org/" nameIdentifierScheme="ORCID">
|
||||
<xsl:value-of select="substring-after(., 'https://orcid.org/')" />
|
||||
</nameIdentifier>
|
||||
</xsl:if>
|
||||
</xsl:for-each>
|
||||
</datacite:creator>
|
||||
</xsl:for-each>
|
||||
</datacite:creators>
|
||||
|
||||
<datacite:contributors>
|
||||
<xsl:for-each select="//dc:contributor">
|
||||
<datacite:contributor>
|
||||
<datacite:contributorName>
|
||||
<xsl:value-of select="normalize-space(.)" />
|
||||
</datacite:contributorName>
|
||||
</datacite:contributor>
|
||||
</xsl:for-each>
|
||||
</datacite:contributors>
|
||||
|
||||
<datacite:descriptions>
|
||||
<xsl:for-each select="//dc:description">
|
||||
<datacite:description descriptionType="Abstract">
|
||||
<xsl:value-of select="normalize-space(.)" />
|
||||
</datacite:description>
|
||||
</xsl:for-each>
|
||||
</datacite:descriptions>
|
||||
|
||||
<datacite:subjects>
|
||||
<xsl:for-each select="//dc:subject">
|
||||
<datacite:subject>
|
||||
<xsl:value-of select="normalize-space(.)" />
|
||||
</datacite:subject>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:for-each select="//base_dc:classcode|//base_dc:autoclasscode">
|
||||
<datacite:subject subjectScheme="{@type}" classificationCode="{normalize-space(.)}">
|
||||
<!-- TODO the value should be obtained by the Code -->
|
||||
<xsl:value-of select="normalize-space(.)" />
|
||||
</datacite:subject>
|
||||
</xsl:for-each>
|
||||
</datacite:subjects>
|
||||
|
||||
<datacite:publisher>
|
||||
<xsl:value-of select="normalize-space(//dc:publisher)" />
|
||||
</datacite:publisher>
|
||||
|
||||
<datacite:publicationYear>
|
||||
<xsl:value-of select="normalize-space(//base_dc:year)" />
|
||||
</datacite:publicationYear>
|
||||
|
||||
<datacite:formats>
|
||||
<xsl:for-each select="//dc:format">
|
||||
<datacite:format>
|
||||
<xsl:value-of select="normalize-space(.)" />
|
||||
</datacite:format>
|
||||
</xsl:for-each>
|
||||
</datacite:formats>
|
||||
|
||||
<datacite:language>
|
||||
<xsl:value-of select="vocabulary:clean( //base_dc:lang, 'dnet:languages')" />
|
||||
</datacite:language>
|
||||
|
||||
<oaf:accessrights>
|
||||
<xsl:if test="//base_dc:oa[.='1']">
|
||||
<datacite:rights rightsURI="http://purl.org/coar/access_right/c_abf2">open access</datacite:rights>
|
||||
</xsl:if>
|
||||
<xsl:for-each select="//dc:rights|//base_dc:rightsnorm">
|
||||
<datacite:rights><xsl:value-of select="vocabulary:clean(., 'dnet:access_modes')" /></datacite:rights>
|
||||
</xsl:for-each>
|
||||
</oaf:accessrights>
|
||||
|
||||
</datacite:resource>
|
||||
|
||||
<xsl:for-each select="//dc:relation">
|
||||
<xsl:if test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', 'i')">
|
||||
<oaf:projectid>
|
||||
<xsl:value-of select="concat($varFP7, replace(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', '$2', 'i'))" />
|
||||
</oaf:projectid>
|
||||
</xsl:if>
|
||||
<xsl:if test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/h2020/)(\d\d\d\d\d\d)(.*)', 'i')">
|
||||
<oaf:projectid>
|
||||
<xsl:value-of select="concat($varH2020, replace(normalize-space(.), '(info:eu-repo/grantagreement/ec/h2020/)(\d\d\d\d\d\d)(.*)', '$2', 'i'))" />
|
||||
</oaf:projectid>
|
||||
</xsl:if>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:choose>
|
||||
<!-- I used an inline mapping because the field typenorm could be repeated and I have to specify a list of priority -->
|
||||
|
||||
<!-- Book part -->
|
||||
<xsl:when test="//base_dc:typenorm = '111'">
|
||||
<dr:CobjCategory type="publication">0013</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Book -->
|
||||
<xsl:when test="//base_dc:typenorm = '11'">
|
||||
<dr:CobjCategory type="publication">0002</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Article contribution -->
|
||||
<xsl:when test="//base_dc:typenorm = '121'">
|
||||
<dr:CobjCategory type="publication">0001</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
|
||||
<!-- Journal/Newspaper -->
|
||||
<xsl:when test="//base_dc:typenorm = '12'">
|
||||
<dr:CobjCategory type="publication">0043</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Report -->
|
||||
<xsl:when test="//base_dc:typenorm = '14'">
|
||||
<dr:CobjCategory type="publication">0017</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Review -->
|
||||
<xsl:when test="//base_dc:typenorm = '15'">
|
||||
<dr:CobjCategory type="publication">0015</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Lecture -->
|
||||
<xsl:when test="//base_dc:typenorm = '17'">
|
||||
<dr:CobjCategory type="publication">0010</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Bachelor's thesis -->
|
||||
<xsl:when test="//base_dc:typenorm = '181'">
|
||||
<dr:CobjCategory type="publication">0008</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Master's thesis -->
|
||||
<xsl:when test="//base_dc:typenorm = '182'">
|
||||
<dr:CobjCategory type="publication">0007</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Doctoral and postdoctoral thesis -->
|
||||
<xsl:when test="//base_dc:typenorm = '183'">
|
||||
<dr:CobjCategory type="publication">0006</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Thesis -->
|
||||
<xsl:when test="//base_dc:typenorm = '18'">
|
||||
<dr:CobjCategory type="publication">0044</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Patent -->
|
||||
<xsl:when test="//base_dc:typenorm = '1A'">
|
||||
<dr:CobjCategory type="publication">0019</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Text -->
|
||||
<xsl:when test="//base_dc:typenorm = '1'">
|
||||
<dr:CobjCategory type="publication">0001</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Software -->
|
||||
<xsl:when test="//base_dc:typenorm = '6'">
|
||||
<dr:CobjCategory type="software">0029</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Dataset -->
|
||||
<xsl:when test="//base_dc:typenorm = '7'">
|
||||
<dr:CobjCategory type="dataset">0021</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Still image -->
|
||||
<xsl:when test="//base_dc:typenorm = '51'">
|
||||
<dr:CobjCategory type="other">0025</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Moving image/Video -->
|
||||
<xsl:when test="//base_dc:typenorm = '52'">
|
||||
<dr:CobjCategory type="other">0024</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Image/Video -->
|
||||
<xsl:when test="//base_dc:typenorm = '5'">
|
||||
<dr:CobjCategory type="other">0033</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Audio -->
|
||||
<xsl:when test="//base_dc:typenorm = '4'">
|
||||
<dr:CobjCategory type="other">0030</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Musical notation -->
|
||||
<xsl:when test="//base_dc:typenorm = '2'">
|
||||
<dr:CobjCategory type="other">0020</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Map -->
|
||||
<xsl:when test="//base_dc:typenorm = '3'">
|
||||
<dr:CobjCategory type="other">0020</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Other non-article -->
|
||||
<xsl:when test="//base_dc:typenorm = '122'">
|
||||
<dr:CobjCategory type="publication">0038</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Course material -->
|
||||
<xsl:when test="//base_dc:typenorm = '16'">
|
||||
<dr:CobjCategory type="publication">0038</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Manuscript -->
|
||||
<xsl:when test="//base_dc:typenorm = '19'">
|
||||
<dr:CobjCategory type="publication">0038</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Conference object -->
|
||||
<xsl:when test="//base_dc:typenorm = '13'">
|
||||
<dr:CobjCategory type="publication">0004</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Unknown -->
|
||||
<xsl:when test="//base_dc:typenorm = 'F'">
|
||||
<dr:CobjCategory type="other">0000</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<dr:CobjCategory type="other">0000</dr:CobjCategory>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
|
||||
<oaf:accessrights>
|
||||
<xsl:choose>
|
||||
<xsl:when test="//base_dc:oa[.='1']">OPEN</xsl:when>
|
||||
<xsl:when test="//base_dc:rightsnorm">
|
||||
<xsl:value-of select="vocabulary:clean(//base_dc:rightsnorm, 'dnet:access_modes')" />
|
||||
</xsl:when>
|
||||
<xsl:when test="//dc:rights">
|
||||
<xsl:value-of select="vocabulary:clean( //dc:rights, 'dnet:access_modes')" />
|
||||
</xsl:when>
|
||||
<xsl:otherwise>UNKNOWN</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</oaf:accessrights>
|
||||
|
||||
<xsl:for-each select="//base_dc:doi">
|
||||
<oaf:identifier identifierType="doi">
|
||||
<xsl:value-of select="." />
|
||||
</oaf:identifier>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:for-each
|
||||
select="distinct-values(//dc:identifier[starts-with(., 'http') and ( not(contains(., '://dx.doi.org/') or contains(., '://doi.org/') or contains(., '://hdl.handle.net/')))])">
|
||||
<oaf:identifier identifierType="url">
|
||||
<xsl:value-of select="." />
|
||||
</oaf:identifier>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'http') and contains(., '://hdl.handle.net/')]/substring-after(., 'hdl.handle.net/'))">
|
||||
<oaf:identifier identifierType="handle">
|
||||
<xsl:value-of select="." />
|
||||
</oaf:identifier>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'urn:nbn:nl:') or starts-with(., 'URN:NBN:NL:')])">
|
||||
<oaf:identifier identifierType='urn'>
|
||||
<xsl:value-of select="." />
|
||||
</oaf:identifier>
|
||||
</xsl:for-each>
|
||||
|
||||
<oaf:identifier identifierType="oai-original">
|
||||
<xsl:value-of
|
||||
select="//*[local-name() = 'about']/*[local-name() = 'provenance']//*[local-name() = 'originDescription' and not(./*[local-name() = 'originDescription'])]/*[local-name() = 'identifier']" />
|
||||
</oaf:identifier>
|
||||
|
||||
<oaf:hostedBy>
|
||||
<xsl:attribute name="name">
|
||||
<xsl:value-of select="//base_dc:collname" />
|
||||
</xsl:attribute>
|
||||
<xsl:attribute name="id">
|
||||
<xsl:value-of select="concat('opendoar____::', //base_dc:collection/@opendoar_id)" />
|
||||
</xsl:attribute>
|
||||
</oaf:hostedBy>
|
||||
|
||||
<oaf:collectedFrom>
|
||||
<xsl:attribute name="name">
|
||||
<xsl:value-of select="$varOfficialName" />
|
||||
</xsl:attribute>
|
||||
<xsl:attribute name="id">
|
||||
<xsl:value-of select="$varDataSourceId" />
|
||||
</xsl:attribute>
|
||||
</oaf:collectedFrom>
|
||||
|
||||
<oaf:dateAccepted>
|
||||
<xsl:value-of select="dateCleaner:dateISO( //dc:date[1] )" />
|
||||
</oaf:dateAccepted>
|
||||
|
||||
<xsl:if test="//base_dc:oa[.='1']">
|
||||
<xsl:for-each select="//dc:relation[starts-with(., 'http')]">
|
||||
<oaf:fulltext>
|
||||
<xsl:value-of select="normalize-space(.)" />
|
||||
</oaf:fulltext>
|
||||
</xsl:for-each>
|
||||
</xsl:if>
|
||||
|
||||
<xsl:for-each select="//base_dc:collection/@ror_id">
|
||||
<oaf:relation relType="resultOrganization" subRelType="affiliation" relClass="hasAuthorInstitution" targetType="organization">
|
||||
<xsl:choose>
|
||||
<xsl:when test="contains(.,'https://ror.org/')">
|
||||
<xsl:value-of select="concat('ror_________::', normalize-space(.))" />
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<xsl:value-of select="concat('ror_________::https://ror.org/', normalize-space(.))" />
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</oaf:relation>
|
||||
</xsl:for-each>
|
||||
</metadata>
|
||||
<xsl:copy-of select="//*[local-name() = 'about']" />
|
||||
</record>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="//*[local-name() = 'header']">
|
||||
<xsl:if test="//oai:header/@status='deleted'">
|
||||
<xsl:call-template name="terminate" />
|
||||
</xsl:if>
|
||||
<xsl:copy>
|
||||
<xsl:apply-templates select="node()|@*" />
|
||||
<xsl:element name="dr:dateOfTransformation">
|
||||
<xsl:value-of select="$transDate" />
|
||||
</xsl:element>
|
||||
</xsl:copy>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="node()|@*">
|
||||
<xsl:copy>
|
||||
<xsl:apply-templates select="node()|@*" />
|
||||
</xsl:copy>
|
||||
</xsl:template>
|
||||
</xsl:stylesheet>
|
||||
</CODE>
|
||||
</SCRIPT>
|
||||
</CONFIGURATION>
|
||||
<STATUS />
|
||||
<SECURITY_PARAMETERS />
|
||||
</BODY>
|
||||
</RESOURCE_PROFILE>
|
|
@ -0,0 +1,120 @@
|
|||
package eu.dnetlib.dhp.collection.orcid

import eu.dnetlib.dhp.application.AbstractScalaApplication
import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
import org.slf4j.{Logger, LoggerFactory}

/** Spark job that applies an incremental ORCID update on top of an existing ORCID graph.
  *
  * For each table (Authors, Works, Employments) the job drops the records whose
  * ORCID id appears in the update, appends the updated records, verifies that no
  * table shrank, and finally overwrites the original graph tables with the result.
  */
class SparkApplyUpdate(propertyPath: String, args: Array[String], log: Logger)
  extends AbstractScalaApplication(propertyPath, args, log: Logger) {

  /** The ORCID tables this job maintains; every phase iterates over the same list. */
  private val tables = Seq("Authors", "Works", "Employments")

  /** Here all the spark applications runs this method
    * where the whole logic of the spark node is defined
    */
  override def run(): Unit = {
    val graphPath: String = parser.get("graphPath")
    log.info("found parameters graphPath: {}", graphPath)
    val updatePath: String = parser.get("updatePath")
    log.info("found parameters updatePath: {}", updatePath)
    val targetPath: String = parser.get("targetPath")
    log.info("found parameters targetPath: {}", targetPath)
    applyTableUpdate(spark, graphPath, updatePath, targetPath)
    // fail before touching the original graph if the merge lost records
    checkUpdate(spark, graphPath, targetPath)
    moveTable(spark, graphPath, targetPath)
  }

  /** Overwrites the graph tables with the merged tables produced at [[updatePath]]. */
  private def moveTable(spark: SparkSession, graphPath: String, updatePath: String): Unit = {
    tables.foreach { table =>
      spark.read
        .load(s"$updatePath/$table")
        .repartition(1000)
        .write
        .mode(SaveMode.Overwrite)
        .save(s"$graphPath/$table")
    }
  }

  /** Replaces in [[inputDataset]] every record whose orcid id appears in [[idUpdate]]
    * with the corresponding record of [[updateDataframe]], writing the merge to [[targetPath]].
    */
  private def updateDataset(
    inputDataset: DataFrame,
    idUpdate: DataFrame,
    updateDataframe: DataFrame,
    targetPath: String
  ): Unit = {
    inputDataset
      // left-anti join: keep only the records NOT mentioned in the update
      .join(idUpdate, inputDataset("orcid").equalTo(idUpdate("orcid")), "leftanti")
      .select(inputDataset("*"))
      .unionByName(updateDataframe)
      .write
      .mode(SaveMode.Overwrite)
      .save(targetPath)
  }

  /** Sanity check: an applied update can only grow (or keep) each table.
    * A shrinking table indicates data loss during the merge and aborts the job.
    */
  private def checkUpdate(spark: SparkSession, graphPath: String, updatePath: String): Unit = {
    tables.foreach { table =>
      val originalCount = spark.read.load(s"$graphPath/$table").count
      val updatedCount = spark.read.load(s"$updatePath/$table").count
      log.info(s"totalOriginal$table: {}", originalCount)
      log.info(s"totalUpdate$table: {}", updatedCount)
      if (updatedCount < originalCount)
        throw new RuntimeException("The updated Graph contains less elements of the original one")
    }
  }

  /** Runs the merge of every table into [[targetPath]].
    * The set of ORCID ids to replace is taken from the Authors table of the update.
    */
  private def applyTableUpdate(spark: SparkSession, graphPath: String, updatePath: String, targetPath: String): Unit = {
    val orcidIDUpdate = spark.read.load(s"$updatePath/Authors").select("orcid")
    tables.foreach { table =>
      updateDataset(
        spark.read.load(s"$graphPath/$table"),
        orcidIDUpdate,
        spark.read.load(s"$updatePath/$table"),
        s"$targetPath/$table"
      )
    }
  }
}

object SparkApplyUpdate {

  // BUGFIX: the logger was created for SparkGenerateORCIDTable (copy-paste error);
  // it must report under this class.
  val log: Logger = LoggerFactory.getLogger(SparkApplyUpdate.getClass)

  def main(args: Array[String]): Unit = {
    new SparkApplyUpdate("/eu/dnetlib/dhp/collection/orcid/apply_orcid_table_parameter.json", args, log)
      .initialize()
      .run()
  }
}
|
|
@ -6,6 +6,7 @@ import org.apache.hadoop.io.Text
|
|||
import org.apache.spark.SparkContext
|
||||
import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
|
||||
import org.slf4j.{Logger, LoggerFactory}
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
class SparkGenerateORCIDTable(propertyPath: String, args: Array[String], log: Logger)
|
||||
extends AbstractScalaApplication(propertyPath, args, log: Logger) {
|
||||
|
@ -18,12 +19,16 @@ class SparkGenerateORCIDTable(propertyPath: String, args: Array[String], log: Lo
|
|||
log.info("found parameters sourcePath: {}", sourcePath)
|
||||
val targetPath: String = parser.get("targetPath")
|
||||
log.info("found parameters targetPath: {}", targetPath)
|
||||
extractORCIDTable(spark, sourcePath, targetPath)
|
||||
extractORCIDEmploymentsTable(spark, sourcePath, targetPath)
|
||||
extractORCIDWorksTable(spark, sourcePath, targetPath)
|
||||
val fromUpdate = "true".equals(parser.get("fromUpdate"))
|
||||
val sourceSummaryPath = if (fromUpdate) s"$sourcePath/summary*" else sourcePath
|
||||
val sourceEmploymentsPath = if (fromUpdate) s"$sourcePath/employments*" else sourcePath
|
||||
val sourceWorksPath = if (fromUpdate) s"$sourcePath/works*" else sourcePath
|
||||
extractORCIDTable(spark, sourceSummaryPath, targetPath, fromUpdate)
|
||||
extractORCIDEmploymentsTable(spark, sourceEmploymentsPath, targetPath, fromUpdate)
|
||||
extractORCIDWorksTable(spark, sourceWorksPath, targetPath, fromUpdate)
|
||||
}
|
||||
|
||||
def extractORCIDTable(spark: SparkSession, sourcePath: String, targetPath: String): Unit = {
|
||||
def extractORCIDTable(spark: SparkSession, sourcePath: String, targetPath: String, skipFilterByKey: Boolean): Unit = {
|
||||
val sc: SparkContext = spark.sparkContext
|
||||
import spark.implicits._
|
||||
val df = sc
|
||||
|
@ -32,8 +37,8 @@ class SparkGenerateORCIDTable(propertyPath: String, args: Array[String], log: Lo
|
|||
.toDF
|
||||
.as[(String, String)]
|
||||
implicit val orcidAuthor: Encoder[Author] = Encoders.bean(classOf[Author])
|
||||
// implicit val orcidPID:Encoder[Pid] = Encoders.bean(classOf[Pid])
|
||||
df.filter(r => r._1.contains("summaries"))
|
||||
val newDf = if (!skipFilterByKey) df.filter(r => r._1.contains("summaries")) else df
|
||||
newDf
|
||||
.map { r =>
|
||||
val p = new OrcidParser
|
||||
p.parseSummary(r._2)
|
||||
|
@ -44,7 +49,12 @@ class SparkGenerateORCIDTable(propertyPath: String, args: Array[String], log: Lo
|
|||
.save(s"$targetPath/Authors")
|
||||
}
|
||||
|
||||
def extractORCIDWorksTable(spark: SparkSession, sourcePath: String, targetPath: String): Unit = {
|
||||
def extractORCIDWorksTable(
|
||||
spark: SparkSession,
|
||||
sourcePath: String,
|
||||
targetPath: String,
|
||||
skipFilterByKey: Boolean
|
||||
): Unit = {
|
||||
val sc: SparkContext = spark.sparkContext
|
||||
import spark.implicits._
|
||||
val df = sc
|
||||
|
@ -53,19 +63,37 @@ class SparkGenerateORCIDTable(propertyPath: String, args: Array[String], log: Lo
|
|||
.toDF
|
||||
.as[(String, String)]
|
||||
implicit val orcidWorkAuthor: Encoder[Work] = Encoders.bean(classOf[Work])
|
||||
implicit val orcidPID: Encoder[Pid] = Encoders.bean(classOf[Pid])
|
||||
df.filter(r => r._1.contains("works"))
|
||||
.map { r =>
|
||||
|
||||
//We are in the case of parsing ORCID UPDATE
|
||||
if (skipFilterByKey) {
|
||||
df.flatMap { r =>
|
||||
val p = new OrcidParser
|
||||
p.parseWork(r._2)
|
||||
}
|
||||
.filter(p => p != null)
|
||||
.write
|
||||
.mode(SaveMode.Overwrite)
|
||||
.save(s"$targetPath/Works")
|
||||
p.parseWorks(r._2).asScala
|
||||
}.filter(p => p != null)
|
||||
.write
|
||||
.mode(SaveMode.Overwrite)
|
||||
.save(s"$targetPath/Works")
|
||||
}
|
||||
//We are in the case of parsing ORCID DUMP
|
||||
else {
|
||||
df.filter(r => r._1.contains("works"))
|
||||
.map { r =>
|
||||
val p = new OrcidParser
|
||||
p.parseWork(r._2)
|
||||
}
|
||||
.filter(p => p != null)
|
||||
.write
|
||||
.mode(SaveMode.Overwrite)
|
||||
.save(s"$targetPath/Works")
|
||||
}
|
||||
}
|
||||
|
||||
def extractORCIDEmploymentsTable(spark: SparkSession, sourcePath: String, targetPath: String): Unit = {
|
||||
def extractORCIDEmploymentsTable(
|
||||
spark: SparkSession,
|
||||
sourcePath: String,
|
||||
targetPath: String,
|
||||
skipFilterByKey: Boolean
|
||||
): Unit = {
|
||||
val sc: SparkContext = spark.sparkContext
|
||||
import spark.implicits._
|
||||
val df = sc
|
||||
|
@ -74,16 +102,27 @@ class SparkGenerateORCIDTable(propertyPath: String, args: Array[String], log: Lo
|
|||
.toDF
|
||||
.as[(String, String)]
|
||||
implicit val orcidEmploymentAuthor: Encoder[Employment] = Encoders.bean(classOf[Employment])
|
||||
implicit val orcidPID: Encoder[Pid] = Encoders.bean(classOf[Pid])
|
||||
df.filter(r => r._1.contains("employments"))
|
||||
.map { r =>
|
||||
if (skipFilterByKey) {
|
||||
df.flatMap { r =>
|
||||
val p = new OrcidParser
|
||||
p.parseEmployment(r._2)
|
||||
}
|
||||
.filter(p => p != null)
|
||||
.write
|
||||
.mode(SaveMode.Overwrite)
|
||||
.save(s"$targetPath/Employments")
|
||||
p.parseEmployments(r._2).asScala
|
||||
}.filter(p => p != null)
|
||||
.write
|
||||
.mode(SaveMode.Overwrite)
|
||||
.save(s"$targetPath/Employments")
|
||||
}
|
||||
//We are in the case of parsing ORCID DUMP
|
||||
else {
|
||||
df.filter(r => r._1.contains("employments"))
|
||||
.map { r =>
|
||||
val p = new OrcidParser
|
||||
p.parseEmployment(r._2)
|
||||
}
|
||||
.filter(p => p != null)
|
||||
.write
|
||||
.mode(SaveMode.Overwrite)
|
||||
.save(s"$targetPath/Employments")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,202 @@
|
|||
|
||||
package eu.dnetlib.dhp.actionmanager.dblp;

import static org.junit.jupiter.api.Assertions.*;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.databind.ObjectMapper;

/**
 * Tests for {@link PrepareDblpActionSets}: runs the job against a small DBLP dump
 * sample on a local Spark session and (eventually) verifies the generated action sets.
 */
public class PrepareDblpActionSetsTest {

	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

	private static SparkSession spark;

	private static Path workingDir;

	// BUGFIX: logger, app name and temp dir previously referenced
	// SparkAtomicActionScoreJobTest (copy-paste from another test class).
	private static final Logger log = LoggerFactory.getLogger(PrepareDblpActionSetsTest.class);

	@BeforeAll
	public static void beforeAll() throws IOException {
		workingDir = Files
			.createTempDirectory(PrepareDblpActionSetsTest.class.getSimpleName());
		log.info("using work dir {}", workingDir);

		SparkConf conf = new SparkConf();
		conf.setAppName(PrepareDblpActionSetsTest.class.getSimpleName());

		// local, UI-less Spark so the test is self-contained and fast
		conf.setMaster("local[*]");
		conf.set("spark.driver.host", "localhost");
		conf.set("hive.metastore.local", "true");
		conf.set("spark.ui.enabled", "false");
		conf.set("spark.sql.warehouse.dir", workingDir.toString());
		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

		spark = SparkSession
			.builder()
			.appName(PrepareDblpActionSetsTest.class.getSimpleName())
			.config(conf)
			.getOrCreate();
	}

	@AfterAll
	public static void afterAll() throws IOException {
		FileUtils.deleteDirectory(workingDir.toFile());
		spark.stop();
	}

	/** Invokes the job's main with an unmanaged Spark session and the given paths. */
	private void runJob(String dblpInputPath, String outputPath) throws Exception {
		PrepareDblpActionSets
			.main(
				new String[] {
					"-isSparkSessionManaged", Boolean.FALSE.toString(),
					"-dblpInputPath", dblpInputPath,
					"-outputPath", outputPath,
				});
	}

	@Test
	void testXmlParsing() throws Exception {

		String dblpInputPath = getClass()
			.getResource("/eu/dnetlib/dhp/actionmanager/dblp/dblp_dump_sample.xml")
			.getPath();

		String outputPath = workingDir.toString() + "/actionSet";

		// execute the job to generate the action sets for result scores;
		// currently this only checks that the job completes without errors
		runJob(dblpInputPath, outputPath);

		// TODO: use the data written in `outputPath` to perform tests

//		final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
//
//		JavaRDD<OafEntity> tmp = sc
//			.sequenceFile(outputPath, Text.class, Text.class)
//			.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
//			.map(aa -> ((OafEntity) aa.getPayload()));
//
//		assertEquals(8, tmp.count());
//
//		Dataset<OafEntity> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(OafEntity.class));
//		verificationDataset.createOrReplaceTempView("result");
//
//		Dataset<Row> testDataset = spark
//			.sql(
//				"Select p.id oaid, mes.id, mUnit.value from result p " +
//					"lateral view explode(measures) m as mes " +
//					"lateral view explode(mes.unit) u as mUnit ");
//
//		Assertions.assertEquals(28, testDataset.count());
//
//		assertResultImpactScores(testDataset);
//		assertProjectImpactScores(testDataset);

	}

	/** Checks the impact scores expected for a known result id in the sample. */
	void assertResultImpactScores(Dataset<Row> testDataset) {
		Assertions
			.assertEquals(
				"6.63451994567e-09", testDataset
					.filter(
						"oaid='50|arXiv_dedup_::4a2d5fd8d71daec016c176ec71d957b1' " +
							"and id = 'influence'")
					.select("value")
					.collectAsList()
					.get(0)
					.getString(0));
		Assertions
			.assertEquals(
				"0.348694533145", testDataset
					.filter(
						"oaid='50|arXiv_dedup_::4a2d5fd8d71daec016c176ec71d957b1' " +
							"and id = 'popularity_alt'")
					.select("value")
					.collectAsList()
					.get(0)
					.getString(0));
		Assertions
			.assertEquals(
				"2.16094680115e-09", testDataset
					.filter(
						"oaid='50|arXiv_dedup_::4a2d5fd8d71daec016c176ec71d957b1' " +
							"and id = 'popularity'")
					.select("value")
					.collectAsList()
					.get(0)
					.getString(0));
	}

	/** Checks the impact scores expected for a known project id in the sample. */
	void assertProjectImpactScores(Dataset<Row> testDataset) throws Exception {

		Assertions
			.assertEquals(
				"0", testDataset
					.filter(
						"oaid='40|nih_________::c02a8233e9b60f05bb418f0c9b714833' " +
							"and id = 'numOfInfluentialResults'")
					.select("value")
					.collectAsList()
					.get(0)
					.getString(0));
		Assertions
			.assertEquals(
				"1", testDataset
					.filter(
						"oaid='40|nih_________::c02a8233e9b60f05bb418f0c9b714833' " +
							"and id = 'numOfPopularResults'")
					.select("value")
					.collectAsList()
					.get(0)
					.getString(0));
		Assertions
			.assertEquals(
				"25", testDataset
					.filter(
						"oaid='40|nih_________::c02a8233e9b60f05bb418f0c9b714833' " +
							"and id = 'totalImpulse'")
					.select("value")
					.collectAsList()
					.get(0)
					.getString(0));
		Assertions
			.assertEquals(
				"43", testDataset
					.filter(
						"oaid='40|nih_________::c02a8233e9b60f05bb418f0c9b714833' " +
							"and id = 'totalCitationCount'")
					.select("value")
					.collectAsList()
					.get(0)
					.getString(0));
	}
}
|
|
@ -2,6 +2,7 @@
|
|||
package eu.dnetlib.dhp.collection.orcid;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
@ -9,7 +10,12 @@ import java.util.Objects;
|
|||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.LocalFileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.io.SequenceFile;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.io.compress.CompressionCodec;
|
||||
import org.apache.hadoop.io.compress.CompressionCodecFactory;
|
||||
import org.apache.spark.SparkContext;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
|
@ -27,6 +33,7 @@ import com.ximpleware.XPathParseException;
|
|||
|
||||
import eu.dnetlib.dhp.collection.orcid.model.Author;
|
||||
import eu.dnetlib.dhp.collection.orcid.model.ORCIDItem;
|
||||
import eu.dnetlib.dhp.collection.orcid.model.Work;
|
||||
import eu.dnetlib.dhp.parser.utility.VtdException;
|
||||
|
||||
public class DownloadORCIDTest {
|
||||
|
@ -82,6 +89,34 @@ public class DownloadORCIDTest {
|
|||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParsingOrcidUpdateEmployments() throws Exception {
|
||||
final String xml = IOUtils
|
||||
.toString(
|
||||
Objects
|
||||
.requireNonNull(
|
||||
getClass().getResourceAsStream("/eu/dnetlib/dhp/collection/orcid/update_employments.xml")));
|
||||
|
||||
final OrcidParser parser = new OrcidParser();
|
||||
final ObjectMapper mapper = new ObjectMapper();
|
||||
System.out.println(mapper.writeValueAsString(parser.parseEmployments(xml)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParsingOrcidUpdateWorks() throws Exception {
|
||||
final String xml = IOUtils
|
||||
.toString(
|
||||
Objects
|
||||
.requireNonNull(
|
||||
getClass().getResourceAsStream("/eu/dnetlib/dhp/collection/orcid/update_work.xml")));
|
||||
|
||||
final OrcidParser parser = new OrcidParser();
|
||||
final List<Work> works = parser.parseWorks(xml);
|
||||
|
||||
final ObjectMapper mapper = new ObjectMapper();
|
||||
System.out.println(mapper.writeValueAsString(works));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParsingEmployments() throws Exception {
|
||||
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.plugin.base;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Serializable bean describing a BASE collection: its identifier plus the
 * optional OpenDOAR and ROR identifiers of the hosting repository/organization.
 */
public class BaseCollectionInfo implements Serializable {

	private static final long serialVersionUID = 5766333937429419647L;

	// collection identifier
	private String id;
	// OpenDOAR id of the hosting repository, if any
	private String opendoarId;
	// ROR id of the owning organization, if any
	private String rorId;

	/** @return the collection identifier */
	public String getId() {
		return id;
	}

	/** @param id the collection identifier */
	public void setId(final String id) {
		this.id = id;
	}

	/** @return the OpenDOAR identifier, or {@code null} when absent */
	public String getOpendoarId() {
		return opendoarId;
	}

	/** @param opendoarId the OpenDOAR identifier */
	public void setOpendoarId(final String opendoarId) {
		this.opendoarId = opendoarId;
	}

	/** @return the ROR identifier, or {@code null} when absent */
	public String getRorId() {
		return rorId;
	}

	/** @param rorId the ROR identifier */
	public void setRorId(final String rorId) {
		this.rorId = rorId;
	}

}
|
|
@ -0,0 +1,184 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.plugin.base;

import static org.junit.jupiter.api.Assertions.assertEquals;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.Node;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.common.aggregation.AggregatorReport;

/**
 * Exploratory tests for {@link BaseCollectorIterator}: they stream a local BASE
 * sample archive and print statistics about the records it contains.
 * Disabled by default because they need the (large) local fixture.
 */
@Disabled
public class BaseCollectorIteratorTest {

	/**
	 * Iterates the whole sample tar, counting records and collecting, per record,
	 * the XPath of every node/attribute, the declared collections (with their XML
	 * attributes) and the distinct record types, then prints the statistics.
	 */
	@Test
	void testImportFile() throws Exception {

		long count = 0;

		final BaseCollectorIterator iterator = new BaseCollectorIterator("base-sample.tar", new AggregatorReport());

		final Map<String, Map<String, String>> collections = new HashMap<>();
		final Map<String, AtomicInteger> fields = new HashMap<>();
		final Set<String> types = new HashSet<>();

		while (iterator.hasNext()) {

			final Document record = DocumentHelper.parseText(iterator.next());

			count++;

			// progress indicator: the sample contains tens of thousands of records
			if ((count % 1000) == 0) {
				System.out.println("# Read records: " + count);
			}

			// System.out.println(record.asXML());

			for (final Object o : record.selectNodes("//*|//@*")) {
				final String path = ((Node) o).getPath();

				// one occurrence counter per distinct XPath
				// (idiomatic replacement of the containsKey/get/put sequence)
				fields.computeIfAbsent(path, k -> new AtomicInteger(0)).incrementAndGet();

				if (o instanceof Element) {
					final Element n = (Element) o;

					if ("collection".equals(n.getName())) {
						final String collName = n.getText().trim();
						// remember each collection only once, together with its attributes
						if (StringUtils.isNotBlank(collName) && !collections.containsKey(collName)) {
							final Map<String, String> collAttrs = new HashMap<>();
							for (final Object ao : n.attributes()) {
								collAttrs.put(((Attribute) ao).getName(), ((Attribute) ao).getValue());
							}
							collections.put(collName, collAttrs);
						}
					} else if ("type".equals(n.getName())) {
						types.add(n.getText().trim());
					}

				}
			}

		}

		final ObjectMapper mapper = new ObjectMapper();
		for (final Entry<String, Map<String, String>> e : collections.entrySet()) {
			System.out.println(e.getKey() + ": " + mapper.writeValueAsString(e.getValue()));

		}

		for (final Entry<String, AtomicInteger> e : fields.entrySet()) {
			System.out.println(e.getKey() + ": " + e.getValue().get());

		}

		System.out.println("TYPES: ");
		for (final String s : types) {
			System.out.println(s);

		}

		// the sample archive is expected to contain exactly 30000 records
		assertEquals(30000, count);
	}

	/**
	 * Verifies that {@link BaseRecordInfo} beans extracted from a sample record can
	 * be turned into a Spark Dataset (i.e. survive Encoders.bean serialization).
	 */
	@Test
	public void testParquet() throws Exception {

		final String xml = IOUtils.toString(getClass().getResourceAsStream("record.xml"));

		final SparkSession spark = SparkSession.builder().master("local[*]").getOrCreate();

		final List<BaseRecordInfo> ls = new ArrayList<>();

		for (int i = 0; i < 10; i++) {
			ls.add(extractInfo(xml));
		}

		final JavaRDD<BaseRecordInfo> rdd = JavaSparkContext
			.fromSparkContext(spark.sparkContext())
			.parallelize(ls);

		final Dataset<BaseRecordInfo> df = spark
			.createDataset(rdd.rdd(), Encoders.bean(BaseRecordInfo.class));

		df.printSchema();

		df.show(false);
	}

	/**
	 * Parses a BASE record and distills it into a {@link BaseRecordInfo}:
	 * record id, the set of XPaths present, the declared types and the collections.
	 *
	 * @throws RuntimeException wrapping the {@link DocumentException} on malformed XML
	 */
	private BaseRecordInfo extractInfo(final String s) {
		try {
			final Document record = DocumentHelper.parseText(s);

			final BaseRecordInfo info = new BaseRecordInfo();

			final Set<String> paths = new LinkedHashSet<>();
			final Set<String> types = new LinkedHashSet<>();
			final List<BaseCollectionInfo> colls = new ArrayList<>();

			for (final Object o : record.selectNodes("//*|//@*")) {
				paths.add(((Node) o).getPath());

				if (o instanceof Element) {
					final Element n = (Element) o;

					final String nodeName = n.getName();

					if ("collection".equals(nodeName)) {
						final String collName = n.getText().trim();

						if (StringUtils.isNotBlank(collName)) {
							final BaseCollectionInfo coll = new BaseCollectionInfo();
							coll.setId(collName);
							coll.setOpendoarId(n.valueOf("@opendoar_id").trim());
							coll.setRorId(n.valueOf("@ror_id").trim());
							colls.add(coll);
						}
					} else if ("type".equals(nodeName)) {
						types.add("TYPE: " + n.getText().trim());
					} else if ("typenorm".equals(nodeName)) {
						types.add("TYPE_NORM: " + n.getText().trim());
					}
				}
			}

			info.setId(record.valueOf("//*[local-name() = 'header']/*[local-name() = 'identifier']").trim());
			info.getTypes().addAll(types);
			info.getPaths().addAll(paths);
			info.setCollections(colls);

			return info;
		} catch (final DocumentException e) {
			throw new RuntimeException(e);
		}
	}

}
|
|
@ -0,0 +1,32 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.plugin.base;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
class BaseCollectorPluginTest {
|
||||
|
||||
@Test
|
||||
void testFilterXml() throws Exception {
|
||||
final String xml = IOUtils.toString(getClass().getResourceAsStream("record.xml"));
|
||||
|
||||
final Set<String> validIds = new HashSet<>(Arrays.asList("opendoar____::1234", "opendoar____::4567"));
|
||||
final Set<String> validTypes = new HashSet<>(Arrays.asList("1", "121"));
|
||||
final Set<String> validTypes2 = new HashSet<>(Arrays.asList("1", "11"));
|
||||
|
||||
assertTrue(BaseCollectorPlugin.filterXml(xml, validIds, validTypes));
|
||||
assertTrue(BaseCollectorPlugin.filterXml(xml, validIds, new HashSet<>()));
|
||||
|
||||
assertFalse(BaseCollectorPlugin.filterXml(xml, new HashSet<>(), validTypes));
|
||||
assertFalse(BaseCollectorPlugin.filterXml(xml, validIds, validTypes2));
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.plugin.base;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class BaseRecordInfo implements Serializable {
|
||||
|
||||
private static final long serialVersionUID = -8848232018350074593L;
|
||||
|
||||
private String id;
|
||||
private List<BaseCollectionInfo> collections = new ArrayList<>();
|
||||
private List<String> paths = new ArrayList<>();
|
||||
private List<String> types = new ArrayList<>();
|
||||
|
||||
public String getId() {
|
||||
return this.id;
|
||||
}
|
||||
|
||||
public void setId(final String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public List<String> getPaths() {
|
||||
return this.paths;
|
||||
}
|
||||
|
||||
public void setPaths(final List<String> paths) {
|
||||
this.paths = paths;
|
||||
}
|
||||
|
||||
public List<String> getTypes() {
|
||||
return this.types;
|
||||
}
|
||||
|
||||
public void setTypes(final List<String> types) {
|
||||
this.types = types;
|
||||
}
|
||||
|
||||
public List<BaseCollectionInfo> getCollections() {
|
||||
return this.collections;
|
||||
}
|
||||
|
||||
public void setCollections(final List<BaseCollectionInfo> collections) {
|
||||
this.collections = collections;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,77 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.plugin.base;

import java.io.IOException;

import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.util.LongAccumulator;
import org.dom4j.io.SAXReader;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.junit.jupiter.MockitoExtension;

import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest;
import eu.dnetlib.dhp.aggregation.common.AggregationCounter;
import eu.dnetlib.dhp.schema.mdstore.MetadataRecord;
import eu.dnetlib.dhp.schema.mdstore.Provenance;
import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;

/**
 * Smoke tests for the BASE XSLT transformation rules (base2odf / base2oaf):
 * each test applies a rule to the bundled sample record and prints the output.
 * NOTE(review): class name carries a typo ("Transfomation"); kept to preserve
 * the public name, consider renaming in a dedicated commit.
 */
// @Disabled
@ExtendWith(MockitoExtension.class)
public class BaseTransfomationTest extends AbstractVocabularyTest {

	private SparkConf sparkConf;

	@BeforeEach
	public void setUp() throws IOException, ISLookUpException {
		setUpVocabulary();

		// local, UI-less Spark configuration for the accumulators used by the counter
		this.sparkConf = new SparkConf();
		this.sparkConf.setMaster("local[*]");
		this.sparkConf.set("spark.driver.host", "localhost");
		this.sparkConf.set("spark.ui.enabled", "false");
	}

	@Test
	void testBase2ODF() throws Exception {
		final MetadataRecord result = transform("xml/base2odf.transformationRule.xml");
		System.out.println(result.getBody());
	}

	@Test
	void testBase2OAF() throws Exception {
		final MetadataRecord result = transform("xml/base2oaf.transformationRule.xml");
		System.out.println(result.getBody());
	}

	/**
	 * Applies the transformation rule at {@code rulePath} to the bundled sample
	 * record, factoring out the record setup shared by both tests.
	 */
	private MetadataRecord transform(final String rulePath) throws Exception {
		final MetadataRecord mr = new MetadataRecord();
		mr.setProvenance(new Provenance("DSID", "DSNAME", "PREFIX"));
		mr.setBody(IOUtils.toString(getClass().getResourceAsStream("record.xml")));

		final XSLTTransformationFunction tr = loadTransformationRule(rulePath);

		return tr.call(mr);
	}

	/**
	 * Reads the XSLT embedded in the transformation-rule profile at {@code path}
	 * and wraps it in an {@link XSLTTransformationFunction}.
	 */
	private XSLTTransformationFunction loadTransformationRule(final String path) throws Exception {
		final String xslt = new SAXReader()
			.read(this.getClass().getResourceAsStream(path))
			.selectSingleNode("//CODE/*")
			.asXML();

		final LongAccumulator la = new LongAccumulator();

		return new XSLTTransformationFunction(new AggregationCounter(la, la, la), xslt, 0, this.vocabularies);
	}

}
|
|
@ -9,6 +9,7 @@ import org.junit.jupiter.api.Test;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.common.collection.CollectorException;
|
||||
import eu.dnetlib.dhp.common.collection.HttpClientParams;
|
||||
|
||||
/**
|
||||
|
@ -37,7 +38,7 @@ public class RestIteratorTest {
|
|||
|
||||
@Disabled
|
||||
@Test
|
||||
public void test() {
|
||||
public void test() throws CollectorException {
|
||||
|
||||
HttpClientParams clientParams = new HttpClientParams();
|
||||
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.plugin.utils;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
class JsonUtilsTest {
|
||||
|
||||
static private String wrapped(String xml) {
|
||||
return "<?xml version=\"1.0\" encoding=\"UTF-8\"?><recordWrap>" + xml + "</recordWrap>";
|
||||
}
|
||||
|
||||
@Test
|
||||
void keyStartWithDigit() {
|
||||
assertEquals(
|
||||
wrapped("<m_100><n_200v>null</n_200v></m_100>"),
|
||||
JsonUtils.convertToXML("{\"100\" : {\"200v\" : null}}"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void keyStartWithSpecialchars() {
|
||||
assertEquals(
|
||||
wrapped("<_parent><_nest1><_nest2>null</_nest2></_nest1></_parent>"),
|
||||
JsonUtils.convertToXML("{\" parent\" : {\"-nest1\" : {\".nest2\" : null}}}"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void encodeArray() {
|
||||
assertEquals(
|
||||
wrapped("<_parent.child>1</_parent.child><_parent.child>2</_parent.child>"),
|
||||
JsonUtils.convertToXML("{\" parent.child\":[1, 2]}"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void arrayOfObjects() {
|
||||
assertEquals(
|
||||
wrapped("<parent><id>1</id></parent><parent><id>2</id></parent>"),
|
||||
JsonUtils.convertToXML("{\"parent\": [{\"id\": 1}, {\"id\": 2}]}"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void removeControlCharacters() {
|
||||
assertEquals(
|
||||
wrapped("<m_100><n_200v>Test</n_200v></m_100>"),
|
||||
JsonUtils.convertToXML("{\"100\" : {\"200v\" : \"\\u0000\\u000cTest\"}}"));
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,966 @@
|
|||
<record:record path="/0000-0001-6816-8350" xmlns:internal="http://www.orcid.org/ns/internal" xmlns:education="http://www.orcid.org/ns/education" xmlns:distinction="http://www.orcid.org/ns/distinction" xmlns:deprecated="http://www.orcid.org/ns/deprecated" xmlns:other-name="http://www.orcid.org/ns/other-name" xmlns:membership="http://www.orcid.org/ns/membership" xmlns:error="http://www.orcid.org/ns/error" xmlns:common="http://www.orcid.org/ns/common" xmlns:record="http://www.orcid.org/ns/record" xmlns:personal-details="http://www.orcid.org/ns/personal-details" xmlns:keyword="http://www.orcid.org/ns/keyword" xmlns:email="http://www.orcid.org/ns/email" xmlns:external-identifier="http://www.orcid.org/ns/external-identifier" xmlns:funding="http://www.orcid.org/ns/funding" xmlns:preferences="http://www.orcid.org/ns/preferences" xmlns:address="http://www.orcid.org/ns/address" xmlns:invited-position="http://www.orcid.org/ns/invited-position" xmlns:work="http://www.orcid.org/ns/work" xmlns:history="http://www.orcid.org/ns/history" xmlns:employment="http://www.orcid.org/ns/employment" xmlns:qualification="http://www.orcid.org/ns/qualification" xmlns:service="http://www.orcid.org/ns/service" xmlns:person="http://www.orcid.org/ns/person" xmlns:activities="http://www.orcid.org/ns/activities" xmlns:researcher-url="http://www.orcid.org/ns/researcher-url" xmlns:peer-review="http://www.orcid.org/ns/peer-review" xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:research-resource="http://www.orcid.org/ns/research-resource">
|
||||
<common:orcid-identifier>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:orcid-identifier>
|
||||
<preferences:preferences>
|
||||
<preferences:locale>en</preferences:locale>
|
||||
</preferences:preferences>
|
||||
<history:history>
|
||||
<history:creation-method>Direct</history:creation-method>
|
||||
<history:submission-date>2016-01-06T05:08:45.720Z</history:submission-date>
|
||||
<common:last-modified-date>2024-01-02T20:07:05.186Z</common:last-modified-date>
|
||||
<history:claimed>true</history:claimed>
|
||||
<history:verified-email>true</history:verified-email>
|
||||
<history:verified-primary-email>true</history:verified-primary-email>
|
||||
</history:history>
|
||||
<person:person path="/0000-0001-6816-8350/person">
|
||||
<common:last-modified-date>2023-12-02T13:32:05.269Z</common:last-modified-date>
|
||||
<other-name:other-names path="/0000-0001-6816-8350/other-names"/>
|
||||
<researcher-url:researcher-urls path="/0000-0001-6816-8350/researcher-urls">
|
||||
<common:last-modified-date>2016-02-09T09:18:18.417Z</common:last-modified-date>
|
||||
<researcher-url:researcher-url put-code="633431" visibility="public" path="/0000-0001-6816-8350/researcher-urls/633431" display-index="0">
|
||||
<common:created-date>2016-02-09T09:18:18.416Z</common:created-date>
|
||||
<common:last-modified-date>2016-02-09T09:18:18.417Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<researcher-url:url-name>Dr Michael Muchiri</researcher-url:url-name>
|
||||
<researcher-url:url>http://www.rmit.edu.au/contact/staff-contacts/academic-staff/m/muchiri-dr-michael</researcher-url:url>
|
||||
</researcher-url:researcher-url>
|
||||
</researcher-url:researcher-urls>
|
||||
<email:emails path="/0000-0001-6816-8350/email"/>
|
||||
<address:addresses path="/0000-0001-6816-8350/address">
|
||||
<common:last-modified-date>2023-12-02T13:32:05.269Z</common:last-modified-date>
|
||||
<address:address put-code="897528" visibility="public" path="/0000-0001-6816-8350/address/897528" display-index="2">
|
||||
<common:created-date>2018-02-13T02:32:04.094Z</common:created-date>
|
||||
<common:last-modified-date>2023-12-02T13:32:05.269Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<address:country>AU</address:country>
|
||||
</address:address>
|
||||
<address:address put-code="3191142" visibility="public" path="/0000-0001-6816-8350/address/3191142" display-index="1">
|
||||
<common:created-date>2023-12-02T13:32:05.260Z</common:created-date>
|
||||
<common:last-modified-date>2023-12-02T13:32:05.260Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<address:country>SA</address:country>
|
||||
</address:address>
|
||||
</address:addresses>
|
||||
<keyword:keywords path="/0000-0001-6816-8350/keywords">
|
||||
<common:last-modified-date>2023-12-02T13:31:16.269Z</common:last-modified-date>
|
||||
<keyword:keyword put-code="368304" visibility="public" path="/0000-0001-6816-8350/keywords/368304" display-index="4">
|
||||
<common:created-date>2016-02-09T09:16:44.001Z</common:created-date>
|
||||
<common:last-modified-date>2023-12-02T13:31:16.269Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<keyword:content>Organizational Behavior</keyword:content>
|
||||
</keyword:keyword>
|
||||
<keyword:keyword put-code="368303" visibility="public" path="/0000-0001-6816-8350/keywords/368303" display-index="3">
|
||||
<common:created-date>2016-02-09T09:16:27.374Z</common:created-date>
|
||||
<common:last-modified-date>2023-12-02T13:31:16.269Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<keyword:content>Organizational Leadership</keyword:content>
|
||||
</keyword:keyword>
|
||||
<keyword:keyword put-code="368306" visibility="public" path="/0000-0001-6816-8350/keywords/368306" display-index="2">
|
||||
<common:created-date>2016-02-09T09:17:08.998Z</common:created-date>
|
||||
<common:last-modified-date>2023-12-02T13:31:16.269Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<keyword:content>Organizational performance</keyword:content>
|
||||
</keyword:keyword>
|
||||
<keyword:keyword put-code="3590814" visibility="public" path="/0000-0001-6816-8350/keywords/3590814" display-index="1">
|
||||
<common:created-date>2023-12-02T13:31:16.259Z</common:created-date>
|
||||
<common:last-modified-date>2023-12-02T13:31:16.259Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<keyword:content>Thriving at work</keyword:content>
|
||||
</keyword:keyword>
|
||||
</keyword:keywords>
|
||||
<external-identifier:external-identifiers path="/0000-0001-6816-8350/external-identifiers">
|
||||
<common:last-modified-date>2018-04-10T00:49:55.386Z</common:last-modified-date>
|
||||
<external-identifier:external-identifier put-code="998076" visibility="public" path="/0000-0001-6816-8350/external-identifiers/998076" display-index="0">
|
||||
<common:created-date>2018-04-10T00:49:55.385Z</common:created-date>
|
||||
<common:last-modified-date>2018-04-10T00:49:55.386Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-client-id>
|
||||
<common:uri>https://orcid.org/client/0000-0003-1377-5676</common:uri>
|
||||
<common:path>0000-0003-1377-5676</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-client-id>
|
||||
<common:source-name>ResearcherID</common:source-name>
|
||||
<common:assertion-origin-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:assertion-origin-orcid>
|
||||
</common:source>
|
||||
<common:external-id-type>ResearcherID</common:external-id-type>
|
||||
<common:external-id-value>D-1929-2018</common:external-id-value>
|
||||
<common:external-id-url>http://www.researcherid.com/rid/D-1929-2018</common:external-id-url>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</external-identifier:external-identifier>
|
||||
</external-identifier:external-identifiers>
|
||||
</person:person>
|
||||
<activities:activities-summary path="/0000-0001-6816-8350/activities">
|
||||
<common:last-modified-date>2023-12-02T13:28:26.051Z</common:last-modified-date>
|
||||
<activities:distinctions path="/0000-0001-6816-8350/distinctions"/>
|
||||
<activities:educations path="/0000-0001-6816-8350/educations">
|
||||
<common:last-modified-date>2018-02-13T02:33:38.225Z</common:last-modified-date>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2016-02-09T06:55:21.838Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<education:education-summary put-code="1549986" display-index="0" path="/0000-0001-6816-8350/education/1549986" visibility="public">
|
||||
<common:created-date>2016-02-09T06:54:39.199Z</common:created-date>
|
||||
<common:last-modified-date>2016-02-09T06:55:21.838Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<common:department-name>Management</common:department-name>
|
||||
<common:role-title>PhD</common:role-title>
|
||||
<common:start-date>
|
||||
<common:year>2021</common:year>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2022</common:year>
|
||||
<common:month>02</common:month>
|
||||
<common:day>12</common:day>
|
||||
</common:end-date>
|
||||
<common:start-date>
|
||||
<common:year>2003</common:year>
|
||||
<common:month>03</common:month>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2007</common:year>
|
||||
<common:month>03</common:month>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>University of New England</common:name>
|
||||
<common:address>
|
||||
<common:city>Armidale</common:city>
|
||||
<common:region>NSW</common:region>
|
||||
<common:country>AU</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>1319</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</education:education-summary>
|
||||
</activities:affiliation-group>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2018-02-13T02:33:38.225Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<education:education-summary put-code="1549990" display-index="0" path="/0000-0001-6816-8350/education/1549990" visibility="public">
|
||||
<common:created-date>2016-02-09T06:57:04.181Z</common:created-date>
|
||||
<common:last-modified-date>2018-02-13T02:33:38.225Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<common:department-name>Psychology</common:department-name>
|
||||
<common:role-title>Master of Science (Industrial and Organizational) Psychology</common:role-title>
|
||||
<common:start-date>
|
||||
<common:year>1998</common:year>
|
||||
<common:month>01</common:month>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2000</common:year>
|
||||
<common:month>01</common:month>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>Universitas Gadjah Mada</common:name>
|
||||
<common:address>
|
||||
<common:city>Yogyakarta</common:city>
|
||||
<common:region>Daerah Istimewa Yogyakart</common:region>
|
||||
<common:country>ID</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>59166</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</education:education-summary>
|
||||
</activities:affiliation-group>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2018-02-13T02:33:35.821Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<education:education-summary put-code="1549998" display-index="0" path="/0000-0001-6816-8350/education/1549998" visibility="public">
|
||||
<common:created-date>2016-02-09T06:58:59.869Z</common:created-date>
|
||||
<common:last-modified-date>2018-02-13T02:33:35.821Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<common:department-name>Education</common:department-name>
|
||||
<common:role-title>Bachelor of Education (Honors)</common:role-title>
|
||||
<common:start-date>
|
||||
<common:year>1988</common:year>
|
||||
<common:month>03</common:month>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>1991</common:year>
|
||||
<common:month>03</common:month>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>Kenyatta University</common:name>
|
||||
<common:address>
|
||||
<common:city>Nairobi</common:city>
|
||||
<common:region>Nairobi</common:region>
|
||||
<common:country>KE</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>107864</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</education:education-summary>
|
||||
</activities:affiliation-group>
|
||||
</activities:educations>
|
||||
<activities:employments path="/0000-0001-6816-8350/employments">
|
||||
<common:last-modified-date>2023-12-02T13:28:26.051Z</common:last-modified-date>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2023-12-02T13:28:26.051Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<employment:employment-summary put-code="21884863" display-index="1" path="/0000-0001-6816-8350/employment/21884863" visibility="public">
|
||||
<common:created-date>2023-12-02T13:28:26.051Z</common:created-date>
|
||||
<common:last-modified-date>2023-12-02T13:28:26.051Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<common:department-name>Management</common:department-name>
|
||||
<common:role-title>Associate Professor in Management</common:role-title>
|
||||
<common:start-date>
|
||||
<common:year>2023</common:year>
|
||||
<common:month>08</common:month>
|
||||
<common:day>20</common:day>
|
||||
</common:start-date>
|
||||
<common:organization>
|
||||
<common:name>Alfaisal University</common:name>
|
||||
<common:address>
|
||||
<common:city>Riyadh</common:city>
|
||||
<common:country>SA</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>https://ror.org/00cdrtq48</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>ROR</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
<common:url>https://faculty.alfaisal.edu/user/mmuchiri</common:url>
|
||||
</employment:employment-summary>
|
||||
</activities:affiliation-group>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2016-02-09T07:00:06.052Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<employment:employment-summary put-code="1550002" display-index="0" path="/0000-0001-6816-8350/employment/1550002" visibility="public">
|
||||
<common:created-date>2016-02-09T07:00:06.052Z</common:created-date>
|
||||
<common:last-modified-date>2016-02-09T07:00:06.052Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<common:department-name>Management</common:department-name>
|
||||
<common:role-title>Senior Lecturer</common:role-title>
|
||||
<common:start-date>
|
||||
<common:year>2014</common:year>
|
||||
<common:month>02</common:month>
|
||||
</common:start-date>
|
||||
<common:organization>
|
||||
<common:name>RMIT University</common:name>
|
||||
<common:address>
|
||||
<common:city>Melbourne</common:city>
|
||||
<common:region>VIC</common:region>
|
||||
<common:country>AU</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>5376</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</employment:employment-summary>
|
||||
</activities:affiliation-group>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2016-02-09T07:01:08.398Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<employment:employment-summary put-code="1550007" display-index="0" path="/0000-0001-6816-8350/employment/1550007" visibility="public">
|
||||
<common:created-date>2016-02-09T07:01:08.398Z</common:created-date>
|
||||
<common:last-modified-date>2016-02-09T07:01:08.398Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<common:department-name>Management</common:department-name>
|
||||
<common:role-title>Senior Lecturer in Human Resource Management</common:role-title>
|
||||
<common:start-date>
|
||||
<common:year>2010</common:year>
|
||||
<common:month>01</common:month>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2014</common:year>
|
||||
<common:month>02</common:month>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>Central Queensland University</common:name>
|
||||
<common:address>
|
||||
<common:city>Rockhampton</common:city>
|
||||
<common:region>QLD</common:region>
|
||||
<common:country>AU</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>273488</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</employment:employment-summary>
|
||||
</activities:affiliation-group>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2016-02-09T07:01:47.814Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<employment:employment-summary put-code="1550010" display-index="0" path="/0000-0001-6816-8350/employment/1550010" visibility="public">
|
||||
<common:created-date>2016-02-09T07:01:47.814Z</common:created-date>
|
||||
<common:last-modified-date>2016-02-09T07:01:47.814Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<common:department-name>Management</common:department-name>
|
||||
<common:role-title>Lecturer in Management</common:role-title>
|
||||
<common:start-date>
|
||||
<common:year>2007</common:year>
|
||||
<common:month>01</common:month>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2010</common:year>
|
||||
<common:month>01</common:month>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>Central Queensland University</common:name>
|
||||
<common:address>
|
||||
<common:city>Rockhampton</common:city>
|
||||
<common:region>QLD</common:region>
|
||||
<common:country>AU</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>273488</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</employment:employment-summary>
|
||||
</activities:affiliation-group>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2018-02-13T02:33:13.213Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<employment:employment-summary put-code="1550017" display-index="0" path="/0000-0001-6816-8350/employment/1550017" visibility="public">
|
||||
<common:created-date>2016-02-09T07:03:42.180Z</common:created-date>
|
||||
<common:last-modified-date>2018-02-13T02:33:13.213Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<common:department-name>Human Resource Development Division</common:department-name>
|
||||
<common:role-title>Chief Human Resource Development Officer</common:role-title>
|
||||
<common:start-date>
|
||||
<common:year>2005</common:year>
|
||||
<common:month>01</common:month>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2007</common:year>
|
||||
<common:month>01</common:month>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>Government of Kenya Directorate of Personnel Management</common:name>
|
||||
<common:address>
|
||||
<common:city>Nairobi</common:city>
|
||||
<common:region>Nairobi</common:region>
|
||||
<common:country>KE</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>360256</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</employment:employment-summary>
|
||||
</activities:affiliation-group>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2016-02-09T07:05:02.300Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<employment:employment-summary put-code="1550020" display-index="0" path="/0000-0001-6816-8350/employment/1550020" visibility="public">
|
||||
<common:created-date>2016-02-09T07:05:02.300Z</common:created-date>
|
||||
<common:last-modified-date>2016-02-09T07:05:02.300Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<common:department-name>Human Resource Development Division</common:department-name>
|
||||
<common:role-title>Human Resource Development Officer</common:role-title>
|
||||
<common:start-date>
|
||||
<common:year>2001</common:year>
|
||||
<common:month>01</common:month>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2005</common:year>
|
||||
<common:month>01</common:month>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>Government of Kenya Directorate of Personnel Management</common:name>
|
||||
<common:address>
|
||||
<common:city>Nairobi</common:city>
|
||||
<common:region>Nairobi</common:region>
|
||||
<common:country>KE</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>360256</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</employment:employment-summary>
|
||||
</activities:affiliation-group>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2016-02-09T07:36:52.398Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<employment:employment-summary put-code="1550050" display-index="0" path="/0000-0001-6816-8350/employment/1550050" visibility="public">
|
||||
<common:created-date>2016-02-09T07:36:52.398Z</common:created-date>
|
||||
<common:last-modified-date>2016-02-09T07:36:52.398Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<common:department-name>Public Sector Management Technical Assistance Project</common:department-name>
|
||||
<common:role-title>Project Coordinator for Development Learning Centre</common:role-title>
|
||||
<common:start-date>
|
||||
<common:year>2002</common:year>
|
||||
<common:month>08</common:month>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2003</common:year>
|
||||
<common:month>03</common:month>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>Government of Kenya Directorate of Personnel Management</common:name>
|
||||
<common:address>
|
||||
<common:city>Nairobi</common:city>
|
||||
<common:region>Nairobi</common:region>
|
||||
<common:country>KE</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>360256</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</employment:employment-summary>
|
||||
</activities:affiliation-group>
|
||||
</activities:employments>
|
||||
<activities:fundings path="/0000-0001-6816-8350/fundings">
|
||||
<common:last-modified-date>2016-02-09T09:05:27.100Z</common:last-modified-date>
|
||||
<activities:group>
|
||||
<common:last-modified-date>2016-02-09T09:05:27.100Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<funding:funding-summary put-code="150520" path="/0000-0001-6816-8350/funding/150520" visibility="public" display-index="6">
|
||||
<common:created-date>2016-02-09T09:05:27.100Z</common:created-date>
|
||||
<common:last-modified-date>2016-02-09T09:05:27.100Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<funding:title>
|
||||
<common:title>A cross-country examination of Employee Wellbeing, Leadership, High Performance Work Systems and Innovative Behaviours</common:title>
|
||||
</funding:title>
|
||||
<funding:type>grant</funding:type>
|
||||
<common:start-date>
|
||||
<common:year>2016</common:year>
|
||||
<common:month>01</common:month>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2016</common:year>
|
||||
<common:month>12</common:month>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>RMIT University</common:name>
|
||||
<common:address>
|
||||
<common:city>VIC</common:city>
|
||||
<common:region>VIC</common:region>
|
||||
<common:country>AU</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>http://dx.doi.org/10.13039/501100001780</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>FUNDREF</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</funding:funding-summary>
|
||||
</activities:group>
|
||||
<activities:group>
|
||||
<common:last-modified-date>2016-02-09T09:03:51.641Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<funding:funding-summary put-code="150518" path="/0000-0001-6816-8350/funding/150518" visibility="public" display-index="5">
|
||||
<common:created-date>2016-02-09T09:03:51.641Z</common:created-date>
|
||||
<common:last-modified-date>2016-02-09T09:03:51.641Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<funding:title>
|
||||
<common:title>Leading Safe and Thriving Organisations: An Investigation of the Relationships between Leadership, Thriving Behaviour, Authentic Followership and Safety Climate in an Australian Multinational Enterprise</common:title>
|
||||
</funding:title>
|
||||
<funding:type>grant</funding:type>
|
||||
<common:start-date>
|
||||
<common:year>2015</common:year>
|
||||
<common:month>01</common:month>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2015</common:year>
|
||||
<common:month>12</common:month>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>RMIT University</common:name>
|
||||
<common:address>
|
||||
<common:city>VIC</common:city>
|
||||
<common:region>VIC</common:region>
|
||||
<common:country>AU</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>http://dx.doi.org/10.13039/501100001780</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>FUNDREF</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</funding:funding-summary>
|
||||
</activities:group>
|
||||
<activities:group>
|
||||
<common:last-modified-date>2016-02-09T09:02:28.297Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<funding:funding-summary put-code="150516" path="/0000-0001-6816-8350/funding/150516" visibility="public" display-index="4">
|
||||
<common:created-date>2016-02-09T09:02:28.297Z</common:created-date>
|
||||
<common:last-modified-date>2016-02-09T09:02:28.297Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<funding:title>
|
||||
<common:title>A multilevel, cross-country examination of leadership, followership and innovative behaviours in Australia and Indonesia. </common:title>
|
||||
</funding:title>
|
||||
<funding:type>grant</funding:type>
|
||||
<common:start-date>
|
||||
<common:year>2015</common:year>
|
||||
<common:month>01</common:month>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2015</common:year>
|
||||
<common:month>12</common:month>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>RMIT University</common:name>
|
||||
<common:address>
|
||||
<common:city>VIC</common:city>
|
||||
<common:region>VIC</common:region>
|
||||
<common:country>AU</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>http://dx.doi.org/10.13039/501100001780</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>FUNDREF</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</funding:funding-summary>
|
||||
</activities:group>
|
||||
<activities:group>
|
||||
<common:last-modified-date>2016-02-09T09:00:51.749Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<funding:funding-summary put-code="150514" path="/0000-0001-6816-8350/funding/150514" visibility="public" display-index="3">
|
||||
<common:created-date>2016-02-09T09:00:51.749Z</common:created-date>
|
||||
<common:last-modified-date>2016-02-09T09:00:51.749Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<funding:title>
|
||||
<common:title>Workplace safety and positive leadership: Exploring relationships between leader behaviours, organisational climate, safety climate, safety citizenship behaviours and innovative behaviours within city councils in Victoria </common:title>
|
||||
</funding:title>
|
||||
<funding:type>grant</funding:type>
|
||||
<common:start-date>
|
||||
<common:year>2014</common:year>
|
||||
<common:month>01</common:month>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2014</common:year>
|
||||
<common:month>12</common:month>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>RMIT University</common:name>
|
||||
<common:address>
|
||||
<common:city>VIC</common:city>
|
||||
<common:region>VIC</common:region>
|
||||
<common:country>AU</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>http://dx.doi.org/10.13039/501100001780</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>FUNDREF</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</funding:funding-summary>
|
||||
</activities:group>
|
||||
<activities:group>
|
||||
<common:last-modified-date>2016-02-09T07:46:44.919Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<funding:funding-summary put-code="150485" path="/0000-0001-6816-8350/funding/150485" visibility="public" display-index="0">
|
||||
<common:created-date>2016-02-09T07:46:44.919Z</common:created-date>
|
||||
<common:last-modified-date>2016-02-09T07:46:44.919Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<funding:title>
|
||||
<common:title>Sustainable Business Model for Central Queensland Regional Information Systems.</common:title>
|
||||
</funding:title>
|
||||
<funding:type>grant</funding:type>
|
||||
<common:start-date>
|
||||
<common:year>2008</common:year>
|
||||
<common:month>01</common:month>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2008</common:year>
|
||||
<common:month>12</common:month>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>Department of Local Government, Planning, Sport and Recreation, Queensland, Australia </common:name>
|
||||
<common:address>
|
||||
<common:city>Rockhampton</common:city>
|
||||
<common:region>Central Queensland</common:region>
|
||||
<common:country>AU</common:country>
|
||||
</common:address>
|
||||
</common:organization>
|
||||
</funding:funding-summary>
|
||||
</activities:group>
|
||||
</activities:fundings>
|
||||
<activities:invited-positions path="/0000-0001-6816-8350/invited-positions"/>
|
||||
<activities:memberships path="/0000-0001-6816-8350/memberships"/>
|
||||
<activities:peer-reviews path="/0000-0001-6816-8350/peer-reviews">
|
||||
<common:last-modified-date>2023-05-31T05:53:44.542Z</common:last-modified-date>
|
||||
<activities:group>
|
||||
<common:last-modified-date>2023-05-31T05:53:44.542Z</common:last-modified-date>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>peer-review</common:external-id-type>
|
||||
<common:external-id-value>issn:0167-4544</common:external-id-value>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<activities:peer-review-group>
|
||||
<common:last-modified-date>2023-02-28T06:51:52.426Z</common:last-modified-date>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>source-work-id</common:external-id-type>
|
||||
<common:external-id-value>c9bdf086-cfee-4cd9-bcfb-268cc5423248</common:external-id-value>
|
||||
<common:external-id-normalized transient="true">c9bdf086-cfee-4cd9-bcfb-268cc5423248</common:external-id-normalized>
|
||||
<common:external-id-url></common:external-id-url>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<peer-review:peer-review-summary put-code="8741329" path="/0000-0001-6816-8350/peer-review/8741329" visibility="public" display-index="0">
|
||||
<common:created-date>2023-02-28T06:51:52.426Z</common:created-date>
|
||||
<common:last-modified-date>2023-02-28T06:51:52.426Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-client-id>
|
||||
<common:uri>https://orcid.org/client/APP-945VYTN20C7BZXYT</common:uri>
|
||||
<common:path>APP-945VYTN20C7BZXYT</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-client-id>
|
||||
<common:source-name>Springer Nature @ Editorial Manager</common:source-name>
|
||||
</common:source>
|
||||
<peer-review:reviewer-role>reviewer</peer-review:reviewer-role>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>source-work-id</common:external-id-type>
|
||||
<common:external-id-value>c9bdf086-cfee-4cd9-bcfb-268cc5423248</common:external-id-value>
|
||||
<common:external-id-normalized transient="true">c9bdf086-cfee-4cd9-bcfb-268cc5423248</common:external-id-normalized>
|
||||
<common:external-id-url></common:external-id-url>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<peer-review:review-type>review</peer-review:review-type>
|
||||
<peer-review:completion-date>
|
||||
<common:year>2023</common:year>
|
||||
</peer-review:completion-date>
|
||||
<peer-review:review-group-id>issn:0167-4544</peer-review:review-group-id>
|
||||
<peer-review:convening-organization>
|
||||
<common:name>Springer Nature</common:name>
|
||||
<common:address>
|
||||
<common:city>New York</common:city>
|
||||
<common:country>US</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>grid.467660.5</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>GRID</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</peer-review:convening-organization>
|
||||
</peer-review:peer-review-summary>
|
||||
</activities:peer-review-group>
|
||||
<activities:peer-review-group>
|
||||
<common:last-modified-date>2023-05-31T05:53:44.542Z</common:last-modified-date>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>source-work-id</common:external-id-type>
|
||||
<common:external-id-value>c442840b-5807-459d-802a-303d8ba4e25e</common:external-id-value>
|
||||
<common:external-id-normalized transient="true">c442840b-5807-459d-802a-303d8ba4e25e</common:external-id-normalized>
|
||||
<common:external-id-url></common:external-id-url>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<peer-review:peer-review-summary put-code="9680570" path="/0000-0001-6816-8350/peer-review/9680570" visibility="public" display-index="0">
|
||||
<common:created-date>2023-05-31T05:53:44.542Z</common:created-date>
|
||||
<common:last-modified-date>2023-05-31T05:53:44.542Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-client-id>
|
||||
<common:uri>https://orcid.org/client/APP-945VYTN20C7BZXYT</common:uri>
|
||||
<common:path>APP-945VYTN20C7BZXYT</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-client-id>
|
||||
<common:source-name>Springer Nature @ Editorial Manager</common:source-name>
|
||||
</common:source>
|
||||
<peer-review:reviewer-role>reviewer</peer-review:reviewer-role>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>source-work-id</common:external-id-type>
|
||||
<common:external-id-value>c442840b-5807-459d-802a-303d8ba4e25e</common:external-id-value>
|
||||
<common:external-id-normalized transient="true">c442840b-5807-459d-802a-303d8ba4e25e</common:external-id-normalized>
|
||||
<common:external-id-url></common:external-id-url>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<peer-review:review-type>review</peer-review:review-type>
|
||||
<peer-review:completion-date>
|
||||
<common:year>2023</common:year>
|
||||
</peer-review:completion-date>
|
||||
<peer-review:review-group-id>issn:0167-4544</peer-review:review-group-id>
|
||||
<peer-review:convening-organization>
|
||||
<common:name>Springer Nature</common:name>
|
||||
<common:address>
|
||||
<common:city>New York</common:city>
|
||||
<common:country>US</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>grid.467660.5</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>GRID</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</peer-review:convening-organization>
|
||||
</peer-review:peer-review-summary>
|
||||
</activities:peer-review-group>
|
||||
</activities:group>
|
||||
</activities:peer-reviews>
|
||||
<activities:qualifications path="/0000-0001-6816-8350/qualifications"/>
|
||||
<activities:research-resources path="/0000-0001-6816-8350/research-resources"/>
|
||||
<activities:services path="/0000-0001-6816-8350/services"/>
|
||||
<activities:works path="/0000-0001-6816-8350/works">
|
||||
<common:last-modified-date>2023-06-02T20:12:00.338Z</common:last-modified-date>
|
||||
<activities:group>
|
||||
<common:last-modified-date>2023-06-02T20:12:00.338Z</common:last-modified-date>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>doi</common:external-id-type>
|
||||
<common:external-id-value>10.4337/9781800881945.00020</common:external-id-value>
|
||||
<common:external-id-normalized transient="true">10.4337/9781800881945.00020</common:external-id-normalized>
|
||||
<common:external-id-url>https://doi.org/10.4337/9781800881945.00020</common:external-id-url>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<work:work-summary put-code="134891279" path="/0000-0001-6816-8350/work/134891279" visibility="public" display-index="0">
|
||||
<common:created-date>2023-05-11T21:05:54.188Z</common:created-date>
|
||||
<common:last-modified-date>2023-06-02T20:12:00.338Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-client-id>
|
||||
<common:uri>https://orcid.org/client/0000-0001-9884-1913</common:uri>
|
||||
<common:path>0000-0001-9884-1913</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-client-id>
|
||||
<common:source-name>Crossref</common:source-name>
|
||||
</common:source>
|
||||
<work:title>
|
||||
<common:title>Ethical leadership as workplace innovation and enabler for employee commitment and innovative work behaviours in Vietnam</common:title>
|
||||
</work:title>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>doi</common:external-id-type>
|
||||
<common:external-id-value>10.4337/9781800881945.00020</common:external-id-value>
|
||||
<common:external-id-normalized transient="true">10.4337/9781800881945.00020</common:external-id-normalized>
|
||||
<common:external-id-url>https://doi.org/10.4337/9781800881945.00020</common:external-id-url>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<common:url>https://doi.org/10.4337/9781800881945.00020</common:url>
|
||||
<work:type>book-chapter</work:type>
|
||||
<common:publication-date>
|
||||
<common:year>2023</common:year>
|
||||
<common:month>05</common:month>
|
||||
<common:day>26</common:day>
|
||||
</common:publication-date>
|
||||
</work:work-summary>
|
||||
</activities:group>
|
||||
<activities:group>
|
||||
<common:last-modified-date>2023-03-01T11:30:31.972Z</common:last-modified-date>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>doi</common:external-id-type>
|
||||
<common:external-id-value>10.1007/s10551-022-05081-6</common:external-id-value>
|
||||
<common:external-id-normalized transient="true">10.1007/s10551-022-05081-6</common:external-id-normalized>
|
||||
<common:external-id-url>https://doi.org/10.1007/s10551-022-05081-6</common:external-id-url>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<work:work-summary put-code="110048777" path="/0000-0001-6816-8350/work/110048777" visibility="public" display-index="0">
|
||||
<common:created-date>2022-03-18T03:36:55.927Z</common:created-date>
|
||||
<common:last-modified-date>2023-03-01T11:30:31.972Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-client-id>
|
||||
<common:uri>https://orcid.org/client/0000-0001-9884-1913</common:uri>
|
||||
<common:path>0000-0001-9884-1913</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-client-id>
|
||||
<common:source-name>Crossref</common:source-name>
|
||||
</common:source>
|
||||
<work:title>
|
||||
<common:title>Unethical Leadership: Review, Synthesis and Directions for Future Research</common:title>
|
||||
</work:title>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>doi</common:external-id-type>
|
||||
<common:external-id-value>10.1007/s10551-022-05081-6</common:external-id-value>
|
||||
<common:external-id-normalized transient="true">10.1007/s10551-022-05081-6</common:external-id-normalized>
|
||||
<common:external-id-url>https://doi.org/10.1007/s10551-022-05081-6</common:external-id-url>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<common:url>https://doi.org/10.1007/s10551-022-05081-6</common:url>
|
||||
<work:type>journal-article</work:type>
|
||||
<common:publication-date>
|
||||
<common:year>2023</common:year>
|
||||
<common:month>03</common:month>
|
||||
</common:publication-date>
|
||||
<work:journal-title>Journal of Business Ethics</work:journal-title>
|
||||
</work:work-summary>
|
||||
</activities:group>
|
||||
<activities:group>
|
||||
<common:last-modified-date>2022-05-28T18:16:16.575Z</common:last-modified-date>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>doi</common:external-id-type>
|
||||
<common:external-id-value>10.1017/jmo.2019.33</common:external-id-value>
|
||||
<common:external-id-normalized transient="true">10.1017/jmo.2019.33</common:external-id-normalized>
|
||||
<common:external-id-url>https://doi.org/10.1017/jmo.2019.33</common:external-id-url>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<work:work-summary put-code="57272180" path="/0000-0001-6816-8350/work/57272180" visibility="public" display-index="0">
|
||||
<common:created-date>2019-05-10T07:23:14.608Z</common:created-date>
|
||||
<common:last-modified-date>2022-05-28T18:16:16.575Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-client-id>
|
||||
<common:uri>https://orcid.org/client/0000-0001-9884-1913</common:uri>
|
||||
<common:path>0000-0001-9884-1913</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-client-id>
|
||||
<common:source-name>Crossref</common:source-name>
|
||||
</common:source>
|
||||
<work:title>
|
||||
<common:title>And now for something completely different: Reframing social processes of leadership theory using positive organisational behaviour</common:title>
|
||||
</work:title>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>doi</common:external-id-type>
|
||||
<common:external-id-value>10.1017/jmo.2019.33</common:external-id-value>
|
||||
<common:external-id-normalized transient="true">10.1017/jmo.2019.33</common:external-id-normalized>
|
||||
<common:external-id-url>https://doi.org/10.1017/jmo.2019.33</common:external-id-url>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<common:url>https://doi.org/10.1017/jmo.2019.33</common:url>
|
||||
<work:type>journal-article</work:type>
|
||||
<common:publication-date>
|
||||
<common:year>2019</common:year>
|
||||
<common:month>05</common:month>
|
||||
<common:day>09</common:day>
|
||||
</common:publication-date>
|
||||
<work:journal-title>Journal of Management & Organization</work:journal-title>
|
||||
</work:work-summary>
|
||||
</activities:group>
|
||||
</activities:works>
|
||||
</activities:activities-summary>
|
||||
</record:record>
|
|
@ -0,0 +1,60 @@
|
|||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<activities:employments path="/0000-0002-0456-1185/employments" xmlns:internal="http://www.orcid.org/ns/internal" xmlns:education="http://www.orcid.org/ns/education" xmlns:distinction="http://www.orcid.org/ns/distinction" xmlns:deprecated="http://www.orcid.org/ns/deprecated" xmlns:other-name="http://www.orcid.org/ns/other-name" xmlns:membership="http://www.orcid.org/ns/membership" xmlns:error="http://www.orcid.org/ns/error" xmlns:common="http://www.orcid.org/ns/common" xmlns:record="http://www.orcid.org/ns/record" xmlns:personal-details="http://www.orcid.org/ns/personal-details" xmlns:keyword="http://www.orcid.org/ns/keyword" xmlns:email="http://www.orcid.org/ns/email" xmlns:external-identifier="http://www.orcid.org/ns/external-identifier" xmlns:funding="http://www.orcid.org/ns/funding" xmlns:preferences="http://www.orcid.org/ns/preferences" xmlns:address="http://www.orcid.org/ns/address" xmlns:invited-position="http://www.orcid.org/ns/invited-position" xmlns:work="http://www.orcid.org/ns/work" xmlns:history="http://www.orcid.org/ns/history" xmlns:employment="http://www.orcid.org/ns/employment" xmlns:qualification="http://www.orcid.org/ns/qualification" xmlns:service="http://www.orcid.org/ns/service" xmlns:person="http://www.orcid.org/ns/person" xmlns:activities="http://www.orcid.org/ns/activities" xmlns:researcher-url="http://www.orcid.org/ns/researcher-url" xmlns:peer-review="http://www.orcid.org/ns/peer-review" xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:research-resource="http://www.orcid.org/ns/research-resource">
|
||||
<common:last-modified-date>2024-01-07T23:59:38.869Z</common:last-modified-date>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2024-01-07T23:59:38.869Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<employment:employment-summary put-code="22127142" display-index="0" path="/0000-0002-0456-1185/employment/22127142" visibility="public">
|
||||
<common:created-date>2024-01-07T23:59:38.869Z</common:created-date>
|
||||
<common:last-modified-date>2024-01-07T23:59:38.869Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-client-id>
|
||||
<common:uri>https://orcid.org/client/APP-N0TAO4G9BBK9PWHT</common:uri>
|
||||
<common:path>APP-N0TAO4G9BBK9PWHT</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-client-id>
|
||||
<common:source-name>Tampere University</common:source-name>
|
||||
</common:source>
|
||||
<common:organization>
|
||||
<common:name>Tampere University</common:name>
|
||||
<common:address>
|
||||
<common:city>Tampere</common:city>
|
||||
<common:country>FI</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>https://ror.org/033003e23</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>ROR</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</employment:employment-summary>
|
||||
</activities:affiliation-group>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2019-01-03T17:00:05.658Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<employment:employment-summary put-code="3291239" display-index="1" path="/0000-0002-0456-1185/employment/3291239" visibility="public">
|
||||
<common:created-date>2017-02-26T04:46:20.917Z</common:created-date>
|
||||
<common:last-modified-date>2019-01-03T17:00:05.658Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0002-0456-1185</common:uri>
|
||||
<common:path>0000-0002-0456-1185</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
<common:source-name>Tiina Manninen</common:source-name>
|
||||
</common:source>
|
||||
<common:department-name> Faculty of Medicine and Health Technology</common:department-name>
|
||||
<common:role-title>Academy Research Fellow</common:role-title>
|
||||
<common:organization>
|
||||
<common:name>Tampere University</common:name>
|
||||
<common:address>
|
||||
<common:city>Tampere</common:city>
|
||||
<common:country>FI</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>7839</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</employment:employment-summary>
|
||||
</activities:affiliation-group>
|
||||
</activities:employments>
|
File diff suppressed because it is too large
Load Diff
Binary file not shown.
|
@ -0,0 +1,58 @@
|
|||
<record>
|
||||
<header xmlns="http://www.openarchives.org/OAI/2.0/">
|
||||
<identifier>ftdoajarticles:oai:doaj.org/article:e2d5b5126b2d4e479933cc7f9a9ae0c1</identifier>
|
||||
<datestamp>2022-12-31T11:48:55Z</datestamp>
|
||||
</header>
|
||||
<metadata xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:base_dc="http://oai.base-search.net/base_dc/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dc="http://purl.org/dc/elements/1.1/">
|
||||
<base_dc:dc xsi:schemaLocation="http://oai.base-search.net/base_dc/ http://oai.base-search.net/base_dc/base_dc.xsd">
|
||||
<base_dc:global_id>ftdoajarticles:oai:doaj.org/article:e2d5b5126b2d4e479933cc7f9a9ae0c1</base_dc:global_id>
|
||||
<base_dc:continent>cww</base_dc:continent>
|
||||
<base_dc:country>org</base_dc:country>
|
||||
<base_dc:collection opendoar_id="1234" ror_id="ror1234">ftdoajarticles</base_dc:collection>
|
||||
<base_dc:collname>TEST REPO</base_dc:collname>
|
||||
<dc:title>Assessment of cultural heritage: the legislative and methodological framework of Russian Federation</dc:title>
|
||||
<dc:creator>ALBU, Svetlana</dc:creator>
|
||||
<dc:creator>LEȘAN, Anna</dc:creator>
|
||||
<dc:subject>architectural heritage</dc:subject>
|
||||
<dc:subject>evaluation of architectural heritage</dc:subject>
|
||||
<dc:subject>types of values</dc:subject>
|
||||
<dc:subject>experience of russian federation</dc:subject>
|
||||
<dc:subject>Social Sciences</dc:subject>
|
||||
<dc:subject>H</dc:subject>
|
||||
<dc:description>Architectural heritage is the real estate inheritance by population of a country becoming an extremely valuable and specific category, preserving and capitalizing on those assets requires considerable effort. The state does not have sufficient means to maintain and preserve cultural heritage, as a result it is included in the civil circuit. The transfer of property right or of some partial rights over the architectural patrimony is accompanied by the necessity to estimate the value of goods. In this article, the authors examine the experience of Russian Federation (one of the largest countries with a huge architectural heritage) on the legislative framework of architectural and methodological heritage of architectural heritage assessment. The particularities of cultural assets valuation compared to other categories of real estate are examined, as well as the methodological aspects (types of values, methods applied in valuation, approaches according to the purpose of valuation) regarding the valuation of real estate with architectural value in Russian Federation.</dc:description>
|
||||
<dc:publisher>Technical University of Moldova</dc:publisher>
|
||||
<dc:date>2020-09-01T00:00:00Z</dc:date>
|
||||
<base_dc:year>2020</base_dc:year>
|
||||
<dc:type>article</dc:type>
|
||||
<base_dc:typenorm>121</base_dc:typenorm>
|
||||
<dc:identifier>https://doi.org/10.5281/zenodo.3971988</dc:identifier>
|
||||
<dc:identifier>https://doaj.org/article/e2d5b5126b2d4e479933cc7f9a9ae0c1</dc:identifier>
|
||||
<base_dc:link>https://doi.org/10.5281/zenodo.3971988</base_dc:link>
|
||||
<dc:source>Journal of Social Sciences, Vol 3, Iss 3, Pp 134-143 (2020)</dc:source>
|
||||
<dc:language>EN</dc:language>
|
||||
<dc:language>FR</dc:language>
|
||||
<dc:language>RO</dc:language>
|
||||
<dc:relation>http://ibn.idsi.md/sites/default/files/imag_file/JSS-3-2020_134-143.pdf</dc:relation>
|
||||
<dc:relation>https://doaj.org/toc/2587-3490</dc:relation>
|
||||
<dc:relation>https://doaj.org/toc/2587-3504</dc:relation>
|
||||
<dc:relation>doi:10.5281/zenodo.3971988</dc:relation>
|
||||
<dc:relation>2587-3490</dc:relation>
|
||||
<dc:relation>2587-3504</dc:relation>
|
||||
<dc:relation>https://doaj.org/article/e2d5b5126b2d4e479933cc7f9a9ae0c1</dc:relation>
|
||||
<base_dc:autoclasscode type="ddc">720</base_dc:autoclasscode>
|
||||
<base_dc:authod_id>
|
||||
<base_dc:creator_name>ALBU, Svetlana</base_dc:creator_name>
|
||||
<base_dc:creator_id>https://orcid.org/0000-0002-8648-950X</base_dc:creator_id>
|
||||
</base_dc:authod_id>
|
||||
<base_dc:authod_id>
|
||||
<base_dc:creator_name>LEȘAN, Anna</base_dc:creator_name>
|
||||
<base_dc:creator_id>https://orcid.org/0000-0003-3284-0525</base_dc:creator_id>
|
||||
</base_dc:authod_id>
|
||||
<base_dc:doi>https://doi.org/10.5281/zenodo.3971988</base_dc:doi>
|
||||
<base_dc:oa>1</base_dc:oa>
|
||||
<base_dc:lang>eng</base_dc:lang>
|
||||
<base_dc:lang>fre</base_dc:lang>
|
||||
<base_dc:lang>rum</base_dc:lang>
|
||||
</base_dc:dc>
|
||||
</metadata>
|
||||
</record>
|
|
@ -1496,4 +1496,30 @@ cnr:institutes @=@ __CDS131__ @=@ IBE - Istituto per la BioEconomia
|
|||
cnr:institutes @=@ https://ror.org/0263zy895 @=@ CDS132
|
||||
cnr:institutes @=@ https://ror.org/0263zy895 @=@ SCITEC - Istituto di Scienze e Tecnologie Chimiche \"Giulio Natta\"
|
||||
cnr:institutes @=@ __CDS133__ @=@ CDS133
|
||||
cnr:institutes @=@ __CDS133__ @=@ STEMS - Istituto di Scienze e Tecnologie per l'Energia e la Mobilità Sostenibili
|
||||
cnr:institutes @=@ __CDS133__ @=@ STEMS - Istituto di Scienze e Tecnologie per l'Energia e la Mobilità Sostenibili
|
||||
base:normalized_types @=@ Text @=@ 1
|
||||
base:normalized_types @=@ Book @=@ 11
|
||||
base:normalized_types @=@ Book part @=@ 111
|
||||
base:normalized_types @=@ Journal/Newspaper @=@ 12
|
||||
base:normalized_types @=@ Article contribution @=@ 121
|
||||
base:normalized_types @=@ Other non-article @=@ 122
|
||||
base:normalized_types @=@ Conference object @=@ 13
|
||||
base:normalized_types @=@ Report @=@ 14
|
||||
base:normalized_types @=@ Review @=@ 15
|
||||
base:normalized_types @=@ Course material @=@ 16
|
||||
base:normalized_types @=@ Lecture @=@ 17
|
||||
base:normalized_types @=@ Thesis @=@ 18
|
||||
base:normalized_types @=@ Bachelor's thesis @=@ 181
|
||||
base:normalized_types @=@ Master's thesis @=@ 182
|
||||
base:normalized_types @=@ Doctoral and postdoctoral thesis @=@ 183
|
||||
base:normalized_types @=@ Manuscript @=@ 19
|
||||
base:normalized_types @=@ Patent @=@ 1A
|
||||
base:normalized_types @=@ Musical notation @=@ 2
|
||||
base:normalized_types @=@ Map @=@ 3
|
||||
base:normalized_types @=@ Audio @=@ 4
|
||||
base:normalized_types @=@ Image/Video @=@ 5
|
||||
base:normalized_types @=@ Still image @=@ 51
|
||||
base:normalized_types @=@ Moving image/Video @=@ 52
|
||||
base:normalized_types @=@ Software @=@ 6
|
||||
base:normalized_types @=@ Dataset @=@ 7
|
||||
base:normalized_types @=@ Unknown @=@ F
|
||||
|
|
|
@ -1210,4 +1210,29 @@ cnr:institutes @=@ cnr:institutes @=@ __CDS130__ @=@ __CDS130__
|
|||
cnr:institutes @=@ cnr:institutes @=@ __CDS131__ @=@ __CDS131__
|
||||
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/0263zy895 @=@ https://ror.org/0263zy895
|
||||
cnr:institutes @=@ cnr:institutes @=@ __CDS133__ @=@ __CDS133__
|
||||
|
||||
base:normalized_types @=@ base:normalized_types @=@ Text @=@ Text
|
||||
base:normalized_types @=@ base:normalized_types @=@ Book @=@ Book
|
||||
base:normalized_types @=@ base:normalized_types @=@ Book part @=@ Book part
|
||||
base:normalized_types @=@ base:normalized_types @=@ Journal/Newspaper @=@ Journal/Newspaper
|
||||
base:normalized_types @=@ base:normalized_types @=@ Article contribution @=@ Article contribution
|
||||
base:normalized_types @=@ base:normalized_types @=@ Other non-article @=@ Other non-article
|
||||
base:normalized_types @=@ base:normalized_types @=@ Conference object @=@ Conference object
|
||||
base:normalized_types @=@ base:normalized_types @=@ Report @=@ Report
|
||||
base:normalized_types @=@ base:normalized_types @=@ Review @=@ Review
|
||||
base:normalized_types @=@ base:normalized_types @=@ Course material @=@ Course material
|
||||
base:normalized_types @=@ base:normalized_types @=@ Lecture @=@ Lecture
|
||||
base:normalized_types @=@ base:normalized_types @=@ Thesis @=@ Thesis
|
||||
base:normalized_types @=@ base:normalized_types @=@ Bachelor's thesis @=@ Bachelor's thesis
|
||||
base:normalized_types @=@ base:normalized_types @=@ Master's thesis @=@ Master's thesis
|
||||
base:normalized_types @=@ base:normalized_types @=@ Doctoral and postdoctoral thesis @=@ Doctoral and postdoctoral thesis
|
||||
base:normalized_types @=@ base:normalized_types @=@ Manuscript @=@ Manuscript
|
||||
base:normalized_types @=@ base:normalized_types @=@ Patent @=@ Patent
|
||||
base:normalized_types @=@ base:normalized_types @=@ Musical notation @=@ Musical notation
|
||||
base:normalized_types @=@ base:normalized_types @=@ Map @=@ Map
|
||||
base:normalized_types @=@ base:normalized_types @=@ Audio @=@ Audio
|
||||
base:normalized_types @=@ base:normalized_types @=@ Image/Video @=@ Image/Video
|
||||
base:normalized_types @=@ base:normalized_types @=@ Still image @=@ Still image
|
||||
base:normalized_types @=@ base:normalized_types @=@ Moving image/Video @=@ Moving image/Video
|
||||
base:normalized_types @=@ base:normalized_types @=@ Software @=@ Software
|
||||
base:normalized_types @=@ base:normalized_types @=@ Dataset @=@ Dataset
|
||||
base:normalized_types @=@ base:normalized_types @=@ Unknown @=@ Unknown
|
|
@ -122,22 +122,41 @@ public class DedupRecordFactory {
|
|||
}
|
||||
|
||||
return Stream
|
||||
.concat(Stream.of(agg.getDedupId()), agg.aliases.stream())
|
||||
.map(id -> {
|
||||
try {
|
||||
OafEntity res = (OafEntity) BeanUtils.cloneBean(agg.entity);
|
||||
res.setId(id);
|
||||
res.setDataInfo(dataInfo);
|
||||
res.setLastupdatetimestamp(ts);
|
||||
return res;
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
})
|
||||
.concat(
|
||||
Stream
|
||||
.of(agg.getDedupId())
|
||||
.map(id -> createDedupOafEntity(id, agg.entity, dataInfo, ts)),
|
||||
agg.aliases
|
||||
.stream()
|
||||
.map(id -> createMergedDedupAliasOafEntity(id, agg.entity, dataInfo, ts)))
|
||||
.iterator();
|
||||
}, beanEncoder);
|
||||
}
|
||||
|
||||
private static OafEntity createDedupOafEntity(String id, OafEntity base, DataInfo dataInfo, long ts) {
|
||||
try {
|
||||
OafEntity res = (OafEntity) BeanUtils.cloneBean(base);
|
||||
res.setId(id);
|
||||
res.setDataInfo(dataInfo);
|
||||
res.setLastupdatetimestamp(ts);
|
||||
return res;
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private static OafEntity createMergedDedupAliasOafEntity(String id, OafEntity base, DataInfo dataInfo, long ts) {
|
||||
try {
|
||||
OafEntity res = createDedupOafEntity(id, base, dataInfo, ts);
|
||||
DataInfo ds = (DataInfo) BeanUtils.cloneBean(dataInfo);
|
||||
ds.setDeletedbyinference(true);
|
||||
res.setDataInfo(ds);
|
||||
return res;
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private static OafEntity reduceEntity(OafEntity entity, OafEntity duplicate) {
|
||||
|
||||
if (duplicate == null) {
|
||||
|
|
|
@ -23,10 +23,15 @@ class CrossrefMappingTest {
|
|||
val mapper = new ObjectMapper()
|
||||
|
||||
@Test
|
||||
def testMissingAuthorParser():Unit = {
|
||||
val json: String = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/s41567-022-01757-y.json")).mkString
|
||||
def testMissingAuthorParser(): Unit = {
|
||||
val json: String = Source
|
||||
.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/s41567-022-01757-y.json"))
|
||||
.mkString
|
||||
val result = Crossref2Oaf.convert(json)
|
||||
result.filter(o => o.isInstanceOf[Publication]).map(p=> p.asInstanceOf[Publication]).foreach(p =>assertTrue(p.getAuthor.size()>0))
|
||||
result
|
||||
.filter(o => o.isInstanceOf[Publication])
|
||||
.map(p => p.asInstanceOf[Publication])
|
||||
.foreach(p => assertTrue(p.getAuthor.size() > 0))
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -30,7 +30,7 @@ public class MoveResult implements Serializable {
|
|||
public static void main(String[] args) throws Exception {
|
||||
String jsonConfiguration = IOUtils
|
||||
.toString(
|
||||
MoveResult.class
|
||||
MoveResult.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/wf/subworkflows/input_moveresult_parameters.json"));
|
||||
|
||||
|
|
|
@ -114,7 +114,7 @@
|
|||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/orcid/preparedInfo/targetOrcidAssoc</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/orcid/targetOrcidAssoc</arg>
|
||||
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
|
||||
</spark>
|
||||
<ok to="wait"/>
|
||||
|
@ -142,7 +142,7 @@
|
|||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/orcid/preparedInfo/targetOrcidAssoc</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/orcid/targetOrcidAssoc</arg>
|
||||
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
|
||||
</spark>
|
||||
<ok to="wait"/>
|
||||
|
@ -170,7 +170,7 @@
|
|||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/orcid/preparedInfo/targetOrcidAssoc</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/orcid/targetOrcidAssoc</arg>
|
||||
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
|
||||
</spark>
|
||||
<ok to="wait"/>
|
||||
|
@ -198,7 +198,7 @@
|
|||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/orcid/preparedInfo/targetOrcidAssoc</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/orcid/targetOrcidAssoc</arg>
|
||||
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
|
||||
</spark>
|
||||
<ok to="wait"/>
|
||||
|
@ -225,8 +225,8 @@
|
|||
--conf spark.dynamicAllocation.enabled=true
|
||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/orcid/orcidprop</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/orcid/orcidprop/mergedOrcidAssoc</arg>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/orcid/targetOrcidAssoc</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/orcid/mergedOrcidAssoc</arg>
|
||||
</spark>
|
||||
<ok to="fork-join-exec-propagation"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -247,9 +247,10 @@
|
|||
<class>eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob</class>
|
||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=4
|
||||
--executor-memory=4G
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.executor.memoryOverhead=5G
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
|
@ -259,9 +260,9 @@
|
|||
--conf spark.speculation=false
|
||||
--conf spark.hadoop.mapreduce.map.speculative=false
|
||||
--conf spark.hadoop.mapreduce.reduce.speculative=false
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
--conf spark.sql.shuffle.partitions=15000
|
||||
</spark-opts>
|
||||
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/orcidprop/mergedOrcidAssoc</arg>
|
||||
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/mergedOrcidAssoc</arg>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
|
||||
|
@ -291,7 +292,7 @@
|
|||
--conf spark.hadoop.mapreduce.map.speculative=false
|
||||
--conf spark.hadoop.mapreduce.reduce.speculative=false
|
||||
</spark-opts>
|
||||
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/orcidprop/mergedOrcidAssoc</arg>
|
||||
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/mergedOrcidAssoc</arg>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
|
||||
|
@ -321,7 +322,7 @@
|
|||
--conf spark.hadoop.mapreduce.map.speculative=false
|
||||
--conf spark.hadoop.mapreduce.reduce.speculative=false
|
||||
</spark-opts>
|
||||
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/orcidprop/mergedOrcidAssoc</arg>
|
||||
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/mergedOrcidAssoc</arg>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
|
||||
|
@ -351,7 +352,7 @@
|
|||
--conf spark.hadoop.mapreduce.map.speculative=false
|
||||
--conf spark.hadoop.mapreduce.reduce.speculative=false
|
||||
</spark-opts>
|
||||
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/orcidprop/mergedOrcidAssoc</arg>
|
||||
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcid/mergedOrcidAssoc</arg>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/software</arg>
|
||||
|
|
|
@ -317,7 +317,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
listKeyValues(
|
||||
createOpenaireId(10, rs.getString("collectedfromid"), true),
|
||||
rs.getString("collectedfromname")));
|
||||
p.setPid(new ArrayList<>());
|
||||
p.setPid(prepareListOfStructProps(rs.getArray("pid"), info));
|
||||
p.setDateofcollection(asString(rs.getDate("dateofcollection")));
|
||||
p.setDateoftransformation(asString(rs.getDate("dateoftransformation")));
|
||||
p.setExtraInfo(new ArrayList<>()); // Values not present in the DB
|
||||
|
|
|
@ -238,11 +238,23 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
|||
(Element) doc
|
||||
.selectSingleNode(
|
||||
"//*[local-name()='metadata']/*[local-name() = 'resource']/*[local-name() = 'resourceType']"))
|
||||
.map(element -> {
|
||||
final String resourceTypeURI = element.attributeValue("uri");
|
||||
final String resourceTypeAnyURI = element.attributeValue("anyURI");
|
||||
final String resourceTypeTxt = element.getText();
|
||||
final String resourceTypeGeneral = element.attributeValue("resourceTypeGeneral");
|
||||
.map(e -> {
|
||||
final String resourceTypeURI = Optional
|
||||
.ofNullable(e.attributeValue("uri"))
|
||||
.filter(StringUtils::isNotBlank)
|
||||
.orElse(null);
|
||||
final String resourceTypeAnyURI = Optional
|
||||
.ofNullable(e.attributeValue("anyURI"))
|
||||
.filter(StringUtils::isNotBlank)
|
||||
.orElse(null);
|
||||
final String resourceTypeTxt = Optional
|
||||
.ofNullable(e.getText())
|
||||
.filter(StringUtils::isNotBlank)
|
||||
.orElse(null);
|
||||
final String resourceTypeGeneral = Optional
|
||||
.ofNullable(e.attributeValue("resourceTypeGeneral"))
|
||||
.filter(StringUtils::isNotBlank)
|
||||
.orElse(null);
|
||||
|
||||
return ObjectUtils
|
||||
.firstNonNull(resourceTypeURI, resourceTypeAnyURI, resourceTypeTxt, resourceTypeGeneral);
|
||||
|
|
|
@ -49,10 +49,10 @@
|
|||
|
||||
<action name="reset_outputpath">
|
||||
<fs>
|
||||
<delete path="${graphPath}/datasource"/>
|
||||
<delete path="${graphPath}/organization"/>
|
||||
<delete path="${graphPath}/project"/>
|
||||
<delete path="${graphPath}/relation"/>
|
||||
<delete path="${targetPath}/datasource"/>
|
||||
<delete path="${targetPath}/organization"/>
|
||||
<delete path="${targetPath}/project"/>
|
||||
<delete path="${targetPath}/relation"/>
|
||||
</fs>
|
||||
<ok to="copy_datasource"/>
|
||||
<error to="Kill"/>
|
||||
|
|
|
@ -33,7 +33,7 @@ SELECT
|
|||
dc.officialname AS collectedfromname,
|
||||
p.contracttype || '@@@' || p.contracttypescheme AS contracttype,
|
||||
p.provenanceactionclass || '@@@' || p.provenanceactionscheme AS provenanceaction,
|
||||
array_agg(DISTINCT i.pid || '###' || i.issuertype) AS pid,
|
||||
array_remove(array_agg(DISTINCT i.pid || '###' || i.issuertype || '@@@' || i.issuertype), NULL) AS pid,,
|
||||
array_agg(DISTINCT s.name || '###' || s.semanticclass || '@@@' || s.semanticscheme) AS subjects,
|
||||
array_agg(DISTINCT fp.path) AS fundingtree
|
||||
|
||||
|
|
|
@ -33,7 +33,7 @@ SELECT
|
|||
dc.officialname AS collectedfromname,
|
||||
p.contracttypeclass || '@@@' || p.contracttypescheme AS contracttype,
|
||||
p.provenanceactionclass || '@@@' || p.provenanceactionscheme AS provenanceaction,
|
||||
array_agg(DISTINCT i.pid || '###' || i.issuertype) AS pid,
|
||||
array_remove(array_agg(DISTINCT i.pid || '###' || i.issuertype || '@@@' || i.issuertype), NULL) AS pid,
|
||||
array_agg(DISTINCT s.name || '###' || s.semanticclass || '@@@' || s.semanticscheme) AS subjects,
|
||||
array_agg(DISTINCT fp.path) AS fundingtree
|
||||
FROM projects p
|
||||
|
|
|
@ -93,8 +93,8 @@ object CopyHdfsOafSparkApplication {
|
|||
hasSource != null && hasTarget != null
|
||||
} else {
|
||||
val hasId = (json \ "id").extractOrElse[String](null)
|
||||
val resultType = (json \ "resulttype" \ "classid").extractOrElse[String](null)
|
||||
hasId != null && oafType.equalsIgnoreCase(resultType)
|
||||
val resultType = (json \ "resulttype" \ "classid").extractOrElse[String]("")
|
||||
hasId != null && oafType.startsWith(resultType)
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -59,7 +59,19 @@ public class CopyHdfsOafSparkApplicationTest {
|
|||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/oa/graph/raw/publication_2_unknownProperty.json")),
|
||||
"publication"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void isOafType_Datacite_ORP() throws IOException {
|
||||
assertTrue(
|
||||
CopyHdfsOafSparkApplication
|
||||
.isOafType(
|
||||
IOUtils
|
||||
.toString(
|
||||
getClass()
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/oa/graph/raw/datacite_orp.json")),
|
||||
"otherresearchproduct"));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1171,6 +1171,34 @@ class MappersTest {
|
|||
|
||||
}
|
||||
|
||||
@Test
|
||||
void test_Zenodo2() throws IOException {
|
||||
final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_zenodo2.xml")));
|
||||
final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
|
||||
|
||||
assertEquals(3, list.size());
|
||||
Publication p = cleanup((Publication) list.get(0), vocs);
|
||||
|
||||
assertNotNull(p.getInstance());
|
||||
assertEquals(1, p.getInstance().size());
|
||||
|
||||
final Instance instance = p.getInstance().get(0);
|
||||
|
||||
assertNotNull(instance.getInstanceTypeMapping());
|
||||
assertEquals(1, instance.getInstanceTypeMapping().size());
|
||||
|
||||
Optional<InstanceTypeMapping> coarType = instance
|
||||
.getInstanceTypeMapping()
|
||||
.stream()
|
||||
.filter(itm -> ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1.equals(itm.getVocabularyName()))
|
||||
.findFirst();
|
||||
|
||||
assertTrue(coarType.isPresent());
|
||||
assertNotNull(coarType.get().getOriginalType());
|
||||
assertNull(coarType.get().getTypeCode());
|
||||
assertNull(coarType.get().getTypeLabel());
|
||||
}
|
||||
|
||||
@Test
|
||||
void testROHub2() throws IOException {
|
||||
final String xml = IOUtils
|
||||
|
@ -1229,7 +1257,7 @@ class MappersTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testD4ScienceTraining() throws IOException {
|
||||
void testD4ScienceTraining() throws IOException {
|
||||
final String xml = IOUtils
|
||||
.toString(Objects.requireNonNull(getClass().getResourceAsStream("d4science-1-training.xml")));
|
||||
final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
|
||||
|
@ -1240,7 +1268,7 @@ class MappersTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testD4ScienceDataset() throws IOException {
|
||||
void testD4ScienceDataset() throws IOException {
|
||||
final String xml = IOUtils
|
||||
.toString(Objects.requireNonNull(getClass().getResourceAsStream("d4science-2-dataset.xml")));
|
||||
final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
|
||||
|
@ -1250,6 +1278,22 @@ class MappersTest {
|
|||
System.out.println("***************");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testIRISPub() throws IOException, DocumentException {
|
||||
final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("iris-odf.xml")));
|
||||
final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
|
||||
System.out.println("***************");
|
||||
System.out.println(new ObjectMapper().writeValueAsString(list));
|
||||
System.out.println("***************");
|
||||
final Publication p = (Publication) list.get(0);
|
||||
assertNotNull(p.getInstance().get(0).getUrl().get(0));
|
||||
assertValidId(p.getId());
|
||||
System.out.println(p.getInstance().get(0).getUrl());
|
||||
p.getPid().forEach(x -> System.out.println(x.getValue()));
|
||||
p.getInstance().get(0).getAlternateIdentifier().forEach(x -> System.out.println(x.getValue()));
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
void testNotWellFormed() throws IOException {
|
||||
final String xml = IOUtils
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,215 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<record xmlns:datacite="http://datacite.org/schema/kernel-4"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
|
||||
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
|
||||
xmlns:oaf="http://namespace.openaire.eu/oaf"
|
||||
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
|
||||
xmlns:oaire="http://namespace.openaire.eu/schema/oaire/"
|
||||
xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
||||
<oai:header xmlns="http://namespace.openaire.eu/" xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance">
|
||||
<identifier>oai:air.unimi.it:2434/907506</identifier>
|
||||
<datestamp>2024-01-04T12:42:51Z</datestamp>
|
||||
<setSpec>com_2434_73555</setSpec>
|
||||
<setSpec>col_2434_73557</setSpec>
|
||||
<setSpec>openaire</setSpec>
|
||||
<dr:dateOfTransformation>2024-01-29T16:56:50.632Z</dr:dateOfTransformation>
|
||||
|
||||
<dri:objIdentifier>od______1261::ff2d9e058e7bea90a27f41c31078e601</dri:objIdentifier>
|
||||
<dri:recordIdentifier>oai:air.unimi.it:2434/907506</dri:recordIdentifier>
|
||||
<dri:dateOfCollection/>
|
||||
<dri:mdFormat/>
|
||||
<dri:mdFormatInterpretation/>
|
||||
<dri:repositoryId/>
|
||||
<oaf:datasourceprefix> od______1261</oaf:datasourceprefix>
|
||||
</oai:header>
|
||||
<metadata>
|
||||
<oaire:resource xmlns:oaire="http://namespace.openaire.eu/schema/oaire/"
|
||||
xmlns:exslt="http://exslt.org/common"
|
||||
xmlns:xs="http://www.w3.org/2001/XMLSchema"
|
||||
xmlns:rdf="http://www.w3.org/TR/rdf-concepts/"
|
||||
xmlns:doc="http://www.lyncode.com/xoai"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:datacite="http://datacite.org/schema/kernel-4"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xmlns:vc="http://www.w3.org/2007/XMLSchema-versioning"
|
||||
xmlns="http://www.openarchives.org/OAI/2.0/"
|
||||
xsi:schemaLocation="http://namespace.openaire.eu/schema/oaire/ https://www.openaire.eu/schema/repo-lit/4.0/openaire.xsd">
|
||||
<datacite:titles>
|
||||
<datacite:title xml:lang="en">Ensuring tests of conservation interventions build on existing literature</datacite:title>
|
||||
</datacite:titles>
|
||||
<datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>W.J. Sutherland</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>S.T. Alvarez-Castaneda</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>T. Amano</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>R. Ambrosini</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>P. Atkinson</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>J.M. Baxter</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>A.L. Bond</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>P.J. Boon</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>K.L. Buchanan</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>J. Barlow</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>G. Bogliani</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>O.M. Bragg</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>M. Burgman</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>M.W. Cadotte</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>M. Calver</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>S.J. Cooke</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>R.T. Corlett</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>V. Devictor</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>J.G. Ewen</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>M. Fisher</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>G. Freeman</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>E. Game</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>B.J. Godley</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>C. Gortazar</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>I.R. Hartley</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>D.L. Hawksworth</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>K.A. Hobson</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>M.-. Lu</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>B. Martin-Lopez</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>K. Ma</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>A. Machado</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>D. Mae</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>M. Mangiacotti</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>D.J. Mccafferty</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>V. Melfi</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>S. Molur</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>A.J. Moore</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>S.D. Murphy</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>D. Norri</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>A.P.E. van Oudenhoven</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>J. Power</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>E.C. Ree</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>M.W. Schwartz</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>I. Storch</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>C. Wordley</datacite:creatorName>
|
||||
</datacite:creator>
|
||||
</datacite:creator>
|
||||
<datacite:relatedIdentifiers>
|
||||
</datacite:relatedIdentifiers>
|
||||
<datacite:dates>
|
||||
<datacite:date dateType="Accepted">2020</datacite:date>
|
||||
<datacite:date dateType="Issued">2020</datacite:date>
|
||||
<datacite:date dateType="Available">2022-06-20</datacite:date>
|
||||
</datacite:dates>
|
||||
<dc:language>eng</dc:language>
|
||||
<dc:publisher>Wiley Blackwell Publishing</dc:publisher>
|
||||
<oaire:resourceType resourceTypeGeneral="literature"
|
||||
uri="http://purl.org/coar/resource_type/c_6501">journal article</oaire:resourceType>
|
||||
<dc:format>application/pdf</dc:format>
|
||||
<datacite:identifier xmlns:datacite="http://datacite.org/schema/kernel-3"
|
||||
identifierType="Handle">2434/907506</datacite:identifier>
|
||||
<datacite:rights rightsURI="http://purl.org/coar/access_right/c_abf2">open access</datacite:rights>
|
||||
<datacite:subjects>
|
||||
<datacite:subject>Conservation of Natural Resources</datacite:subject>
|
||||
</datacite:subjects>
|
||||
<datacite:sizes/>
|
||||
<datacite:sizes/>
|
||||
<datacite:sizes>
|
||||
<datacite:size>191802 bytes</datacite:size>
|
||||
</datacite:sizes>
|
||||
<oaire:file accessRightsURI="" mimeType="application/pdf" objectType="fulltext">https://air.unimi.it/bitstream/2434/907506/4/Full%20manuscript%20resubmitted.pdf</oaire:file>
|
||||
</oaire:resource>
|
||||
<oaf:identifier identifierType="DOI">10.1111/cobi.13555</oaf:identifier>
|
||||
<oaf:identifier identifierType="PMID">32779884</oaf:identifier>
|
||||
<oaf:fulltext>https://air.unimi.it/bitstream/2434/907506/4/Full%20manuscript%20resubmitted.pdf</oaf:fulltext>
|
||||
<dr:CobjCategory type="publication">0001</dr:CobjCategory>
|
||||
<oaf:dateAccepted>2020-01-01</oaf:dateAccepted>
|
||||
<oaf:accessrights>OPEN</oaf:accessrights>
|
||||
<oaf:language>eng</oaf:language>
|
||||
<oaf:hostedBy name="Archivio Istituzionale della Ricerca dell'Università degli Studi di Milano"
|
||||
id="opendoar____::1261"/>
|
||||
<oaf:collectedFrom name="Archivio Istituzionale della Ricerca dell'Università degli Studi di Milano"
|
||||
id="opendoar____::1261"/>
|
||||
</metadata>
|
||||
</record>
|
|
@ -0,0 +1,59 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<record xmlns:oaf="http://namespace.openaire.eu/oaf"
|
||||
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
|
||||
xmlns:datacite="http://datacite.org/schema/kernel-3"
|
||||
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
|
||||
xmlns:dri="http://www.driver-repository.eu/namespace/dri">
|
||||
<header xmlns="http://www.openarchives.org/OAI/2.0/">
|
||||
<identifier>oai:zenodo.org:1596086</identifier>
|
||||
<datestamp>2020-01-20T13:50:28Z</datestamp>
|
||||
<setSpec>openaire</setSpec>
|
||||
<dr:dateOfTransformation>2024-02-08T11:03:10.994Z</dr:dateOfTransformation>
|
||||
<dri:objIdentifier>od______2659::036d5555a6688ed00c8d0da97bdece3b</dri:objIdentifier>
|
||||
<dri:dateOfCollection>2024-02-08T11:03:10.994Z</dri:dateOfCollection>
|
||||
<dri:dateOfTransformation>2024-02-08T11:03:10.994Z</dri:dateOfTransformation>
|
||||
</header>
|
||||
<metadata>
|
||||
<resource xmlns="http://datacite.org/schema/kernel-4"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.1/metadata.xsd">
|
||||
<identifier identifierType="URL">https://zenodo.org/record/1596086</identifier>
|
||||
<alternateIdentifiers xmlns="http://datacite.org/schema/kernel-3"/>
|
||||
<creators>
|
||||
<creator>
|
||||
<creatorName>Bonney, T. G.</creatorName>
|
||||
<givenName>T. G.</givenName>
|
||||
<familyName>Bonney</familyName>
|
||||
</creator>
|
||||
</creators>
|
||||
<titles>
|
||||
<title>Ice Blocks on a Moraine</title>
|
||||
</titles>
|
||||
<publisher>Zenodo</publisher>
|
||||
<publicationYear>1889</publicationYear>
|
||||
<dates>
|
||||
<date dateType="Issued">1889-08-22</date>
|
||||
</dates>
|
||||
<resourceType resourceTypeGeneral="JournalArticle"/>
|
||||
<relatedIdentifiers>
|
||||
<relatedIdentifier relatedIdentifierType="DOI" relationType="IsIdenticalTo"
|
||||
>10.1038/040391a0</relatedIdentifier>
|
||||
</relatedIdentifiers>
|
||||
<rightsList>
|
||||
<rights rightsURI="https://creativecommons.org/publicdomain/zero/1.0/legalcode"
|
||||
>Creative Commons Zero v1.0 Universal</rights>
|
||||
<rights rightsURI="info:eu-repo/semantics/openAccess">Open Access</rights>
|
||||
</rightsList>
|
||||
<descriptions>
|
||||
<description descriptionType="Abstract">n/a</description>
|
||||
</descriptions>
|
||||
</resource>
|
||||
<dr:CobjCategory type="publication">0001</dr:CobjCategory>
|
||||
<oaf:dateAccepted>1889-08-22</oaf:dateAccepted>
|
||||
<oaf:accessrights>OPEN</oaf:accessrights>
|
||||
<oaf:license>http://creativecommons.org/publicdomain/zero/1.0/legalcode</oaf:license>
|
||||
<oaf:language/>
|
||||
<oaf:hostedBy name="ZENODO" id="opendoar____::2659"/>
|
||||
<oaf:collectedFrom name="ZENODO" id="opendoar____::2659"/>
|
||||
</metadata>
|
||||
</record>
|
|
@ -244,4 +244,27 @@ public class XmlRecordFactoryTest {
|
|||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIrisGuidelines4() throws DocumentException, IOException {
|
||||
final ContextMapper contextMapper = new ContextMapper();
|
||||
|
||||
final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
|
||||
XmlConverterJob.schemaLocation);
|
||||
|
||||
final Publication p = OBJECT_MAPPER
|
||||
.readValue(
|
||||
IOUtils.toString(getClass().getResourceAsStream("iris-odf-4.json")),
|
||||
Publication.class);
|
||||
|
||||
final String xml = xmlRecordFactory.build(new JoinedEntity<>(p));
|
||||
|
||||
assertNotNull(xml);
|
||||
|
||||
final Document doc = new SAXReader().read(new StringReader(xml));
|
||||
|
||||
assertNotNull(doc);
|
||||
System.out.println(doc.asXML());
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue