Merge branch 'beta' into bulkTaggingPathMapExtention
commit
91b61687fa
@ -0,0 +1,79 @@
|
||||
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||
|
||||
//
|
||||
// Source code recreated from a .class file by IntelliJ IDEA
|
||||
// (powered by FernFlower decompiler)
|
||||
//
|
||||
|
||||
|
||||
import eu.dnetlib.dhp.schema.common.EntityType;
|
||||
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.HashSet;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class MergeComparator implements Comparator<Oaf> {
|
||||
public MergeComparator() {
|
||||
}
|
||||
|
||||
public int compare(Oaf left, Oaf right) {
|
||||
// nulls at the end
|
||||
if (left == null && right == null) {
|
||||
return 0;
|
||||
} else if (left == null) {
|
||||
return -1;
|
||||
} else if (right == null) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// invisible
|
||||
if (left.getDataInfo() != null && left.getDataInfo().getInvisible() == true) {
|
||||
if (right.getDataInfo() != null && right.getDataInfo().getInvisible() == false) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// collectedfrom
|
||||
HashSet<String> lCf = getCollectedFromIds(left);
|
||||
HashSet<String> rCf = getCollectedFromIds(right);
|
||||
if (lCf.contains("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2") && !rCf.contains("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")) {
|
||||
return -1;
|
||||
} else if (!lCf.contains("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2") && rCf.contains("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
SubEntityType lClass = SubEntityType.fromClass(left.getClass());
|
||||
SubEntityType rClass = SubEntityType.fromClass(right.getClass());
|
||||
return lClass.ordinal() - rClass.ordinal();
|
||||
|
||||
}
|
||||
|
||||
protected HashSet<String> getCollectedFromIds(Oaf left) {
|
||||
return (HashSet) Optional.ofNullable(left.getCollectedfrom()).map((cf) -> {
|
||||
return (HashSet) cf.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new));
|
||||
}).orElse(new HashSet());
|
||||
}
|
||||
|
||||
enum SubEntityType {
|
||||
publication, dataset, software, otherresearchproduct, datasource, organization, project;
|
||||
|
||||
/**
|
||||
* Resolves the EntityType, given the relative class name
|
||||
*
|
||||
* @param clazz the given class name
|
||||
* @param <T> actual OafEntity subclass
|
||||
* @return the EntityType associated to the given class
|
||||
*/
|
||||
public static <T extends Oaf> SubEntityType fromClass(Class<T> clazz) {
|
||||
return valueOf(clazz.getSimpleName().toLowerCase());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,707 @@
|
||||
|
||||
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||
|
||||
import eu.dnetlib.dhp.schema.common.AccessRightComparator;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.lang3.tuple.ImmutablePair;
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import java.text.ParseException;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import static com.google.common.base.Objects.firstNonNull;
|
||||
import static com.google.common.base.Preconditions.checkArgument;
|
||||
|
||||
public class MergeUtils {
|
||||
|
||||
public static <T extends Oaf> T checkedMerge(final T left, final T right) {
|
||||
return (T) merge(left, right, false);
|
||||
}
|
||||
|
||||
public static Oaf merge(final Oaf left, final Oaf right) {
|
||||
return merge(left, right, false);
|
||||
}
|
||||
|
||||
public static Oaf merge(final Oaf left, final Oaf right, boolean checkDelegatedAuthority) {
|
||||
if (sameClass(left, right, OafEntity.class)) {
|
||||
return mergeEntities(left, right, checkDelegatedAuthority);
|
||||
} else if (sameClass(left, right, Relation.class)) {
|
||||
return mergeRelation((Relation) left, (Relation) right);
|
||||
} else {
|
||||
throw new RuntimeException(
|
||||
String
|
||||
.format(
|
||||
"MERGE_FROM_AND_GET incompatible types: %s, %s",
|
||||
left.getClass().getCanonicalName(), right.getClass().getCanonicalName()));
|
||||
}
|
||||
}
|
||||
|
||||
private static <T extends Oaf> boolean sameClass(Object left, Object right, Class<T> cls) {
|
||||
return cls.isAssignableFrom(left.getClass()) && cls.isAssignableFrom(right.getClass());
|
||||
}
|
||||
|
||||
private static Oaf mergeEntities(Oaf left, Oaf right, boolean checkDelegatedAuthority) {
|
||||
|
||||
if (sameClass(left, right, Result.class)) {
|
||||
if (!left.getClass().equals(right.getClass()) || checkDelegatedAuthority) {
|
||||
return mergeResultsOfDifferentTypes((Result)left, (Result) right);
|
||||
}
|
||||
|
||||
if (sameClass(left, right, Publication.class)) {
|
||||
return mergePublication((Publication) left, (Publication) right);
|
||||
}
|
||||
if (sameClass(left, right, Dataset.class)) {
|
||||
return mergeDataset((Dataset) left, (Dataset) right);
|
||||
}
|
||||
if (sameClass(left, right, OtherResearchProduct.class)) {
|
||||
return mergeORP((OtherResearchProduct) left, (OtherResearchProduct) right);
|
||||
}
|
||||
if (sameClass(left, right, Software.class)) {
|
||||
return mergeSoftware((Software) left, (Software) right);
|
||||
}
|
||||
|
||||
return mergeResult((Result) left, (Result) right);
|
||||
} else if (sameClass(left, right, Datasource.class)) {
|
||||
// TODO
|
||||
final int trust = compareTrust(left, right);
|
||||
return mergeOafEntityFields((Datasource) left, (Datasource) right, trust);
|
||||
} else if (sameClass(left, right, Organization.class)) {
|
||||
return mergeOrganization((Organization) left, (Organization) right);
|
||||
} else if (sameClass(left, right, Project.class)) {
|
||||
return mergeProject((Project) left, (Project) right);
|
||||
} else {
|
||||
throw new RuntimeException(
|
||||
String
|
||||
.format(
|
||||
"MERGE_FROM_AND_GET incompatible types: %s, %s",
|
||||
left.getClass().getCanonicalName(), right.getClass().getCanonicalName()));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is used in the global result grouping phase. It checks if one of the two is from a delegated authority
|
||||
* https://graph.openaire.eu/docs/data-model/pids-and-identifiers#delegated-authorities and in that case it prefers
|
||||
* such version.
|
||||
* <p>
|
||||
* Otherwise, it considers a resulttype priority order implemented in {@link ResultTypeComparator}
|
||||
* and proceeds with the canonical property merging.
|
||||
*
|
||||
* @param left
|
||||
* @param right
|
||||
* @return
|
||||
*/
|
||||
private static <T extends Result> T mergeResultsOfDifferentTypes(T left, T right) {
|
||||
|
||||
final boolean leftFromDelegatedAuthority = isFromDelegatedAuthority(left);
|
||||
final boolean rightFromDelegatedAuthority = isFromDelegatedAuthority(right);
|
||||
|
||||
if (leftFromDelegatedAuthority && !rightFromDelegatedAuthority) {
|
||||
return left;
|
||||
}
|
||||
if (!leftFromDelegatedAuthority && rightFromDelegatedAuthority) {
|
||||
return right;
|
||||
}
|
||||
//TODO: raise trust to have preferred fields from one or the other??
|
||||
if (new ResultTypeComparator().compare(left, right) < 0) {
|
||||
return mergeResult(left, right);
|
||||
} else {
|
||||
return mergeResult(right, left);
|
||||
}
|
||||
}
|
||||
|
||||
private static DataInfo chooseDataInfo(DataInfo left, DataInfo right, int trust) {
|
||||
if (trust > 0) {
|
||||
return left;
|
||||
} else if (trust == 0) {
|
||||
if (left == null || (left.getInvisible() != null && left.getInvisible().equals(Boolean.TRUE))) {
|
||||
return right;
|
||||
} else {
|
||||
return left;
|
||||
}
|
||||
} else {
|
||||
return right;
|
||||
}
|
||||
}
|
||||
|
||||
private static String chooseString(String left, String right, int trust) {
|
||||
if (trust > 0) {
|
||||
return left;
|
||||
} else if (trust == 0) {
|
||||
return StringUtils.isNotBlank(left) ? left : right;
|
||||
} else {
|
||||
return right;
|
||||
}
|
||||
}
|
||||
|
||||
private static <T> T chooseReference(T left, T right, int trust) {
|
||||
if (trust > 0) {
|
||||
return left;
|
||||
} else if (trust == 0) {
|
||||
return left != null ? left : right;
|
||||
} else {
|
||||
return right;
|
||||
}
|
||||
}
|
||||
|
||||
private static Long max(Long left, Long right) {
|
||||
if (left == null)
|
||||
return right;
|
||||
if (right == null)
|
||||
return left;
|
||||
|
||||
return Math.max(left, right);
|
||||
}
|
||||
|
||||
// trust ??
|
||||
private static Boolean booleanOR(Boolean a, Boolean b) {
|
||||
if (a == null) {
|
||||
return b;
|
||||
} else if (b == null) {
|
||||
return a;
|
||||
}
|
||||
|
||||
return a || b;
|
||||
}
|
||||
|
||||
|
||||
private static <T> List<T> unionDistinctLists(final List<T> left, final List<T> right, int trust) {
|
||||
if (left == null) {
|
||||
return right;
|
||||
} else if (right == null) {
|
||||
return left;
|
||||
}
|
||||
|
||||
List<T> h = trust >= 0 ? left : right;
|
||||
List<T> l = trust >= 0 ? right : left;
|
||||
|
||||
return Stream.concat(h.stream(), l.stream())
|
||||
.filter(Objects::nonNull)
|
||||
.distinct()
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
private static List<String> unionDistinctListOfString(final List<String> l, final List<String> r) {
|
||||
if (l == null) {
|
||||
return r;
|
||||
} else if (r == null) {
|
||||
return l;
|
||||
}
|
||||
|
||||
return Stream.concat(l.stream(), r.stream())
|
||||
.filter(StringUtils::isNotBlank)
|
||||
.distinct()
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
//TODO review
|
||||
private static List<KeyValue> mergeKeyValue(List<KeyValue> left, List<KeyValue> right, int trust) {
|
||||
if (trust < 0) {
|
||||
List<KeyValue> s = left;
|
||||
left = right;
|
||||
right = s;
|
||||
}
|
||||
|
||||
HashMap<String, KeyValue> values = new HashMap<>();
|
||||
left.forEach(kv -> values.put(kv.getKey(), kv));
|
||||
right.forEach(kv -> values.putIfAbsent(kv.getKey(), kv));
|
||||
|
||||
return new ArrayList<>(values.values());
|
||||
}
|
||||
|
||||
private static List<StructuredProperty> unionTitle(List<StructuredProperty> left, List<StructuredProperty> right, int trust) {
|
||||
if (left == null) {
|
||||
return right;
|
||||
} else if (right == null) {
|
||||
return left;
|
||||
}
|
||||
|
||||
List<StructuredProperty> h = trust >= 0 ? left : right;
|
||||
List<StructuredProperty> l = trust >= 0 ? right : left;
|
||||
|
||||
return Stream.concat(h.stream(), l.stream())
|
||||
.filter(Objects::isNull)
|
||||
.distinct()
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal utility that merges the common OafEntity fields
|
||||
*
|
||||
* @param merged
|
||||
* @param enrich
|
||||
* @param <T>
|
||||
* @return
|
||||
*/
|
||||
private static <T extends Oaf> T mergeOafFields(T merged, T enrich, int trust) {
|
||||
|
||||
//TODO: union of all values, but what does it mean with KeyValue pairs???
|
||||
merged.setCollectedfrom(mergeKeyValue(merged.getCollectedfrom(), enrich.getCollectedfrom(), trust));
|
||||
merged.setDataInfo(chooseDataInfo(merged.getDataInfo(), enrich.getDataInfo(), trust));
|
||||
merged.setLastupdatetimestamp(max(merged.getLastupdatetimestamp(), enrich.getLastupdatetimestamp()));
|
||||
|
||||
return merged;
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal utility that merges the common OafEntity fields
|
||||
*
|
||||
* @param original
|
||||
* @param enrich
|
||||
* @param <T>
|
||||
* @return
|
||||
*/
|
||||
private static <T extends OafEntity> T mergeOafEntityFields(T original, T enrich, int trust) {
|
||||
final T merged = mergeOafFields(original, enrich, trust);
|
||||
|
||||
merged.setOriginalId(unionDistinctListOfString(merged.getOriginalId(), enrich.getOriginalId()));
|
||||
merged.setPid(unionDistinctLists(merged.getPid(), enrich.getPid(), trust));
|
||||
// dateofcollection mettere today quando si fa merge
|
||||
merged.setDateofcollection(chooseString(merged.getDateofcollection(), enrich.getDateofcollection(), trust));
|
||||
// setDateoftransformation mettere vuota in dedup, nota per Claudio
|
||||
merged.setDateoftransformation(chooseString(merged.getDateoftransformation(), enrich.getDateoftransformation(), trust));
|
||||
// TODO: was missing in OafEntity.merge
|
||||
merged.setExtraInfo(unionDistinctLists(merged.getExtraInfo(), enrich.getExtraInfo(), trust));
|
||||
//oaiprovenanze da mettere a null quando si genera merge
|
||||
merged.setOaiprovenance(chooseReference(merged.getOaiprovenance(), enrich.getOaiprovenance(), trust));
|
||||
merged.setMeasures(unionDistinctLists(merged.getMeasures(), enrich.getMeasures(), trust));
|
||||
|
||||
return merged;
|
||||
}
|
||||
|
||||
|
||||
public static <T extends Relation> T mergeRelation(T original, T enrich) {
|
||||
int trust = compareTrust(original, enrich);
|
||||
T merge = mergeOafFields(original, enrich, trust);
|
||||
|
||||
checkArgument(Objects.equals(merge.getSource(), enrich.getSource()), "source ids must be equal");
|
||||
checkArgument(Objects.equals(merge.getTarget(), enrich.getTarget()), "target ids must be equal");
|
||||
checkArgument(Objects.equals(merge.getRelType(), enrich.getRelType()), "relType(s) must be equal");
|
||||
checkArgument(
|
||||
Objects.equals(merge.getSubRelType(), enrich.getSubRelType()), "subRelType(s) must be equal");
|
||||
checkArgument(Objects.equals(merge.getRelClass(), enrich.getRelClass()), "relClass(es) must be equal");
|
||||
|
||||
//merge.setProvenance(mergeLists(merge.getProvenance(), enrich.getProvenance()));
|
||||
|
||||
//TODO: trust ??
|
||||
merge.setValidated(booleanOR(merge.getValidated(), enrich.getValidated()));
|
||||
try {
|
||||
merge.setValidationDate(ModelSupport.oldest(merge.getValidationDate(), enrich.getValidationDate()));
|
||||
} catch (ParseException e) {
|
||||
throw new IllegalArgumentException(String
|
||||
.format(
|
||||
"invalid validation date format in relation [s:%s, t:%s]: %s", merge.getSource(),
|
||||
merge.getTarget(),
|
||||
merge.getValidationDate()));
|
||||
}
|
||||
|
||||
// TODO keyvalue merge
|
||||
merge.setProperties(mergeKeyValue(merge.getProperties(), enrich.getProperties(), trust));
|
||||
|
||||
return merge;
|
||||
}
|
||||
|
||||
public static <T extends Result> T mergeResult(T original, T enrich) {
|
||||
final int trust = compareTrust(original, enrich);
|
||||
T merge = mergeOafEntityFields(original, enrich, trust);
|
||||
|
||||
if (merge.getProcessingchargeamount() == null || StringUtils.isBlank(merge.getProcessingchargeamount().getValue())) {
|
||||
merge.setProcessingchargeamount(enrich.getProcessingchargeamount());
|
||||
merge.setProcessingchargecurrency(enrich.getProcessingchargecurrency());
|
||||
}
|
||||
|
||||
// author = usare la stessa logica che in dedup
|
||||
merge.setAuthor(chooseReference(merge.getAuthor(), enrich.getAuthor(), trust));
|
||||
// il primo che mi arriva secondo l'ordinamento per priorita'
|
||||
merge.setResulttype(chooseReference(merge.getResulttype(), enrich.getResulttype(), trust));
|
||||
// gestito come il resulttype perche' e' un subtype
|
||||
merge.setMetaResourceType(chooseReference(merge.getMetaResourceType(), enrich.getMetaResourceType(), trust));
|
||||
// spostiamo nell'instance e qui prendo il primo che arriva
|
||||
merge.setLanguage(chooseReference(merge.getLanguage(), enrich.getLanguage(), trust));
|
||||
// country lasicamo,o cosi' -> parentesi sul datainfo
|
||||
merge.setCountry(unionDistinctLists(merge.getCountry(), enrich.getCountry(), trust));
|
||||
//ok
|
||||
merge.setSubject(unionDistinctLists(merge.getSubject(), enrich.getSubject(), trust));
|
||||
// union per priority quindi vanno in append
|
||||
merge.setTitle(unionTitle(merge.getTitle(), enrich.getTitle(), trust));
|
||||
//ok
|
||||
merge.setRelevantdate(unionDistinctLists(merge.getRelevantdate(), enrich.getRelevantdate(), trust));
|
||||
// prima trust e poi longest list
|
||||
merge.setDescription(longestLists(merge.getDescription(), enrich.getDescription()));
|
||||
// trust piu' alto e poi piu' vecchia
|
||||
merge.setDateofacceptance(chooseReference(merge.getDateofacceptance(), enrich.getDateofacceptance(), trust));
|
||||
// ok, ma publisher va messo ripetibile
|
||||
merge.setPublisher(chooseReference(merge.getPublisher(), enrich.getPublisher(), trust));
|
||||
// ok
|
||||
merge.setEmbargoenddate(chooseReference(merge.getEmbargoenddate(), enrich.getEmbargoenddate(), trust));
|
||||
// ok
|
||||
merge.setSource(unionDistinctLists(merge.getSource(), enrich.getSource(), trust));
|
||||
// ok
|
||||
merge.setFulltext(unionDistinctLists(merge.getFulltext(), enrich.getFulltext(), trust));
|
||||
// ok
|
||||
merge.setFormat(unionDistinctLists(merge.getFormat(), enrich.getFormat(), trust));
|
||||
// ok
|
||||
merge.setContributor(unionDistinctLists(merge.getContributor(), enrich.getContributor(), trust));
|
||||
|
||||
// prima prendo l'higher trust, su questo prendo il valore migliore nelle istanze TODO
|
||||
// trust maggiore ma a parita' di trust il piu' specifico (base del vocabolario)
|
||||
// vedi note
|
||||
merge.setResourcetype(firstNonNull(merge.getResourcetype(), enrich.getResourcetype()));
|
||||
|
||||
// ok
|
||||
merge.setCoverage(unionDistinctLists(merge.getCoverage(), enrich.getCoverage(), trust));
|
||||
|
||||
// most open ok
|
||||
if (enrich.getBestaccessright() != null
|
||||
&& new AccessRightComparator<>()
|
||||
.compare(enrich.getBestaccessright(), merge.getBestaccessright()) < 0) {
|
||||
merge.setBestaccessright(enrich.getBestaccessright());
|
||||
}
|
||||
|
||||
// TODO merge of datainfo given same id
|
||||
merge.setContext(unionDistinctLists(merge.getContext(), enrich.getContext(), trust));
|
||||
|
||||
//ok
|
||||
merge.setExternalReference(unionDistinctLists(merge.getExternalReference(), enrich.getExternalReference(), trust));
|
||||
|
||||
//instance enrichment or union
|
||||
// review instance equals => add pid to comparision
|
||||
if (!isAnEnrichment(merge) && !isAnEnrichment(enrich))
|
||||
merge.setInstance(unionDistinctLists(merge.getInstance(), enrich.getInstance(), trust));
|
||||
else {
|
||||
final List<Instance> enrichmentInstances = isAnEnrichment(merge) ? merge.getInstance()
|
||||
: enrich.getInstance();
|
||||
final List<Instance> enrichedInstances = isAnEnrichment(merge) ? enrich.getInstance()
|
||||
: merge.getInstance();
|
||||
if (isAnEnrichment(merge))
|
||||
merge.setDataInfo(enrich.getDataInfo());
|
||||
merge.setInstance(enrichInstances(enrichedInstances, enrichmentInstances));
|
||||
}
|
||||
|
||||
merge.setEoscifguidelines(unionDistinctLists(merge.getEoscifguidelines(), enrich.getEoscifguidelines(), trust));
|
||||
merge.setIsGreen(booleanOR(merge.getIsGreen(), enrich.getIsGreen()));
|
||||
// OK but should be list of values
|
||||
merge.setOpenAccessColor(chooseReference(merge.getOpenAccessColor(), enrich.getOpenAccessColor(), trust));
|
||||
merge.setIsInDiamondJournal(booleanOR(merge.getIsInDiamondJournal(), enrich.getIsInDiamondJournal()));
|
||||
merge.setPubliclyFunded(booleanOR(merge.getPubliclyFunded(), enrich.getPubliclyFunded()));
|
||||
|
||||
return merge;
|
||||
}
|
||||
|
||||
private static <T extends OtherResearchProduct> T mergeORP(T original, T enrich) {
|
||||
int trust = compareTrust(original, enrich);
|
||||
final T merge = mergeResult(original, enrich);
|
||||
|
||||
merge.setContactperson(unionDistinctLists(merge.getContactperson(), enrich.getContactperson(), trust));
|
||||
merge.setContactgroup(unionDistinctLists(merge.getContactgroup(), enrich.getContactgroup(), trust));
|
||||
merge.setTool(unionDistinctLists(merge.getTool(), enrich.getTool(), trust));
|
||||
|
||||
return merge;
|
||||
}
|
||||
|
||||
private static <T extends Software> T mergeSoftware(T original, T enrich) {
|
||||
int trust = compareTrust(original, enrich);
|
||||
final T merge = mergeResult(original, enrich);
|
||||
|
||||
merge.setDocumentationUrl(unionDistinctLists(merge.getDocumentationUrl(), enrich.getDocumentationUrl(), trust));
|
||||
merge.setLicense(unionDistinctLists(merge.getLicense(), enrich.getLicense(), trust));
|
||||
merge.setCodeRepositoryUrl(chooseReference(merge.getCodeRepositoryUrl(), enrich.getCodeRepositoryUrl(), trust));
|
||||
merge.setProgrammingLanguage(chooseReference(merge.getProgrammingLanguage(), enrich.getProgrammingLanguage(), trust));
|
||||
|
||||
return merge;
|
||||
}
|
||||
|
||||
private static <T extends Dataset> T mergeDataset(T original, T enrich) {
|
||||
int trust = compareTrust(original, enrich);
|
||||
T merge = mergeResult(original, enrich);
|
||||
|
||||
merge.setStoragedate(chooseReference(merge.getStoragedate(), enrich.getStoragedate(), trust));
|
||||
merge.setDevice(chooseReference(merge.getDevice(), enrich.getDevice(), trust));
|
||||
merge.setSize(chooseReference(merge.getSize(), enrich.getSize(), trust));
|
||||
merge.setVersion(chooseReference(merge.getVersion(), enrich.getVersion(), trust));
|
||||
merge.setLastmetadataupdate(chooseReference(merge.getLastmetadataupdate(), enrich.getLastmetadataupdate(), trust));
|
||||
merge.setMetadataversionnumber(chooseReference(merge.getMetadataversionnumber(), enrich.getMetadataversionnumber(), trust));
|
||||
merge.setGeolocation(unionDistinctLists(merge.getGeolocation(), enrich.getGeolocation(), trust));
|
||||
|
||||
return merge;
|
||||
}
|
||||
|
||||
public static <T extends Publication> T mergePublication(T original, T enrich) {
|
||||
final int trust = compareTrust(original, enrich);
|
||||
T merged = mergeResult(original, enrich);
|
||||
|
||||
merged.setJournal(chooseReference(merged.getJournal(), enrich.getJournal(), trust));
|
||||
|
||||
return merged;
|
||||
}
|
||||
|
||||
private static <T extends Organization> T mergeOrganization(T left, T enrich) {
|
||||
int trust = compareTrust(left, enrich);
|
||||
T merged = mergeOafEntityFields(left, enrich, trust);
|
||||
|
||||
merged.setLegalshortname(chooseReference(merged.getLegalshortname(), enrich.getLegalshortname(), trust));
|
||||
merged.setLegalname(chooseReference(merged.getLegalname(), enrich.getLegalname(), trust));
|
||||
merged.setAlternativeNames(unionDistinctLists(enrich.getAlternativeNames(), merged.getAlternativeNames(), trust));
|
||||
merged.setWebsiteurl(chooseReference(merged.getWebsiteurl(), enrich.getWebsiteurl(), trust));
|
||||
merged.setLogourl(chooseReference(merged.getLogourl(), enrich.getLogourl(), trust));
|
||||
merged.setEclegalbody(chooseReference(merged.getEclegalbody(), enrich.getEclegalbody(), trust));
|
||||
merged.setEclegalperson(chooseReference(merged.getEclegalperson(), enrich.getEclegalperson(), trust));
|
||||
merged.setEcnonprofit(chooseReference(merged.getEcnonprofit(), enrich.getEcnonprofit(), trust));
|
||||
merged.setEcresearchorganization(chooseReference(merged.getEcresearchorganization(), enrich.getEcresearchorganization(), trust));
|
||||
merged.setEchighereducation(chooseReference(merged.getEchighereducation(), enrich.getEchighereducation(), trust));
|
||||
merged.setEcinternationalorganizationeurinterests(chooseReference(merged.getEcinternationalorganizationeurinterests(), enrich.getEcinternationalorganizationeurinterests(), trust));
|
||||
merged.setEcinternationalorganization(chooseReference(merged.getEcinternationalorganization(), enrich.getEcinternationalorganization(), trust));
|
||||
merged.setEcenterprise(chooseReference(merged.getEcenterprise(), enrich.getEcenterprise(), trust));
|
||||
merged.setEcsmevalidated(chooseReference(merged.getEcsmevalidated(), enrich.getEcsmevalidated(), trust));
|
||||
merged.setEcnutscode(chooseReference(merged.getEcnutscode(), enrich.getEcnutscode(), trust));
|
||||
merged.setCountry(chooseReference(merged.getCountry(), enrich.getCountry(), trust));
|
||||
|
||||
return merged;
|
||||
}
|
||||
|
||||
public static <T extends Project> T mergeProject(T original, T enrich) {
|
||||
int trust = compareTrust(original, enrich);
|
||||
T merged = mergeOafEntityFields(original, enrich, trust);
|
||||
|
||||
merged.setWebsiteurl(chooseReference(merged.getWebsiteurl(), enrich.getWebsiteurl(), trust));
|
||||
merged.setCode(chooseReference(merged.getCode(), enrich.getCode(), trust));
|
||||
merged.setAcronym(chooseReference(merged.getAcronym(), enrich.getAcronym(), trust));
|
||||
merged.setTitle(chooseReference(merged.getTitle(), enrich.getTitle(), trust));
|
||||
merged.setStartdate(chooseReference(merged.getStartdate(), enrich.getStartdate(), trust));
|
||||
merged.setEnddate(chooseReference(merged.getEnddate(), enrich.getEnddate(), trust));
|
||||
merged.setCallidentifier(chooseReference(merged.getCallidentifier(), enrich.getCallidentifier(), trust));
|
||||
merged.setKeywords(chooseReference(merged.getKeywords(), enrich.getKeywords(), trust));
|
||||
merged.setDuration(chooseReference(merged.getDuration(), enrich.getDuration(), trust));
|
||||
merged.setEcsc39(chooseReference(merged.getEcsc39(), enrich.getEcsc39(), trust));
|
||||
merged.setOamandatepublications(chooseReference(merged.getOamandatepublications(), enrich.getOamandatepublications(), trust));
|
||||
merged.setEcarticle29_3(chooseReference(merged.getEcarticle29_3(), enrich.getEcarticle29_3(), trust));
|
||||
merged.setSubjects(unionDistinctLists(merged.getSubjects(), enrich.getSubjects(), trust));
|
||||
merged.setFundingtree(unionDistinctLists(merged.getFundingtree(), enrich.getFundingtree(), trust));
|
||||
merged.setContracttype(chooseReference(merged.getContracttype(), enrich.getContracttype(), trust));
|
||||
merged.setOptional1(chooseReference(merged.getOptional1(), enrich.getOptional1(), trust));
|
||||
merged.setOptional2(chooseReference(merged.getOptional2(), enrich.getOptional2(), trust));
|
||||
merged.setJsonextrainfo(chooseReference(merged.getJsonextrainfo(), enrich.getJsonextrainfo(), trust));
|
||||
merged.setContactfullname(chooseReference(merged.getContactfullname(), enrich.getContactfullname(), trust));
|
||||
merged.setContactfax(chooseReference(merged.getContactfax(), enrich.getContactfax(), trust));
|
||||
merged.setContactphone(chooseReference(merged.getContactphone(), enrich.getContactphone(), trust));
|
||||
merged.setContactemail(chooseReference(merged.getContactemail(), enrich.getContactemail(), trust));
|
||||
merged.setSummary(chooseReference(merged.getSummary(), enrich.getSummary(), trust));
|
||||
merged.setCurrency(chooseReference(merged.getCurrency(), enrich.getCurrency(), trust));
|
||||
|
||||
//missin in Project.merge
|
||||
merged.setTotalcost(chooseReference(merged.getTotalcost(), enrich.getTotalcost(), trust));
|
||||
merged.setFundedamount(chooseReference(merged.getFundedamount(), enrich.getFundedamount(), trust));
|
||||
|
||||
// trust ??
|
||||
if (enrich.getH2020topiccode() != null && StringUtils.isEmpty(merged.getH2020topiccode())) {
|
||||
merged.setH2020topiccode(enrich.getH2020topiccode());
|
||||
merged.setH2020topicdescription(enrich.getH2020topicdescription());
|
||||
}
|
||||
|
||||
merged.setH2020classification(unionDistinctLists(merged.getH2020classification(), enrich.getH2020classification(), trust));
|
||||
|
||||
return merged;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Longest lists list.
|
||||
*
|
||||
* @param a the a
|
||||
* @param b the b
|
||||
* @return the list
|
||||
*/
|
||||
public static List<Field<String>> longestLists(List<Field<String>> a, List<Field<String>> b) {
|
||||
if (a == null || b == null)
|
||||
return a == null ? b : a;
|
||||
|
||||
return a.size() >= b.size() ? a : b;
|
||||
}
|
||||
|
||||
/**
|
||||
* This main method apply the enrichment of the instances
|
||||
*
|
||||
* @param toEnrichInstances the instances that could be enriched
|
||||
* @param enrichmentInstances the enrichment instances
|
||||
* @return list of instances possibly enriched
|
||||
*/
|
||||
private static List<Instance> enrichInstances(final List<Instance> toEnrichInstances,
|
||||
final List<Instance> enrichmentInstances) {
|
||||
final List<Instance> enrichmentResult = new ArrayList<>();
|
||||
|
||||
if (toEnrichInstances == null) {
|
||||
return enrichmentResult;
|
||||
}
|
||||
if (enrichmentInstances == null) {
|
||||
return enrichmentResult;
|
||||
}
|
||||
Map<String, Instance> ri = toInstanceMap(enrichmentInstances);
|
||||
|
||||
toEnrichInstances.forEach(i -> {
|
||||
final List<Instance> e = findEnrichmentsByPID(i.getPid(), ri);
|
||||
if (e != null && e.size() > 0) {
|
||||
e.forEach(enr -> applyEnrichment(i, enr));
|
||||
} else {
|
||||
final List<Instance> a = findEnrichmentsByPID(i.getAlternateIdentifier(), ri);
|
||||
if (a != null && a.size() > 0) {
|
||||
a.forEach(enr -> applyEnrichment(i, enr));
|
||||
}
|
||||
}
|
||||
enrichmentResult.add(i);
|
||||
});
|
||||
return enrichmentResult;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method converts the list of instance enrichments
|
||||
* into a Map where the key is the normalized identifier
|
||||
* and the value is the instance itself
|
||||
*
|
||||
* @param ri the list of enrichment instances
|
||||
* @return the result map
|
||||
*/
|
||||
private static Map<String, Instance> toInstanceMap(final List<Instance> ri) {
|
||||
return ri
|
||||
.stream()
|
||||
.filter(i -> i.getPid() != null || i.getAlternateIdentifier() != null)
|
||||
.flatMap(i -> {
|
||||
final List<Pair<String, Instance>> result = new ArrayList<>();
|
||||
if (i.getPid() != null)
|
||||
i
|
||||
.getPid()
|
||||
.stream()
|
||||
.filter(MergeUtils::validPid)
|
||||
.forEach(p -> result.add(new ImmutablePair<>(extractKeyFromPid(p), i)));
|
||||
if (i.getAlternateIdentifier() != null)
|
||||
i
|
||||
.getAlternateIdentifier()
|
||||
.stream()
|
||||
.filter(MergeUtils::validPid)
|
||||
.forEach(p -> result.add(new ImmutablePair<>(extractKeyFromPid(p), i)));
|
||||
return result.stream();
|
||||
})
|
||||
.collect(
|
||||
Collectors
|
||||
.toMap(
|
||||
Pair::getLeft,
|
||||
Pair::getRight,
|
||||
(a, b) -> a));
|
||||
}
|
||||
|
||||
private static boolean isFromDelegatedAuthority(Result r) {
|
||||
return Optional
|
||||
.ofNullable(r.getInstance())
|
||||
.map(
|
||||
instance -> instance
|
||||
.stream()
|
||||
.filter(i -> Objects.nonNull(i.getCollectedfrom()))
|
||||
.map(i -> i.getCollectedfrom().getKey())
|
||||
.anyMatch(cfId -> IdentifierFactory.delegatedAuthorityDatasourceIds().contains(cfId)))
|
||||
.orElse(false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Valid pid boolean.
|
||||
*
|
||||
* @param p the p
|
||||
* @return the boolean
|
||||
*/
|
||||
private static boolean validPid(final StructuredProperty p) {
|
||||
return p.getValue() != null && p.getQualifier() != null && p.getQualifier().getClassid() != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Normalize pid string.
|
||||
*
|
||||
* @param pid the pid
|
||||
* @return the string
|
||||
*/
|
||||
private static String extractKeyFromPid(final StructuredProperty pid) {
|
||||
if (pid == null)
|
||||
return null;
|
||||
final StructuredProperty normalizedPid = CleaningFunctions.normalizePidValue(pid);
|
||||
|
||||
return String.format("%s::%s", normalizedPid.getQualifier().getClassid(), normalizedPid.getValue());
|
||||
}
|
||||
|
||||
/**
|
||||
* This utility method finds the list of enrichment instances
|
||||
* that match one or more PIDs in the input list
|
||||
*
|
||||
* @param pids the list of PIDs
|
||||
* @param enrichments the List of enrichment instances having the same pid
|
||||
* @return the list
|
||||
*/
|
||||
private static List<Instance> findEnrichmentsByPID(final List<StructuredProperty> pids,
|
||||
final Map<String, Instance> enrichments) {
|
||||
if (pids == null || enrichments == null)
|
||||
return null;
|
||||
return pids
|
||||
.stream()
|
||||
.map(MergeUtils::extractKeyFromPid)
|
||||
.map(enrichments::get)
|
||||
.filter(Objects::nonNull)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
/**
|
||||
* Is an enrichment boolean.
|
||||
*
|
||||
* @param e the e
|
||||
* @return the boolean
|
||||
*/
|
||||
private static boolean isAnEnrichment(OafEntity e) {
|
||||
return e.getDataInfo() != null &&
|
||||
e.getDataInfo().getProvenanceaction() != null
|
||||
&& ModelConstants.PROVENANCE_ENRICH.equalsIgnoreCase(e.getDataInfo().getProvenanceaction().getClassid());
|
||||
}
|
||||
|
||||
/**
|
||||
* This method apply enrichment on a single instance
|
||||
* The enrichment consists of replacing values on
|
||||
* single attribute only if in the current instance is missing
|
||||
* The only repeatable field enriched is measures
|
||||
*
|
||||
* @param merge the current instance
|
||||
* @param enrichment the enrichment instance
|
||||
*/
|
||||
private static void applyEnrichment(final Instance merge, final Instance enrichment) {
|
||||
if (merge == null || enrichment == null)
|
||||
return;
|
||||
|
||||
merge.setLicense(firstNonNull(merge.getLicense(), enrichment.getLicense()));
|
||||
merge.setAccessright(firstNonNull(merge.getAccessright(), enrichment.getAccessright()));
|
||||
merge.setInstancetype(firstNonNull(merge.getInstancetype(), enrichment.getInstancetype()));
|
||||
merge.setInstanceTypeMapping(firstNonNull(merge.getInstanceTypeMapping(), enrichment.getInstanceTypeMapping()));
|
||||
merge.setHostedby(firstNonNull(merge.getHostedby(), enrichment.getHostedby()));
|
||||
merge.setUrl(unionDistinctLists(merge.getUrl(), enrichment.getUrl(), 0));
|
||||
merge.setDistributionlocation(firstNonNull(merge.getDistributionlocation(), enrichment.getDistributionlocation()));
|
||||
merge.setCollectedfrom(firstNonNull(merge.getCollectedfrom(), enrichment.getCollectedfrom()));
|
||||
// pid and alternateId are used for matching
|
||||
merge.setDateofacceptance(firstNonNull(merge.getDateofacceptance(), enrichment.getDateofacceptance()));
|
||||
merge.setProcessingchargeamount(firstNonNull(merge.getProcessingchargeamount(), enrichment.getProcessingchargeamount()));
|
||||
merge.setProcessingchargecurrency(firstNonNull(merge.getProcessingchargecurrency(), enrichment.getProcessingchargecurrency()));
|
||||
merge.setRefereed(firstNonNull(merge.getRefereed(), enrichment.getRefereed()));
|
||||
merge.setMeasures(unionDistinctLists(merge.getMeasures(), enrichment.getMeasures(), 0));
|
||||
merge.setFulltext(firstNonNull(merge.getFulltext(), enrichment.getFulltext()));
|
||||
}
|
||||
|
||||
private static int compareTrust(Oaf a, Oaf b) {
|
||||
String left = Optional
|
||||
.ofNullable(a.getDataInfo())
|
||||
.map(DataInfo::getTrust)
|
||||
.orElse("0.0");
|
||||
|
||||
String right = Optional
|
||||
.ofNullable(b.getDataInfo())
|
||||
.map(DataInfo::getTrust)
|
||||
.orElse("0.0");
|
||||
|
||||
return left.compareTo(right);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,111 @@
|
||||
|
||||
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.fasterxml.jackson.databind.DeserializationFeature;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class MergeUtilsTest {
|
||||
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
|
||||
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
||||
|
||||
@Test
|
||||
void testMergePubs() throws IOException {
|
||||
Publication p1 = read("publication_1.json", Publication.class);
|
||||
Publication p2 = read("publication_2.json", Publication.class);
|
||||
Dataset d1 = read("dataset_1.json", Dataset.class);
|
||||
Dataset d2 = read("dataset_2.json", Dataset.class);
|
||||
|
||||
assertEquals(1, p1.getCollectedfrom().size());
|
||||
assertEquals(ModelConstants.CROSSREF_ID, p1.getCollectedfrom().get(0).getKey());
|
||||
assertEquals(1, d2.getCollectedfrom().size());
|
||||
assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
|
||||
|
||||
assertEquals(1, p2.getCollectedfrom().size());
|
||||
assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
|
||||
assertEquals(1, d1.getCollectedfrom().size());
|
||||
assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
|
||||
|
||||
final Result p1d2 = MergeUtils.checkedMerge(p1, d2);
|
||||
assertEquals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID, p1d2.getResulttype().getClassid());
|
||||
assertTrue(p1d2 instanceof Publication);
|
||||
assertEquals(p1.getId(), p1d2.getId());
|
||||
}
|
||||
|
||||
@Test
|
||||
void testMergePubs_1() throws IOException {
|
||||
Publication p2 = read("publication_2.json", Publication.class);
|
||||
Dataset d1 = read("dataset_1.json", Dataset.class);
|
||||
|
||||
final Result p2d1 = MergeUtils.checkedMerge(p2, d1);
|
||||
assertEquals((ModelConstants.DATASET_RESULTTYPE_CLASSID), p2d1.getResulttype().getClassid());
|
||||
assertTrue(p2d1 instanceof Dataset);
|
||||
assertEquals(d1.getId(), p2d1.getId());
|
||||
assertEquals(2, p2d1.getCollectedfrom().size());
|
||||
}
|
||||
|
||||
@Test
|
||||
void testMergePubs_2() throws IOException {
|
||||
Publication p1 = read("publication_1.json", Publication.class);
|
||||
Publication p2 = read("publication_2.json", Publication.class);
|
||||
|
||||
Result p1p2 = MergeUtils.checkedMerge(p1, p2);
|
||||
assertTrue(p1p2 instanceof Publication);
|
||||
assertEquals(p1.getId(), p1p2.getId());
|
||||
assertEquals(2, p1p2.getCollectedfrom().size());
|
||||
}
|
||||
|
||||
@Test
|
||||
void testDelegatedAuthority_1() throws IOException {
|
||||
Dataset d1 = read("dataset_2.json", Dataset.class);
|
||||
Dataset d2 = read("dataset_delegated.json", Dataset.class);
|
||||
|
||||
assertEquals(1, d2.getCollectedfrom().size());
|
||||
assertTrue(cfId(d2.getCollectedfrom()).contains(ModelConstants.ZENODO_OD_ID));
|
||||
|
||||
Result res = (Result) MergeUtils.merge(d1, d2, true);
|
||||
|
||||
assertEquals(d2, res);
|
||||
}
|
||||
|
||||
@Test
|
||||
void testDelegatedAuthority_2() throws IOException {
|
||||
Dataset p1 = read("publication_1.json", Dataset.class);
|
||||
Dataset d2 = read("dataset_delegated.json", Dataset.class);
|
||||
|
||||
assertEquals(1, d2.getCollectedfrom().size());
|
||||
assertTrue(cfId(d2.getCollectedfrom()).contains(ModelConstants.ZENODO_OD_ID));
|
||||
|
||||
Result res = (Result) MergeUtils.merge(p1, d2, true);
|
||||
|
||||
assertEquals(d2, res);
|
||||
}
|
||||
|
||||
protected HashSet<String> cfId(List<KeyValue> collectedfrom) {
|
||||
return collectedfrom.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new));
|
||||
}
|
||||
|
||||
protected <T extends Result> T read(String filename, Class<T> clazz) throws IOException {
|
||||
final String json = IOUtils.toString(getClass().getResourceAsStream(filename));
|
||||
return OBJECT_MAPPER.readValue(json, clazz);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,39 @@
|
||||
/*
|
||||
* Copyright (c) 2024.
|
||||
* SPDX-FileCopyrightText: © 2023 Consiglio Nazionale delle Ricerche
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*/
|
||||
|
||||
package eu.dnetlib.dhp.actionmanager.promote;
|
||||
|
||||
/** Encodes the Actionset promotion strategies */
|
||||
public class PromoteAction {
|
||||
|
||||
/** The supported actionset promotion strategies
|
||||
*
|
||||
* ENRICH: promotes only records in the actionset matching another record in the
|
||||
* graph and enriches them applying the given MergeAndGet strategy
|
||||
* UPSERT: promotes all the records in an actionset, matching records are updated
|
||||
* using the given MergeAndGet strategy, the non-matching record as inserted as they are.
|
||||
*/
|
||||
public enum Strategy {
|
||||
ENRICH, UPSERT
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the string representation of the join type implementing the given PromoteAction.
|
||||
*
|
||||
* @param strategy the strategy to be used to promote the Actionset contents
|
||||
* @return the join type used to implement the promotion strategy
|
||||
*/
|
||||
public static String joinTypeForStrategy(PromoteAction.Strategy strategy) {
|
||||
switch (strategy) {
|
||||
case ENRICH:
|
||||
return "left_outer";
|
||||
case UPSERT:
|
||||
return "full_outer";
|
||||
default:
|
||||
throw new IllegalStateException("unsupported PromoteAction: " + strategy.toString());
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,195 @@
|
||||
|
||||
package eu.dnetlib.dhp.actionmanager.transformativeagreement;
|
||||
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.*;
|
||||
|
||||
import org.apache.commons.cli.ParseException;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.io.compress.GzipCodec;
|
||||
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.function.FilterFunction;
|
||||
import org.apache.spark.api.java.function.FlatMapFunction;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.actionmanager.transformativeagreement.model.TransformativeAgreementModel;
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.Country;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.*;
|
||||
import scala.Tuple2;
|
||||
|
||||
public class CreateActionSetSparkJob implements Serializable {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(CreateActionSetSparkJob.class);
|
||||
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
private static final String IREL_PROJECT = "40|100018998___::1e5e62235d094afd01cd56e65112fc63";
|
||||
private static final String TRANSFORMATIVE_AGREEMENT = "openapc::transformativeagreement";
|
||||
|
||||
public static void main(final String[] args) throws IOException, ParseException {
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
Objects
|
||||
.requireNonNull(
|
||||
CreateActionSetSparkJob.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/actionmanager/transformativeagreement/as_parameters.json"))));
|
||||
|
||||
parser.parseArgument(args);
|
||||
|
||||
Boolean isSparkSessionManaged = Optional
|
||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||
.map(Boolean::valueOf)
|
||||
.orElse(Boolean.TRUE);
|
||||
|
||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
|
||||
final String inputPath = parser.get("inputPath");
|
||||
log.info("inputPath {}", inputPath);
|
||||
|
||||
final String outputPath = parser.get("outputPath");
|
||||
log.info("outputPath {}", outputPath);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> createActionSet(spark, inputPath, outputPath));
|
||||
|
||||
}
|
||||
|
||||
private static void createActionSet(SparkSession spark, String inputPath, String outputPath) {
|
||||
JavaRDD<AtomicAction> relations = spark
|
||||
.read()
|
||||
.textFile(inputPath)
|
||||
.map(
|
||||
(MapFunction<String, TransformativeAgreementModel>) value -> OBJECT_MAPPER
|
||||
.readValue(value, TransformativeAgreementModel.class),
|
||||
Encoders.bean(TransformativeAgreementModel.class))
|
||||
.flatMap(
|
||||
(FlatMapFunction<TransformativeAgreementModel, Relation>) value -> createRelation(
|
||||
value)
|
||||
.iterator(),
|
||||
Encoders.bean(Relation.class))
|
||||
.filter((FilterFunction<Relation>) Objects::nonNull)
|
||||
.toJavaRDD()
|
||||
.map(p -> new AtomicAction(p.getClass(), p));
|
||||
//TODO relations in stand-by waiting to know if we need to create them or not In case we need just make a union before saving the sequence file
|
||||
spark
|
||||
.read()
|
||||
.textFile(inputPath)
|
||||
.map(
|
||||
(MapFunction<String, TransformativeAgreementModel>) value -> OBJECT_MAPPER
|
||||
.readValue(value, TransformativeAgreementModel.class),
|
||||
Encoders.bean(TransformativeAgreementModel.class))
|
||||
.map(
|
||||
(MapFunction<TransformativeAgreementModel, Result>) value -> createResult(
|
||||
value),
|
||||
Encoders.bean(Result.class))
|
||||
.filter((FilterFunction<Result>) r -> r != null)
|
||||
.toJavaRDD()
|
||||
.map(p -> new AtomicAction(p.getClass(), p))
|
||||
.mapToPair(
|
||||
aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
|
||||
new Text(OBJECT_MAPPER.writeValueAsString(aa))))
|
||||
.saveAsHadoopFile(
|
||||
outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
|
||||
|
||||
}
|
||||
|
||||
private static Result createResult(TransformativeAgreementModel value) {
|
||||
Result r = new Result();
|
||||
r
|
||||
.setId(
|
||||
"50|doi_________::"
|
||||
+ IdentifierFactory
|
||||
.md5(PidCleaner.normalizePidValue(PidType.doi.toString(), value.getDoi())));
|
||||
r.setTransformativeAgreement(value.getAgreement());
|
||||
Country country = new Country();
|
||||
country.setClassid(value.getCountry());
|
||||
country.setClassname(value.getCountry());
|
||||
country
|
||||
.setDataInfo(
|
||||
OafMapperUtils
|
||||
.dataInfo(
|
||||
false, ModelConstants.SYSIMPORT_ACTIONSET, false, false,
|
||||
OafMapperUtils
|
||||
.qualifier(
|
||||
"openapc::transformativeagreement",
|
||||
"Harvested from Trnasformative Agreement file from OpenAPC",
|
||||
ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS),
|
||||
"0.9"));
|
||||
country.setSchemeid(ModelConstants.DNET_COUNTRY_TYPE);
|
||||
country.setSchemename(ModelConstants.DNET_COUNTRY_TYPE);
|
||||
r.setCountry(Arrays.asList(country));
|
||||
return r;
|
||||
}
|
||||
|
||||
private static List<Relation> createRelation(TransformativeAgreementModel value) {
|
||||
|
||||
List<Relation> relationList = new ArrayList<>();
|
||||
|
||||
if (value.getAgreement().startsWith("IReL")) {
|
||||
String paper;
|
||||
|
||||
paper = "50|doi_________::"
|
||||
+ IdentifierFactory
|
||||
.md5(PidCleaner.normalizePidValue(PidType.doi.toString(), value.getDoi()));
|
||||
|
||||
relationList
|
||||
.add(
|
||||
getRelation(
|
||||
paper,
|
||||
IREL_PROJECT, ModelConstants.IS_PRODUCED_BY));
|
||||
|
||||
relationList.add(getRelation(IREL_PROJECT, paper, ModelConstants.PRODUCES));
|
||||
}
|
||||
return relationList;
|
||||
}
|
||||
|
||||
public static Relation getRelation(
|
||||
String source,
|
||||
String target,
|
||||
String relClass) {
|
||||
|
||||
return OafMapperUtils
|
||||
.getRelation(
|
||||
source,
|
||||
target,
|
||||
ModelConstants.RESULT_PROJECT,
|
||||
ModelConstants.OUTCOME,
|
||||
relClass,
|
||||
Arrays
|
||||
.asList(
|
||||
OafMapperUtils.keyValue(ModelConstants.OPEN_APC_ID, ModelConstants.OPEN_APC_NAME)),
|
||||
OafMapperUtils
|
||||
.dataInfo(
|
||||
false, null, false, false,
|
||||
OafMapperUtils
|
||||
.qualifier(
|
||||
TRANSFORMATIVE_AGREEMENT, "Transformative Agreement",
|
||||
ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS),
|
||||
"0.9"),
|
||||
null);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,51 @@
|
||||
|
||||
package eu.dnetlib.dhp.actionmanager.transformativeagreement.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
|
||||
/**
|
||||
* @author miriam.baglioni
|
||||
* @Date 18/12/23
|
||||
*/
|
||||
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||
|
||||
public class TransformativeAgreementModel implements Serializable {
|
||||
private String institution;
|
||||
private String doi;
|
||||
private String agreement;
|
||||
private String country;
|
||||
|
||||
public String getCountry() {
|
||||
return country;
|
||||
}
|
||||
|
||||
public void setCountry(String country) {
|
||||
this.country = country;
|
||||
}
|
||||
|
||||
public String getInstitution() {
|
||||
return institution;
|
||||
}
|
||||
|
||||
public void setInstitution(String institution) {
|
||||
this.institution = institution;
|
||||
}
|
||||
|
||||
public String getDoi() {
|
||||
return doi;
|
||||
}
|
||||
|
||||
public void setDoi(String doi) {
|
||||
this.doi = doi;
|
||||
}
|
||||
|
||||
public String getAgreement() {
|
||||
return agreement;
|
||||
}
|
||||
|
||||
public void setAgreement(String agreement) {
|
||||
this.agreement = agreement;
|
||||
}
|
||||
}
|
@ -0,0 +1,244 @@
|
||||
|
||||
package eu.dnetlib.dhp.collection.orcid;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.net.HttpURLConnection;
|
||||
import java.net.URL;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
|
||||
import javax.swing.*;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.hadoop.io.SequenceFile;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.http.HttpHeaders;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.common.collection.HttpClientParams;
|
||||
|
||||
public class ORCIDWorker extends Thread {
|
||||
|
||||
final static Logger log = LoggerFactory.getLogger(ORCIDWorker.class);
|
||||
|
||||
public static String JOB_COMPLETE = "JOB_COMPLETE";
|
||||
|
||||
private static final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)";
|
||||
|
||||
private final BlockingQueue<String> queue;
|
||||
|
||||
private boolean hasComplete = false;
|
||||
|
||||
private final SequenceFile.Writer employments;
|
||||
|
||||
private final SequenceFile.Writer summary;
|
||||
private final SequenceFile.Writer works;
|
||||
|
||||
private final String token;
|
||||
|
||||
private final String id;
|
||||
|
||||
public static ORCIDWorkerBuilder builder() {
|
||||
return new ORCIDWorkerBuilder();
|
||||
}
|
||||
|
||||
public ORCIDWorker(String id, BlockingQueue<String> myqueue, SequenceFile.Writer employments,
|
||||
SequenceFile.Writer summary, SequenceFile.Writer works, String token) {
|
||||
this.id = id;
|
||||
this.queue = myqueue;
|
||||
this.employments = employments;
|
||||
this.summary = summary;
|
||||
this.works = works;
|
||||
this.token = token;
|
||||
}
|
||||
|
||||
public static String retrieveURL(final String id, final String apiUrl, String token) {
|
||||
try {
|
||||
final HttpURLConnection urlConn = getHttpURLConnection(apiUrl, token);
|
||||
if (urlConn.getResponseCode() > 199 && urlConn.getResponseCode() < 300) {
|
||||
InputStream input = urlConn.getInputStream();
|
||||
return IOUtils.toString(input);
|
||||
} else {
|
||||
log
|
||||
.error(
|
||||
"Thread {} UNABLE TO DOWNLOAD FROM THIS URL {} , status code {}", id, apiUrl,
|
||||
urlConn.getResponseCode());
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("Thread {} Error on retrieving URL {} {}", id, apiUrl, e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@NotNull
|
||||
private static HttpURLConnection getHttpURLConnection(String apiUrl, String token) throws IOException {
|
||||
final HttpURLConnection urlConn = (HttpURLConnection) new URL(apiUrl).openConnection();
|
||||
final HttpClientParams clientParams = new HttpClientParams();
|
||||
urlConn.setInstanceFollowRedirects(false);
|
||||
urlConn.setReadTimeout(clientParams.getReadTimeOut() * 1000);
|
||||
urlConn.setConnectTimeout(clientParams.getConnectTimeOut() * 1000);
|
||||
urlConn.addRequestProperty(HttpHeaders.USER_AGENT, userAgent);
|
||||
urlConn.addRequestProperty(HttpHeaders.AUTHORIZATION, String.format("Bearer %s", token));
|
||||
return urlConn;
|
||||
}
|
||||
|
||||
private static String generateSummaryURL(final String orcidId) {
|
||||
return "https://api.orcid.org/v3.0/" + orcidId + "/record";
|
||||
}
|
||||
|
||||
private static String generateWorksURL(final String orcidId) {
|
||||
return "https://api.orcid.org/v3.0/" + orcidId + "/works";
|
||||
}
|
||||
|
||||
private static String generateEmploymentsURL(final String orcidId) {
|
||||
return "https://api.orcid.org/v3.0/" + orcidId + "/employments";
|
||||
}
|
||||
|
||||
private static void writeResultToSequenceFile(String id, String url, String token, String orcidId,
|
||||
SequenceFile.Writer file) throws IOException {
|
||||
final String response = retrieveURL(id, url, token);
|
||||
if (response != null) {
|
||||
if (orcidId == null) {
|
||||
log.error("Thread {} {} {}", id, orcidId, response);
|
||||
throw new RuntimeException("null items ");
|
||||
}
|
||||
|
||||
if (file == null) {
|
||||
log.error("Thread {} file is null for {} URL:{}", id, url, orcidId);
|
||||
} else {
|
||||
file.append(new Text(orcidId), new Text(response));
|
||||
file.hflush();
|
||||
}
|
||||
|
||||
} else
|
||||
log.error("Thread {} response is null for {} URL:{}", id, url, orcidId);
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
final Text key = new Text();
|
||||
final Text value = new Text();
|
||||
long start;
|
||||
long total_time;
|
||||
String orcidId = "";
|
||||
int requests = 0;
|
||||
if (summary == null || employments == null || works == null)
|
||||
throw new RuntimeException("Null files");
|
||||
|
||||
while (!hasComplete) {
|
||||
try {
|
||||
|
||||
orcidId = queue.take();
|
||||
|
||||
if (orcidId.equalsIgnoreCase(JOB_COMPLETE)) {
|
||||
hasComplete = true;
|
||||
} else {
|
||||
start = System.currentTimeMillis();
|
||||
writeResultToSequenceFile(id, generateSummaryURL(orcidId), token, orcidId, summary);
|
||||
total_time = System.currentTimeMillis() - start;
|
||||
requests++;
|
||||
if (total_time < 1000) {
|
||||
// I know making a sleep on a thread is bad, but we need to stay to 24 requests per seconds,
|
||||
// hence
|
||||
// the time between two http request in a thread must be 1 second
|
||||
Thread.sleep(1000L - total_time);
|
||||
}
|
||||
start = System.currentTimeMillis();
|
||||
writeResultToSequenceFile(id, generateWorksURL(orcidId), token, orcidId, works);
|
||||
total_time = System.currentTimeMillis() - start;
|
||||
requests++;
|
||||
if (total_time < 1000) {
|
||||
// I know making a sleep on a thread is bad, but we need to stay to 24 requests per seconds,
|
||||
// hence
|
||||
// the time between two http request in a thread must be 1 second
|
||||
Thread.sleep(1000L - total_time);
|
||||
}
|
||||
start = System.currentTimeMillis();
|
||||
writeResultToSequenceFile(id, generateEmploymentsURL(orcidId), token, orcidId, employments);
|
||||
total_time = System.currentTimeMillis() - start;
|
||||
requests++;
|
||||
if (total_time < 1000) {
|
||||
// I know making a sleep on a thread is bad, but we need to stay to 24 requests per seconds,
|
||||
// hence
|
||||
// the time between two http request in a thread must be 1 second
|
||||
Thread.sleep(1000L - total_time);
|
||||
}
|
||||
if (requests % 30 == 0) {
|
||||
log.info("Thread {} Downloaded {}", id, requests);
|
||||
}
|
||||
}
|
||||
|
||||
} catch (Throwable e) {
|
||||
|
||||
log.error("Thread {} Unable to save ORICD: {} item error", id, orcidId, e);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
try {
|
||||
works.close();
|
||||
summary.close();
|
||||
employments.close();
|
||||
} catch (Throwable e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
log.info("Thread {} COMPLETE ", id);
|
||||
log.info("Thread {} Downloaded {}", id, requests);
|
||||
|
||||
}
|
||||
|
||||
public static class ORCIDWorkerBuilder {
|
||||
|
||||
private String id;
|
||||
private SequenceFile.Writer employments;
|
||||
private SequenceFile.Writer summary;
|
||||
private SequenceFile.Writer works;
|
||||
private BlockingQueue<String> queue;
|
||||
|
||||
private String token;
|
||||
|
||||
public ORCIDWorkerBuilder withId(final String id) {
|
||||
this.id = id;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ORCIDWorkerBuilder withEmployments(final SequenceFile.Writer sequenceFile) {
|
||||
this.employments = sequenceFile;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ORCIDWorkerBuilder withSummary(final SequenceFile.Writer sequenceFile) {
|
||||
this.summary = sequenceFile;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ORCIDWorkerBuilder withWorks(final SequenceFile.Writer sequenceFile) {
|
||||
this.works = sequenceFile;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ORCIDWorkerBuilder withAccessToken(final String accessToken) {
|
||||
this.token = accessToken;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ORCIDWorkerBuilder withBlockingQueue(final BlockingQueue<String> queue) {
|
||||
this.queue = queue;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ORCIDWorker build() {
|
||||
if (this.summary == null || this.works == null || this.employments == null || StringUtils.isEmpty(token)
|
||||
|| queue == null)
|
||||
throw new RuntimeException("Unable to build missing required params");
|
||||
return new ORCIDWorker(id, queue, employments, summary, works, token);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,171 @@
|
||||
|
||||
package eu.dnetlib.dhp.collection.orcid;
|
||||
|
||||
import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration;
|
||||
|
||||
import java.io.*;
|
||||
import java.net.HttpURLConnection;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.ArrayBlockingQueue;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
|
||||
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
|
||||
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
|
||||
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.io.SequenceFile;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.common.collection.HttpClientParams;
|
||||
|
||||
public class OrcidGetUpdatesFile {
|
||||
|
||||
private static Logger log = LoggerFactory.getLogger(OrcidGetUpdatesFile.class);
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
|
||||
ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
Objects
|
||||
.requireNonNull(
|
||||
OrcidGetUpdatesFile.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/collection/orcid/download_orcid_update_parameter.json")))
|
||||
|
||||
);
|
||||
parser.parseArgument(args);
|
||||
|
||||
final String namenode = parser.get("namenode");
|
||||
log.info("got variable namenode: {}", namenode);
|
||||
|
||||
final String master = parser.get("master");
|
||||
log.info("got variable master: {}", master);
|
||||
|
||||
final String targetPath = parser.get("targetPath");
|
||||
log.info("got variable targetPath: {}", targetPath);
|
||||
|
||||
final String apiURL = parser.get("apiURL");
|
||||
log.info("got variable apiURL: {}", apiURL);
|
||||
|
||||
final String accessToken = parser.get("accessToken");
|
||||
log.info("got variable accessToken: {}", accessToken);
|
||||
|
||||
final String graphPath = parser.get("graphPath");
|
||||
log.info("got variable graphPath: {}", graphPath);
|
||||
|
||||
final SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName(OrcidGetUpdatesFile.class.getName())
|
||||
.master(master)
|
||||
.getOrCreate();
|
||||
|
||||
final String latestDate = spark
|
||||
.read()
|
||||
.load(graphPath + "/Authors")
|
||||
.selectExpr("max(lastModifiedDate)")
|
||||
.first()
|
||||
.getString(0);
|
||||
|
||||
log.info("latest date is {}", latestDate);
|
||||
|
||||
final FileSystem fileSystem = FileSystem.get(getHadoopConfiguration(namenode));
|
||||
|
||||
new OrcidGetUpdatesFile().readTar(fileSystem, accessToken, apiURL, targetPath, latestDate);
|
||||
|
||||
}
|
||||
|
||||
private SequenceFile.Writer createFile(Path aPath, FileSystem fileSystem) throws IOException {
|
||||
return SequenceFile
|
||||
.createWriter(
|
||||
fileSystem.getConf(),
|
||||
SequenceFile.Writer.file(aPath),
|
||||
SequenceFile.Writer.keyClass(Text.class),
|
||||
SequenceFile.Writer.valueClass(Text.class));
|
||||
}
|
||||
|
||||
private ORCIDWorker createWorker(final String id, final String targetPath, final BlockingQueue<String> queue,
|
||||
final String accessToken, FileSystem fileSystem) throws Exception {
|
||||
return ORCIDWorker
|
||||
.builder()
|
||||
.withId(id)
|
||||
.withEmployments(createFile(new Path(String.format("%s/employments_%s", targetPath, id)), fileSystem))
|
||||
.withSummary(createFile(new Path(String.format("%s/summary_%s", targetPath, id)), fileSystem))
|
||||
.withWorks(createFile(new Path(String.format("%s/works_%s", targetPath, id)), fileSystem))
|
||||
.withAccessToken(accessToken)
|
||||
.withBlockingQueue(queue)
|
||||
.build();
|
||||
}
|
||||
|
||||
public void readTar(FileSystem fileSystem, final String accessToken, final String apiURL, final String targetPath,
|
||||
final String startDate) throws Exception {
|
||||
final HttpURLConnection urlConn = (HttpURLConnection) new URL(apiURL).openConnection();
|
||||
final HttpClientParams clientParams = new HttpClientParams();
|
||||
urlConn.setInstanceFollowRedirects(false);
|
||||
urlConn.setReadTimeout(clientParams.getReadTimeOut() * 1000);
|
||||
urlConn.setConnectTimeout(clientParams.getConnectTimeOut() * 1000);
|
||||
if (urlConn.getResponseCode() > 199 && urlConn.getResponseCode() < 300) {
|
||||
InputStream input = urlConn.getInputStream();
|
||||
|
||||
Path hdfsWritePath = new Path("/tmp/orcid_updates.tar.gz");
|
||||
final FSDataOutputStream fsDataOutputStream = fileSystem.create(hdfsWritePath, true);
|
||||
IOUtils.copy(input, fsDataOutputStream);
|
||||
fsDataOutputStream.flush();
|
||||
fsDataOutputStream.close();
|
||||
FSDataInputStream updateFile = fileSystem.open(hdfsWritePath);
|
||||
TarArchiveInputStream tais = new TarArchiveInputStream(new GzipCompressorInputStream(
|
||||
new BufferedInputStream(
|
||||
updateFile.getWrappedStream())));
|
||||
TarArchiveEntry entry;
|
||||
|
||||
BlockingQueue<String> queue = new ArrayBlockingQueue<String>(3000);
|
||||
final List<ORCIDWorker> workers = new ArrayList<>();
|
||||
for (int i = 0; i < 22; i++) {
|
||||
workers.add(createWorker("" + i, targetPath, queue, accessToken, fileSystem));
|
||||
}
|
||||
workers.forEach(Thread::start);
|
||||
|
||||
while ((entry = tais.getNextTarEntry()) != null) {
|
||||
|
||||
if (entry.isFile()) {
|
||||
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(tais));
|
||||
System.out.println(br.readLine());
|
||||
br
|
||||
.lines()
|
||||
.map(l -> l.split(","))
|
||||
.filter(s -> StringUtils.compare(s[3].substring(0, 10), startDate) > 0)
|
||||
.map(s -> s[0])
|
||||
.forEach(s -> {
|
||||
try {
|
||||
queue.put(s);
|
||||
} catch (InterruptedException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < 22; i++) {
|
||||
queue.put(ORCIDWorker.JOB_COMPLETE);
|
||||
}
|
||||
for (ORCIDWorker worker : workers) {
|
||||
worker.join();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
@ -0,0 +1,171 @@
|
||||
|
||||
package eu.dnetlib.dhp.collection.plugin.base;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.io.StringWriter;
|
||||
import java.util.Iterator;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
import java.util.concurrent.LinkedBlockingQueue;
|
||||
|
||||
import javax.xml.stream.XMLEventReader;
|
||||
import javax.xml.stream.XMLEventWriter;
|
||||
import javax.xml.stream.XMLInputFactory;
|
||||
import javax.xml.stream.XMLOutputFactory;
|
||||
import javax.xml.stream.events.EndElement;
|
||||
import javax.xml.stream.events.StartElement;
|
||||
import javax.xml.stream.events.XMLEvent;
|
||||
|
||||
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
|
||||
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
|
||||
import org.apache.commons.compress.compressors.CompressorInputStream;
|
||||
import org.apache.commons.compress.compressors.CompressorStreamFactory;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
||||
|
||||
public class BaseCollectorIterator implements Iterator<String> {
|
||||
|
||||
private String nextElement;
|
||||
|
||||
private final BlockingQueue<String> queue = new LinkedBlockingQueue<>(100);
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(BaseCollectorIterator.class);
|
||||
|
||||
private static final String END_ELEM = "__END__";
|
||||
|
||||
public BaseCollectorIterator(final FileSystem fs, final Path filePath, final AggregatorReport report) {
|
||||
new Thread(() -> importHadoopFile(fs, filePath, report)).start();
|
||||
try {
|
||||
this.nextElement = this.queue.take();
|
||||
} catch (final InterruptedException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
protected BaseCollectorIterator(final String resourcePath, final AggregatorReport report) {
|
||||
new Thread(() -> importTestFile(resourcePath, report)).start();
|
||||
try {
|
||||
this.nextElement = this.queue.take();
|
||||
} catch (final InterruptedException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized boolean hasNext() {
|
||||
return (this.nextElement != null) & !END_ELEM.equals(this.nextElement);
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized String next() {
|
||||
try {
|
||||
return END_ELEM.equals(this.nextElement) ? null : this.nextElement;
|
||||
} finally {
|
||||
try {
|
||||
this.nextElement = this.queue.take();
|
||||
} catch (final InterruptedException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void importHadoopFile(final FileSystem fs, final Path filePath, final AggregatorReport report) {
|
||||
log.info("I start to read the TAR stream");
|
||||
|
||||
try (InputStream origInputStream = fs.open(filePath);
|
||||
final TarArchiveInputStream tarInputStream = new TarArchiveInputStream(origInputStream)) {
|
||||
importTarStream(tarInputStream, report);
|
||||
} catch (final Throwable e) {
|
||||
throw new RuntimeException("Error processing BASE records", e);
|
||||
}
|
||||
}
|
||||
|
||||
private void importTestFile(final String resourcePath, final AggregatorReport report) {
|
||||
try (final InputStream origInputStream = BaseCollectorIterator.class.getResourceAsStream(resourcePath);
|
||||
final TarArchiveInputStream tarInputStream = new TarArchiveInputStream(origInputStream)) {
|
||||
importTarStream(tarInputStream, report);
|
||||
} catch (final Throwable e) {
|
||||
throw new RuntimeException("Error processing BASE records", e);
|
||||
}
|
||||
}
|
||||
|
||||
private void importTarStream(final TarArchiveInputStream tarInputStream, final AggregatorReport report) {
|
||||
long count = 0;
|
||||
|
||||
final XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance();
|
||||
final XMLOutputFactory xmlOutputFactory = XMLOutputFactory.newInstance();
|
||||
|
||||
try {
|
||||
TarArchiveEntry entry;
|
||||
while ((entry = (TarArchiveEntry) tarInputStream.getNextEntry()) != null) {
|
||||
final String name = entry.getName();
|
||||
|
||||
if (!entry.isDirectory() && name.contains("ListRecords") && name.endsWith(".bz2")) {
|
||||
|
||||
log.info("Processing file (BZIP): " + name);
|
||||
|
||||
final byte[] bzipData = new byte[(int) entry.getSize()];
|
||||
IOUtils.readFully(tarInputStream, bzipData);
|
||||
|
||||
try (InputStream bzipIs = new ByteArrayInputStream(bzipData);
|
||||
final BufferedInputStream bzipBis = new BufferedInputStream(bzipIs);
|
||||
final CompressorInputStream bzipInput = new CompressorStreamFactory()
|
||||
.createCompressorInputStream(bzipBis)) {
|
||||
|
||||
final XMLEventReader reader = xmlInputFactory.createXMLEventReader(bzipInput);
|
||||
|
||||
XMLEventWriter eventWriter = null;
|
||||
StringWriter xmlWriter = null;
|
||||
|
||||
while (reader.hasNext()) {
|
||||
final XMLEvent nextEvent = reader.nextEvent();
|
||||
|
||||
if (nextEvent.isStartElement()) {
|
||||
final StartElement startElement = nextEvent.asStartElement();
|
||||
if ("record".equals(startElement.getName().getLocalPart())) {
|
||||
xmlWriter = new StringWriter();
|
||||
eventWriter = xmlOutputFactory.createXMLEventWriter(xmlWriter);
|
||||
}
|
||||
}
|
||||
|
||||
if (eventWriter != null) {
|
||||
eventWriter.add(nextEvent);
|
||||
}
|
||||
|
||||
if (nextEvent.isEndElement()) {
|
||||
final EndElement endElement = nextEvent.asEndElement();
|
||||
if ("record".equals(endElement.getName().getLocalPart())) {
|
||||
eventWriter.flush();
|
||||
eventWriter.close();
|
||||
|
||||
this.queue.put(xmlWriter.toString());
|
||||
|
||||
eventWriter = null;
|
||||
xmlWriter = null;
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
this.queue.put(END_ELEM); // TO INDICATE THE END OF THE QUEUE
|
||||
} catch (final Throwable e) {
|
||||
log.error("Error processing BASE records", e);
|
||||
report.put(e.getClass().getName(), e.getMessage());
|
||||
throw new RuntimeException("Error processing BASE records", e);
|
||||
} finally {
|
||||
log.info("Total records (written in queue): " + count);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,159 @@
|
||||
|
||||
package eu.dnetlib.dhp.collection.plugin.base;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.sql.SQLException;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.Spliterator;
|
||||
import java.util.Spliterators;
|
||||
import java.util.stream.Stream;
|
||||
import java.util.stream.StreamSupport;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentException;
|
||||
import org.dom4j.DocumentHelper;
|
||||
import org.dom4j.Node;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.collection.ApiDescriptor;
|
||||
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
|
||||
import eu.dnetlib.dhp.collection.plugin.file.AbstractSplittedRecordPlugin;
|
||||
import eu.dnetlib.dhp.common.DbClient;
|
||||
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
||||
import eu.dnetlib.dhp.common.collection.CollectorException;
|
||||
|
||||
public class BaseCollectorPlugin implements CollectorPlugin {
|
||||
|
||||
private final FileSystem fs;
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(AbstractSplittedRecordPlugin.class);
|
||||
|
||||
// MAPPING AND FILTERING ARE DEFINED HERE:
|
||||
// https://docs.google.com/document/d/1Aj-ZAV11b44MCrAAUCPiS2TUlXb6PnJEu1utCMAcCOU/edit
|
||||
|
||||
public BaseCollectorPlugin(final FileSystem fs) {
|
||||
this.fs = fs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<String> collect(final ApiDescriptor api, final AggregatorReport report) throws CollectorException {
|
||||
// the path of the dump file on HDFS
|
||||
// http://oai.base-search.net/initial_load/base_oaipmh_dump-current.tar
|
||||
// it could be downloaded from iis-cdh5-test-gw.ocean.icm.edu.pl and then copied on HDFS
|
||||
final Path filePath = Optional
|
||||
.ofNullable(api.getBaseUrl())
|
||||
.map(Path::new)
|
||||
.orElseThrow(() -> new CollectorException("missing baseUrl"));
|
||||
|
||||
// get the parameters for the connection to the OpenAIRE database.
|
||||
// the database is used to obtain the list of the datasources that the plugin will collect
|
||||
final String dbUrl = api.getParams().get("dbUrl");
|
||||
final String dbUser = api.getParams().get("dbUser");
|
||||
final String dbPassword = api.getParams().get("dbPassword");
|
||||
|
||||
// the types(comma separated, empty value for all) that the plugin will collect,
|
||||
// the types should be expressed in the format of the normalized types of BASE (for example 1,121,...)
|
||||
final String acceptedNormTypesString = api.getParams().get("acceptedNormTypes");
|
||||
|
||||
log.info("baseUrl: {}", filePath);
|
||||
log.info("dbUrl: {}", dbUrl);
|
||||
log.info("dbUser: {}", dbUser);
|
||||
log.info("dbPassword: {}", "***");
|
||||
log.info("acceptedNormTypes: {}", acceptedNormTypesString);
|
||||
|
||||
try {
|
||||
if (!this.fs.exists(filePath)) {
|
||||
throw new CollectorException("path does not exist: " + filePath);
|
||||
}
|
||||
} catch (final Throwable e) {
|
||||
throw new CollectorException(e);
|
||||
}
|
||||
|
||||
final Set<String> acceptedOpendoarIds = findAcceptedOpendoarIds(dbUrl, dbUser, dbPassword);
|
||||
|
||||
final Set<String> acceptedNormTypes = new HashSet<>();
|
||||
if (StringUtils.isNotBlank(acceptedNormTypesString)) {
|
||||
for (final String s : StringUtils.split(acceptedNormTypesString, ",")) {
|
||||
if (StringUtils.isNotBlank(s)) {
|
||||
acceptedNormTypes.add(s.trim());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
final Iterator<String> iterator = new BaseCollectorIterator(this.fs, filePath, report);
|
||||
final Spliterator<String> spliterator = Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED);
|
||||
return StreamSupport
|
||||
.stream(spliterator, false)
|
||||
.filter(doc -> filterXml(doc, acceptedOpendoarIds, acceptedNormTypes));
|
||||
}
|
||||
|
||||
private Set<String> findAcceptedOpendoarIds(final String dbUrl, final String dbUser, final String dbPassword)
|
||||
throws CollectorException {
|
||||
final Set<String> accepted = new HashSet<>();
|
||||
|
||||
try (final DbClient dbClient = new DbClient(dbUrl, dbUser, dbPassword)) {
|
||||
|
||||
final String sql = IOUtils
|
||||
.toString(
|
||||
getClass().getResourceAsStream("/eu/dnetlib/dhp/collection/plugin/base/sql/opendoar-accepted.sql"));
|
||||
|
||||
dbClient.processResults(sql, row -> {
|
||||
try {
|
||||
final String dsId = row.getString("id");
|
||||
log.info("Accepted Datasource: " + dsId);
|
||||
accepted.add(dsId);
|
||||
} catch (final SQLException e) {
|
||||
log.error("Error in SQL", e);
|
||||
throw new RuntimeException("Error in SQL", e);
|
||||
}
|
||||
});
|
||||
|
||||
} catch (final IOException e) {
|
||||
log.error("Error accessong SQL", e);
|
||||
throw new CollectorException("Error accessong SQL", e);
|
||||
}
|
||||
|
||||
log.info("Accepted Datasources (TOTAL): " + accepted.size());
|
||||
|
||||
return accepted;
|
||||
}
|
||||
|
||||
protected static boolean filterXml(final String xml,
|
||||
final Set<String> acceptedOpendoarIds,
|
||||
final Set<String> acceptedNormTypes) {
|
||||
try {
|
||||
|
||||
final Document doc = DocumentHelper.parseText(xml);
|
||||
|
||||
final String id = doc.valueOf("//*[local-name()='collection']/@opendoar_id").trim();
|
||||
|
||||
if (StringUtils.isBlank(id) || !acceptedOpendoarIds.contains("opendoar____::" + id)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (acceptedNormTypes.isEmpty()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
for (final Object s : doc.selectNodes("//*[local-name()='typenorm']")) {
|
||||
if (acceptedNormTypes.contains(((Node) s).getText().trim())) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
} catch (final DocumentException e) {
|
||||
log.error("Error parsing document", e);
|
||||
throw new RuntimeException("Error parsing document", e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,20 @@
|
||||
[
|
||||
{
|
||||
"paramName": "ip",
|
||||
"paramLongName": "inputPath",
|
||||
"paramDescription": "the zipped opencitations file",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "op",
|
||||
"paramLongName": "outputPath",
|
||||
"paramDescription": "the working path",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "issm",
|
||||
"paramLongName": "isSparkSessionManaged",
|
||||
"paramDescription": "the hdfs name node",
|
||||
"paramRequired": false
|
||||
}
|
||||
]
|
@ -0,0 +1,30 @@
|
||||
[
|
||||
|
||||
{
|
||||
"paramName": "issm",
|
||||
"paramLongName": "isSparkSessionManaged",
|
||||
"paramDescription": "the hdfs name node",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName": "d",
|
||||
"paramLongName": "delimiter",
|
||||
"paramDescription": "the hdfs name node",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName": "op",
|
||||
"paramLongName": "outputPath",
|
||||
"paramDescription": "the hdfs name node",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "if",
|
||||
"paramLongName": "inputFile",
|
||||
"paramDescription": "the hdfs name node",
|
||||
"paramRequired": true
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
|
@ -0,0 +1,58 @@
|
||||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>yarnRM</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.use.system.libpath</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>spark2</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hive_metastore_uris</name>
|
||||
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2YarnHistoryServerAddress</name>
|
||||
<value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2ExtraListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2SqlQueryExecutionListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorNumber</name>
|
||||
<value>4</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2EventLogDir</name>
|
||||
<value>/user/spark/spark2ApplicationHistory</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkDriverMemory</name>
|
||||
<value>15G</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorMemory</name>
|
||||
<value>6G</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorCores</name>
|
||||
<value>1</value>
|
||||
</property>
|
||||
</configuration>
|
@ -0,0 +1,2 @@
|
||||
#!/bin/bash
|
||||
curl -L $1 | hdfs dfs -put - $2
|
@ -0,0 +1,82 @@
|
||||
<workflow-app name="Transfomative Agreement Integration" xmlns="uri:oozie:workflow:0.5">
|
||||
|
||||
<global>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>mapreduce.job.queuename</name>
|
||||
<value>${queueName}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.launcher.mapred.job.queue.name</name>
|
||||
<value>${oozieLauncherQueueName}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>${oozieActionShareLibForSpark2}</value>
|
||||
</property>
|
||||
|
||||
</configuration>
|
||||
</global>
|
||||
|
||||
<start to="resume_from"/>
|
||||
|
||||
<decision name="resume_from">
|
||||
<switch>
|
||||
<case to="download">${wf:conf('resumeFrom') eq 'DownloadDump'}</case>
|
||||
<default to="create_actionset"/> <!-- first action to be done when downloadDump is to be performed -->
|
||||
</switch>
|
||||
</decision>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="download">
|
||||
<shell xmlns="uri:oozie:shell-action:0.2">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>mapred.job.queue.name</name>
|
||||
<value>${queueName}</value>
|
||||
</property>
|
||||
</configuration>
|
||||
<exec>download.sh</exec>
|
||||
<argument>${inputFile}</argument>
|
||||
<argument>${workingDir}/transformativeagreement/transformativeAgreement.json</argument>
|
||||
<env-var>HADOOP_USER_NAME=${wf:user()}</env-var>
|
||||
<file>download.sh</file>
|
||||
<capture-output/>
|
||||
</shell>
|
||||
<ok to="create_actionset"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
||||
<action name="create_actionset">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Produces the AS for the Transformative Agreement</name>
|
||||
<class>eu.dnetlib.dhp.actionmanager.transformativeagreement.CreateActionSetSparkJob</class>
|
||||
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--inputPath</arg><arg>${workingDir}/transformativeagreement/</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
<end name="End"/>
|
||||
</workflow-app>
|
@ -0,0 +1,26 @@
|
||||
[
|
||||
{
|
||||
"paramName": "m",
|
||||
"paramLongName": "master",
|
||||
"paramDescription": "the master name",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "t",
|
||||
"paramLongName": "targetPath",
|
||||
"paramDescription": "the target PATH of the DF tables",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "g",
|
||||
"paramLongName": "graphPath",
|
||||
"paramDescription": "the PATH of the current graph path",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "u",
|
||||
"paramLongName": "updatePath",
|
||||
"paramDescription": "the PATH of the current graph update path",
|
||||
"paramRequired": true
|
||||
}
|
||||
]
|
@ -0,0 +1,37 @@
|
||||
[ {
|
||||
"paramName": "n",
|
||||
"paramLongName": "namenode",
|
||||
"paramDescription": "the Name Node URI",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "m",
|
||||
"paramLongName": "master",
|
||||
"paramDescription": "the master name",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "t",
|
||||
"paramLongName": "targetPath",
|
||||
"paramDescription": "the target PATH where download the files",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "a",
|
||||
"paramLongName": "apiURL",
|
||||
"paramDescription": "the URL to download the tar file",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "g",
|
||||
"paramLongName": "graphPath",
|
||||
"paramDescription": "the path of the input graph",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "at",
|
||||
"paramLongName": "accessToken",
|
||||
"paramDescription": "the accessToken to contact API",
|
||||
"paramRequired": true
|
||||
}
|
||||
]
|
@ -0,0 +1,23 @@
|
||||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>yarnRM</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.use.system.libpath</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>spark2</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
</configuration>
|
@ -0,0 +1,114 @@
|
||||
<workflow-app name="download_Update_ORCID" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
<property>
|
||||
<name>graphPath</name>
|
||||
<description>the path to store the original ORCID dump</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>targetPath</name>
|
||||
<description>the path to store the original ORCID dump</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>apiURL</name>
|
||||
<value>http://74804fb637bd8e2fba5b-e0a029c2f87486cddec3b416996a6057.r3.cf1.rackcdn.com/last_modified.csv.tar</value>
|
||||
<description>The URL of the update CSV list </description>
|
||||
</property>
|
||||
<property>
|
||||
<name>accessToken</name>
|
||||
<description>The access token</description>
|
||||
</property>
|
||||
|
||||
|
||||
</parameters>
|
||||
|
||||
<start to="startUpdate"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="startUpdate">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Check Latest Orcid and Download updates</name>
|
||||
<class>eu.dnetlib.dhp.collection.orcid.OrcidGetUpdatesFile</class>
|
||||
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.executor.memoryOverhead=2g
|
||||
--conf spark.sql.shuffle.partitions=3000
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--master</arg><arg>yarn</arg>
|
||||
<arg>--namenode</arg><arg>${nameNode}</arg>
|
||||
<arg>--graphPath</arg><arg>${graphPath}</arg>
|
||||
<arg>--targetPath</arg><arg>${targetPath}</arg>
|
||||
<arg>--apiURL</arg><arg>${apiURL}</arg>
|
||||
<arg>--accessToken</arg><arg>${accessToken}</arg>
|
||||
</spark>
|
||||
<ok to="generateTables"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
||||
<action name="generateTables">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Generate ORCID Tables</name>
|
||||
<class>eu.dnetlib.dhp.collection.orcid.SparkGenerateORCIDTable</class>
|
||||
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.executor.memoryOverhead=2g
|
||||
--conf spark.sql.shuffle.partitions=3000
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${targetPath}</arg>
|
||||
<arg>--targetPath</arg><arg>${targetPath}/updateTable</arg>
|
||||
<arg>--fromUpdate</arg><arg>true</arg>
|
||||
<arg>--master</arg><arg>yarn</arg>
|
||||
</spark>
|
||||
<ok to="updateTable"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="updateTable">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Update ORCID Tables</name>
|
||||
<class>eu.dnetlib.dhp.collection.orcid.SparkApplyUpdate</class>
|
||||
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.executor.memoryOverhead=2g
|
||||
--conf spark.sql.shuffle.partitions=3000
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--graphPath</arg><arg>${graphPath}</arg>
|
||||
<arg>--updatePath</arg><arg>${targetPath}/updateTable</arg>
|
||||
<arg>--targetPath</arg><arg>${targetPath}/newTable</arg>
|
||||
<arg>--master</arg><arg>yarn</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
<end name="End"/>
|
||||
</workflow-app>
|
@ -0,0 +1,114 @@
|
||||
BEGIN;
|
||||
|
||||
INSERT INTO dsm_services(
|
||||
_dnet_resource_identifier_,
|
||||
id,
|
||||
officialname,
|
||||
englishname,
|
||||
namespaceprefix,
|
||||
websiteurl,
|
||||
logourl,
|
||||
platform,
|
||||
contactemail,
|
||||
collectedfrom,
|
||||
provenanceaction,
|
||||
_typology_to_remove_,
|
||||
eosc_type,
|
||||
eosc_datasource_type,
|
||||
research_entity_types,
|
||||
thematic
|
||||
) VALUES (
|
||||
'openaire____::base_search',
|
||||
'openaire____::base_search',
|
||||
'Bielefeld Academic Search Engine (BASE)',
|
||||
'Bielefeld Academic Search Engine (BASE)',
|
||||
'base_search_',
|
||||
'https://www.base-search.net',
|
||||
'https://www.base-search.net/about/download/logo_224x57_white.gif',
|
||||
'BASE',
|
||||
'openaire-helpdesk@uni-bielefeld.de',
|
||||
'infrastruct_::openaire',
|
||||
'user:insert',
|
||||
'aggregator::pubsrepository::unknown',
|
||||
'Data Source',
|
||||
'Aggregator',
|
||||
ARRAY['Research Products'],
|
||||
false
|
||||
);
|
||||
|
||||
INSERT INTO dsm_service_organization(
|
||||
_dnet_resource_identifier_,
|
||||
organization,
|
||||
service
|
||||
) VALUES (
|
||||
'fairsharing_::org::214@@openaire____::base_search',
|
||||
'fairsharing_::org::214',
|
||||
'openaire____::base_search'
|
||||
);
|
||||
|
||||
INSERT INTO dsm_api(
|
||||
_dnet_resource_identifier_,
|
||||
id,
|
||||
service,
|
||||
protocol,
|
||||
baseurl,
|
||||
metadata_identifier_path
|
||||
) VALUES (
|
||||
'api_________::openaire____::base_search::dump',
|
||||
'api_________::openaire____::base_search::dump',
|
||||
'openaire____::base_search',
|
||||
'baseDump',
|
||||
'/user/michele.artini/base-import/base_oaipmh_dump-current.tar',
|
||||
'//*[local-name()=''header'']/*[local-name()=''identifier'']'
|
||||
);
|
||||
|
||||
|
||||
INSERT INTO dsm_apiparams(
|
||||
_dnet_resource_identifier_,
|
||||
api,
|
||||
param,
|
||||
value
|
||||
) VALUES (
|
||||
'api_________::openaire____::base_search::dump@@dbUrl',
|
||||
'api_________::openaire____::base_search::dump',
|
||||
'dbUrl',
|
||||
'jdbc:postgresql://postgresql.services.openaire.eu:5432/dnet_openaireplus'
|
||||
);
|
||||
|
||||
INSERT INTO dsm_apiparams(
|
||||
_dnet_resource_identifier_,
|
||||
api,
|
||||
param,
|
||||
value
|
||||
) VALUES (
|
||||
'api_________::openaire____::base_search::dump@@dbUser',
|
||||
'api_________::openaire____::base_search::dump',
|
||||
'dbUser',
|
||||
'dnet'
|
||||
);
|
||||
|
||||
INSERT INTO dsm_apiparams(
|
||||
_dnet_resource_identifier_,
|
||||
api,
|
||||
param,
|
||||
value
|
||||
) VALUES (
|
||||
'api_________::openaire____::base_search::dump@@dbPassword',
|
||||
'api_________::openaire____::base_search::dump',
|
||||
'dbPassword',
|
||||
'***'
|
||||
);
|
||||
|
||||
INSERT INTO dsm_apiparams(
|
||||
_dnet_resource_identifier_,
|
||||
api,
|
||||
param,
|
||||
value
|
||||
) VALUES (
|
||||
'api_________::openaire____::base_search::dump@@acceptedNormTypes',
|
||||
'api_________::openaire____::base_search::dump',
|
||||
'acceptedNormTypes',
|
||||
'1,11,111,121,14,15,18,181,182,183,1A,6,7'
|
||||
);
|
||||
|
||||
COMMIT;
|
@ -0,0 +1,9 @@
|
||||
select s.id as id
|
||||
from dsm_services s
|
||||
where collectedfrom = 'openaire____::opendoar'
|
||||
and jurisdiction = 'Institutional'
|
||||
and s.id in (
|
||||
select service from dsm_api where coalesce(compatibility_override, compatibility) = 'driver' or coalesce(compatibility_override, compatibility) = 'UNKNOWN'
|
||||
) and s.id not in (
|
||||
select service from dsm_api where coalesce(compatibility_override, compatibility) like '%openaire%'
|
||||
);
|
@ -0,0 +1,11 @@
|
||||
select
|
||||
s.id as id,
|
||||
s.jurisdiction as jurisdiction,
|
||||
array_remove(array_agg(a.id || ' (compliance: ' || coalesce(a.compatibility_override, a.compatibility, 'UNKNOWN') || ')@@@' || coalesce(a.last_collection_total, 0)), NULL) as aggregations
|
||||
from
|
||||
dsm_services s
|
||||
join dsm_api a on (s.id = a.service)
|
||||
where
|
||||
collectedfrom = 'openaire____::opendoar'
|
||||
group by
|
||||
s.id;
|
@ -0,0 +1,180 @@
|
||||
<RESOURCE_PROFILE>
|
||||
<HEADER>
|
||||
<RESOURCE_IDENTIFIER value="c67911d6-9988-4a3b-b965-7d39bdd4a31d_Vm9jYWJ1bGFyeURTUmVzb3VyY2VzL1ZvY2FidWxhcnlEU1Jlc291cmNlVHlwZQ==" />
|
||||
<RESOURCE_TYPE value="VocabularyDSResourceType" />
|
||||
<RESOURCE_KIND value="VocabularyDSResources" />
|
||||
<RESOURCE_URI value="" />
|
||||
<DATE_OF_CREATION value="2024-02-13T11:15:48+00:00" />
|
||||
</HEADER>
|
||||
<BODY>
|
||||
<CONFIGURATION>
|
||||
<VOCABULARY_NAME code="base:normalized_types">base:normalized_types</VOCABULARY_NAME>
|
||||
<VOCABULARY_DESCRIPTION>base:normalized_types</VOCABULARY_DESCRIPTION>
|
||||
<TERMS>
|
||||
<TERM native_name="Text" code="Text" english_name="Text" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="1" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Book" code="Book" english_name="Book" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="11" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Book part" code="Book part" english_name="Book part" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="111" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Journal/Newspaper" code="Journal/Newspaper" english_name="Journal/Newspaper" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="12" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Article contribution" code="Article contribution" english_name="Article contribution" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="121" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Other non-article" code="Other non-article" english_name="Other non-article" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="122" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Conference object" code="Conference object" english_name="Conference object" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="13" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Report" code="Report" english_name="Report" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="14" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Review" code="Review" english_name="Review" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="15" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Course material" code="Course material" english_name="Course material" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="16" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Lecture" code="Lecture" english_name="Lecture" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="17" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Thesis" code="Thesis" english_name="Thesis" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="18" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Bachelor's thesis" code="Bachelor's thesis" english_name="Bachelor's thesis" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="181" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Master's thesis" code="Master's thesis" english_name="Master's thesis" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="182" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Doctoral and postdoctoral thesis" code="Doctoral and postdoctoral thesis" english_name="Doctoral and postdoctoral thesis" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="183" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Manuscript" code="Manuscript" english_name="Manuscript" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="19" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Patent" code="Patent" english_name="Patent" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="1A" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Musical notation" code="Musical notation" english_name="Musical notation" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="2" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Map" code="Map" english_name="Map" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="3" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Audio" code="Audio" english_name="Audio" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="4" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Image/Video" code="Image/Video" english_name="Image/Video" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="5" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Still image" code="Still image" english_name="Still image" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="51" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Moving image/Video" code="Moving image/Video" english_name="Moving image/Video" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="52" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Software" code="Software" english_name="Software" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="6" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Dataset" code="Dataset" english_name="Dataset" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="7" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
<TERM native_name="Unknown" code="Unknown" english_name="Unknown" encoding="BASE">
|
||||
<SYNONYMS>
|
||||
<SYNONYM term="F" encoding="BASE" />
|
||||
</SYNONYMS>
|
||||
<RELATIONS />
|
||||
</TERM>
|
||||
|
||||
</TERMS>
|
||||
</CONFIGURATION>
|
||||
<STATUS>
|
||||
<LAST_UPDATE value="2013-11-18T10:46:36Z" />
|
||||
</STATUS>
|
||||
<SECURITY_PARAMETERS>String</SECURITY_PARAMETERS>
|
||||
</BODY>
|
||||
</RESOURCE_PROFILE>
|
||||
|
||||
|
@ -0,0 +1,432 @@
|
||||
<RESOURCE_PROFILE>
|
||||
<HEADER>
|
||||
<RESOURCE_IDENTIFIER value="" />
|
||||
<RESOURCE_TYPE value="TransformationRuleDSResourceType" />
|
||||
<RESOURCE_KIND value="TransformationRuleDSResources" />
|
||||
<RESOURCE_URI value="" />
|
||||
<DATE_OF_CREATION value="2024-03-05T11:23:00+00:00" />
|
||||
</HEADER>
|
||||
<BODY>
|
||||
<CONFIGURATION>
|
||||
<SOURCE_METADATA_FORMAT interpretation="cleaned" layout="store" name="dc" />
|
||||
<SINK_METADATA_FORMAT name="oaf_hbase" />
|
||||
<IMPORTED />
|
||||
<SCRIPT>
|
||||
<TITLE>xslt_base2oaf_hadoop</TITLE>
|
||||
<CODE>
|
||||
<xsl:stylesheet xmlns:oaire="http://namespace.openaire.eu/schema/oaire/" xmlns:dateCleaner="http://eu/dnetlib/transform/dateISO"
|
||||
xmlns:base_dc="http://oai.base-search.net/base_dc/"
|
||||
xmlns:datacite="http://datacite.org/schema/kernel-4" xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:vocabulary="http://eu/dnetlib/transform/clean" xmlns:oaf="http://namespace.openaire.eu/oaf"
|
||||
xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
exclude-result-prefixes="xsl vocabulary dateCleaner base_dc" version="2.0">
|
||||
<xsl:param name="varOfficialName" />
|
||||
<xsl:param name="varDataSourceId" />
|
||||
<xsl:param name="varFP7" select="'corda_______::'" />
|
||||
<xsl:param name="varH2020" select="'corda__h2020::'" />
|
||||
<xsl:param name="repoCode" select="substring-before(//*[local-name() = 'header']/*[local-name()='recordIdentifier'], ':')" />
|
||||
<xsl:param name="index" select="0" />
|
||||
<xsl:param name="transDate" select="current-dateTime()" />
|
||||
|
||||
<xsl:template name="terminate">
|
||||
<xsl:message terminate="yes">
|
||||
record is not compliant, transformation is interrupted.
|
||||
</xsl:message>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="/">
|
||||
<record>
|
||||
<xsl:apply-templates select="//*[local-name() = 'header']" />
|
||||
|
||||
|
||||
<!-- TO EVALUATE
|
||||
base_dc:authod_id
|
||||
base_dc:authod_id/base_dc:creator_id
|
||||
base_dc:authod_id/base_dc:creator_name
|
||||
|
||||
example:
|
||||
|
||||
<dc:creator>ALBU, Svetlana</dc:creator>
|
||||
|
||||
<base_dc:authod_id>
|
||||
<base_dc:creator_name>ALBU, Svetlana</base_dc:creator_name>
|
||||
<base_dc:creator_id>https://orcid.org/0000-0002-8648-950X</base_dc:creator_id>
|
||||
</base_dc:authod_id>
|
||||
-->
|
||||
|
||||
<!-- NOT USED
|
||||
base_dc:global_id (I used oai:identifier)
|
||||
base_dc:collection/text()
|
||||
|
||||
base_dc:continent
|
||||
base_dc:country
|
||||
base_dc:year (I used dc:date)
|
||||
dc:coverage
|
||||
dc:language (I used base_dc:lang)
|
||||
base_dc:link (I used dc:identifier)
|
||||
-->
|
||||
|
||||
<metadata>
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:title" />
|
||||
<xsl:with-param name="targetElement" select="'dc:title'" />
|
||||
</xsl:call-template>
|
||||
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:creator/replace(., '^(.*)\|.*$', '$1')" />
|
||||
<xsl:with-param name="targetElement" select="'dc:creator'" />
|
||||
</xsl:call-template>
|
||||
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:contributor" />
|
||||
<xsl:with-param name="targetElement" select="'dc:contributor'" />
|
||||
</xsl:call-template>
|
||||
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:description" />
|
||||
<xsl:with-param name="targetElement" select="'dc:description'" />
|
||||
</xsl:call-template>
|
||||
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:subject" />
|
||||
<xsl:with-param name="targetElement" select="'dc:subject'" />
|
||||
</xsl:call-template>
|
||||
|
||||
<!-- TODO: I'm not sure if this is the correct encoding -->
|
||||
<xsl:for-each select="//base_dc:classcode|//base_dc:autoclasscode">
|
||||
<dc:subject><xsl:value-of select="concat(@type, ':', .)" /></dc:subject>
|
||||
</xsl:for-each>
|
||||
<!-- END TODO -->
|
||||
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:publisher" />
|
||||
<xsl:with-param name="targetElement" select="'dc:publisher'" />
|
||||
</xsl:call-template>
|
||||
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:format" />
|
||||
<xsl:with-param name="targetElement" select="'dc:format'" />
|
||||
</xsl:call-template>
|
||||
|
||||
|
||||
<xsl:for-each select="//base_dc:typenorm">
|
||||
<dc:type>
|
||||
<xsl:value-of select="vocabulary:clean(., 'base:normalized_types')" />
|
||||
</dc:type>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:type" />
|
||||
<xsl:with-param name="targetElement" select="'dc:type'" />
|
||||
</xsl:call-template>
|
||||
|
||||
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:source" />
|
||||
<xsl:with-param name="targetElement" select="'dc:source'" />
|
||||
</xsl:call-template>
|
||||
|
||||
<dc:language>
|
||||
<xsl:value-of select="vocabulary:clean( //base_dc:lang, 'dnet:languages')" />
|
||||
</dc:language>
|
||||
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:rights" />
|
||||
<xsl:with-param name="targetElement" select="'dc:rights'" />
|
||||
</xsl:call-template>
|
||||
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:relation" />
|
||||
<xsl:with-param name="targetElement" select="'dc:relation'" />
|
||||
</xsl:call-template>
|
||||
|
||||
<xsl:if test="not(//dc:identifier[starts-with(., 'http')])">
|
||||
<xsl:call-template name="terminate" />
|
||||
</xsl:if>
|
||||
|
||||
<xsl:call-template name="allElements">
|
||||
<xsl:with-param name="sourceElement" select="//dc:identifier[starts-with(., 'http')]" />
|
||||
<xsl:with-param name="targetElement" select="'dc:identifier'" />
|
||||
</xsl:call-template>
|
||||
|
||||
<xsl:for-each select="//dc:relation">
|
||||
<xsl:if test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', 'i')">
|
||||
<oaf:projectid>
|
||||
<xsl:value-of select="concat($varFP7, replace(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', '$2', 'i'))" />
|
||||
</oaf:projectid>
|
||||
</xsl:if>
|
||||
<xsl:if test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/h2020/)(\d\d\d\d\d\d)(.*)', 'i')">
|
||||
<oaf:projectid>
|
||||
<xsl:value-of select="concat($varH2020, replace(normalize-space(.), '(info:eu-repo/grantagreement/ec/h2020/)(\d\d\d\d\d\d)(.*)', '$2', 'i'))" />
|
||||
</oaf:projectid>
|
||||
</xsl:if>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:choose>
|
||||
<!-- I used an inline mapping because the field typenorm could be repeated and I have to specify a list of priority -->
|
||||
|
||||
<!-- Book part -->
|
||||
<xsl:when test="//base_dc:typenorm = '111'">
|
||||
<dr:CobjCategory type="publication">0013</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Book -->
|
||||
<xsl:when test="//base_dc:typenorm = '11'">
|
||||
<dr:CobjCategory type="publication">0002</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Article contribution -->
|
||||
<xsl:when test="//base_dc:typenorm = '121'">
|
||||
<dr:CobjCategory type="publication">0001</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
|
||||
<!-- Journal/Newspaper -->
|
||||
<xsl:when test="//base_dc:typenorm = '12'">
|
||||
<dr:CobjCategory type="publication">0043</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Report -->
|
||||
<xsl:when test="//base_dc:typenorm = '14'">
|
||||
<dr:CobjCategory type="publication">0017</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Review -->
|
||||
<xsl:when test="//base_dc:typenorm = '15'">
|
||||
<dr:CobjCategory type="publication">0015</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Lecture -->
|
||||
<xsl:when test="//base_dc:typenorm = '17'">
|
||||
<dr:CobjCategory type="publication">0010</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Bachelor's thesis -->
|
||||
<xsl:when test="//base_dc:typenorm = '181'">
|
||||
<dr:CobjCategory type="publication">0008</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Master's thesis -->
|
||||
<xsl:when test="//base_dc:typenorm = '182'">
|
||||
<dr:CobjCategory type="publication">0007</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Doctoral and postdoctoral thesis -->
|
||||
<xsl:when test="//base_dc:typenorm = '183'">
|
||||
<dr:CobjCategory type="publication">0006</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Thesis -->
|
||||
<xsl:when test="//base_dc:typenorm = '18'">
|
||||
<dr:CobjCategory type="publication">0044</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Patent -->
|
||||
<xsl:when test="//base_dc:typenorm = '1A'">
|
||||
<dr:CobjCategory type="publication">0019</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Text -->
|
||||
<xsl:when test="//base_dc:typenorm = '1'">
|
||||
<dr:CobjCategory type="publication">0001</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Software -->
|
||||
<xsl:when test="//base_dc:typenorm = '6'">
|
||||
<dr:CobjCategory type="software">0029</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Dataset -->
|
||||
<xsl:when test="//base_dc:typenorm = '7'">
|
||||
<dr:CobjCategory type="dataset">0021</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Still image -->
|
||||
<xsl:when test="//base_dc:typenorm = '51'">
|
||||
<dr:CobjCategory type="other">0025</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Moving image/Video -->
|
||||
<xsl:when test="//base_dc:typenorm = '52'">
|
||||
<dr:CobjCategory type="other">0024</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Image/Video -->
|
||||
<xsl:when test="//base_dc:typenorm = '5'">
|
||||
<dr:CobjCategory type="other">0033</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Audio -->
|
||||
<xsl:when test="//base_dc:typenorm = '4'">
|
||||
<dr:CobjCategory type="other">0030</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Musical notation -->
|
||||
<xsl:when test="//base_dc:typenorm = '2'">
|
||||
<dr:CobjCategory type="other">0020</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Map -->
|
||||
<xsl:when test="//base_dc:typenorm = '3'">
|
||||
<dr:CobjCategory type="other">0020</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Other non-article -->
|
||||
<xsl:when test="//base_dc:typenorm = '122'">
|
||||
<dr:CobjCategory type="publication">0038</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Course material -->
|
||||
<xsl:when test="//base_dc:typenorm = '16'">
|
||||
<dr:CobjCategory type="publication">0038</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Manuscript -->
|
||||
<xsl:when test="//base_dc:typenorm = '19'">
|
||||
<dr:CobjCategory type="publication">0038</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Conference object -->
|
||||
<xsl:when test="//base_dc:typenorm = '13'">
|
||||
<dr:CobjCategory type="publication">0004</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Unknown -->
|
||||
<xsl:when test="//base_dc:typenorm = 'F'">
|
||||
<dr:CobjCategory type="other">0000</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<dr:CobjCategory type="other">0000</dr:CobjCategory>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
|
||||
|
||||
<oaf:accessrights>
|
||||
<xsl:choose>
|
||||
<xsl:when test="//base_dc:oa[.='0']">CLOSED</xsl:when>
|
||||
<xsl:when test="//base_dc:oa[.='1']">OPEN</xsl:when>
|
||||
<xsl:when test="//base_dc:oa[.='2']">UNKNOWN</xsl:when>
|
||||
<xsl:when test="//base_dc:rightsnorm">
|
||||
<xsl:value-of select="vocabulary:clean(//base_dc:rightsnorm, 'dnet:access_modes')" />
|
||||
</xsl:when>
|
||||
<xsl:when test="//dc:rights">
|
||||
<xsl:value-of select="vocabulary:clean( //dc:rights, 'dnet:access_modes')" />
|
||||
</xsl:when>
|
||||
<xsl:otherwise>UNKNOWN</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</oaf:accessrights>
|
||||
|
||||
<xsl:for-each select="//base_dc:doi">
|
||||
<oaf:identifier identifierType="doi">
|
||||
<xsl:value-of select="." />
|
||||
</oaf:identifier>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'http') and (not(contains(., '://dx.doi.org/') or contains(., '://doi.org/') or contains(., '://hdl.handle.net/')))])">
|
||||
<oaf:identifier identifierType="url">
|
||||
<xsl:value-of select="." />
|
||||
</oaf:identifier>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'http') and contains(., '://hdl.handle.net/')]/substring-after(., 'hdl.handle.net/'))">
|
||||
<oaf:identifier identifierType="handle">
|
||||
<xsl:value-of select="." />
|
||||
</oaf:identifier>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'urn:nbn:nl:') or starts-with(., 'URN:NBN:NL:')])">
|
||||
<oaf:identifier identifierType='urn'>
|
||||
<xsl:value-of select="." />
|
||||
</oaf:identifier>
|
||||
</xsl:for-each>
|
||||
|
||||
<oaf:identifier identifierType="oai-original">
|
||||
<xsl:value-of
|
||||
select="//oai:header/oai:identifier" />
|
||||
</oaf:identifier>
|
||||
|
||||
<oaf:hostedBy>
|
||||
<xsl:attribute name="name">
|
||||
<xsl:value-of select="//base_dc:collname" />
|
||||
</xsl:attribute>
|
||||
<xsl:attribute name="id">
|
||||
<xsl:value-of select="concat('opendoar____::', //base_dc:collection/@opendoar_id)" />
|
||||
</xsl:attribute>
|
||||
</oaf:hostedBy>
|
||||
|
||||
<oaf:collectedFrom>
|
||||
<xsl:attribute name="name">
|
||||
<xsl:value-of select="$varOfficialName" />
|
||||
</xsl:attribute>
|
||||
<xsl:attribute name="id">
|
||||
<xsl:value-of select="$varDataSourceId" />
|
||||
</xsl:attribute>
|
||||
</oaf:collectedFrom>
|
||||
|
||||
<oaf:dateAccepted>
|
||||
<xsl:value-of select="dateCleaner:dateISO( //dc:date[1] )" />
|
||||
</oaf:dateAccepted>
|
||||
|
||||
<xsl:if test="//base_dc:oa[.='1']">
|
||||
<xsl:for-each select="//dc:relation[starts-with(., 'http')]">
|
||||
<oaf:fulltext>
|
||||
<xsl:value-of select="normalize-space(.)" />
|
||||
</oaf:fulltext>
|
||||
</xsl:for-each>
|
||||
</xsl:if>
|
||||
|
||||
<xsl:for-each select="//base_dc:collection/@ror_id">
|
||||
<oaf:relation relType="resultOrganization"
|
||||
subRelType="affiliation"
|
||||
relClass="hasAuthorInstitution"
|
||||
targetType="organization">
|
||||
<xsl:choose>
|
||||
<xsl:when test="contains(.,'https://ror.org/')">
|
||||
<xsl:value-of select="concat('ror_________::', normalize-space(.))" />
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<xsl:value-of select="concat('ror_________::https://ror.org/', normalize-space(.))" />
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</oaf:relation>
|
||||
</xsl:for-each>
|
||||
</metadata>
|
||||
<xsl:copy-of select="//*[local-name() = 'about']" />
|
||||
</record>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template name="allElements">
|
||||
<xsl:param name="sourceElement" />
|
||||
<xsl:param name="targetElement" />
|
||||
<xsl:for-each select="$sourceElement">
|
||||
<xsl:element name="{$targetElement}">
|
||||
<xsl:value-of select="normalize-space(.)" />
|
||||
</xsl:element>
|
||||
</xsl:for-each>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="//*[local-name() = 'header']">
|
||||
<xsl:if test="//oai:header/@status='deleted'">
|
||||
<xsl:call-template name="terminate" />
|
||||
</xsl:if>
|
||||
<xsl:copy>
|
||||
<xsl:apply-templates select="node()|@*" />
|
||||
<xsl:element name="dr:dateOfTransformation">
|
||||
<xsl:value-of select="$transDate" />
|
||||
</xsl:element>
|
||||
</xsl:copy>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="node()|@*">
|
||||
<xsl:copy>
|
||||
<xsl:apply-templates select="node()|@*" />
|
||||
</xsl:copy>
|
||||
</xsl:template>
|
||||
</xsl:stylesheet>
|
||||
</CODE>
|
||||
</SCRIPT>
|
||||
</CONFIGURATION>
|
||||
<STATUS />
|
||||
<SECURITY_PARAMETERS />
|
||||
</BODY>
|
||||
</RESOURCE_PROFILE>
|
@ -0,0 +1,461 @@
|
||||
<RESOURCE_PROFILE>
|
||||
<HEADER>
|
||||
<RESOURCE_IDENTIFIER value="2ad0cdd9-c96c-484c-8b0e-ed56d86891fe_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU=" />
|
||||
<RESOURCE_TYPE value="TransformationRuleDSResourceType" />
|
||||
<RESOURCE_KIND value="TransformationRuleDSResources" />
|
||||
<RESOURCE_URI value="" />
|
||||
<DATE_OF_CREATION value="2024-03-05T11:23:00+00:00" />
|
||||
</HEADER>
|
||||
<BODY>
|
||||
<CONFIGURATION>
|
||||
<SOURCE_METADATA_FORMAT interpretation="cleaned" layout="store" name="dc" />
|
||||
<SINK_METADATA_FORMAT name="odf_hbase" />
|
||||
<IMPORTED />
|
||||
<SCRIPT>
|
||||
<TITLE>xslt_base2odf_hadoop</TITLE>
|
||||
<CODE>
|
||||
<xsl:stylesheet xmlns:oaire="http://namespace.openaire.eu/schema/oaire/" xmlns:dateCleaner="http://eu/dnetlib/transform/dateISO" xmlns:base_dc="http://oai.base-search.net/base_dc/"
|
||||
xmlns:datacite="http://datacite.org/schema/kernel-4" xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:vocabulary="http://eu/dnetlib/transform/clean" xmlns:oaf="http://namespace.openaire.eu/oaf"
|
||||
xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
exclude-result-prefixes="xsl vocabulary dateCleaner base_dc" version="2.0">
|
||||
<xsl:param name="varOfficialName" />
|
||||
<xsl:param name="varDataSourceId" />
|
||||
<xsl:param name="varFP7" select="'corda_______::'" />
|
||||
<xsl:param name="varH2020" select="'corda__h2020::'" />
|
||||
<xsl:param name="repoCode" select="substring-before(//*[local-name() = 'header']/*[local-name()='recordIdentifier'], ':')" />
|
||||
<xsl:param name="index" select="0" />
|
||||
<xsl:param name="transDate" select="current-dateTime()" />
|
||||
|
||||
<xsl:template name="terminate">
|
||||
<xsl:message terminate="yes">
|
||||
record is not compliant, transformation is interrupted.
|
||||
</xsl:message>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="/">
|
||||
<record>
|
||||
<xsl:apply-templates select="//*[local-name() = 'header']" />
|
||||
|
||||
|
||||
<!-- NOT USED
|
||||
base_dc:global_id (I used oai:identifier)
|
||||
base_dc:collection/text()
|
||||
base_dc:continent
|
||||
base_dc:country
|
||||
dc:coverage
|
||||
dc:source
|
||||
dc:relation
|
||||
dc:type (I used //base_dc:typenorm)
|
||||
dc:language (I used base_dc:lang)
|
||||
base_dc:link (I used dc:identifier)
|
||||
-->
|
||||
|
||||
<metadata>
|
||||
<datacite:resource>
|
||||
|
||||
<xsl:for-each select="//base_dc:doi">
|
||||
<datacite:identifier identifierType="DOI">
|
||||
<xsl:value-of select="." />
|
||||
</datacite:identifier>
|
||||
</xsl:for-each>
|
||||
|
||||
<datacite:alternateIdentifiers>
|
||||
<xsl:for-each
|
||||
select="distinct-values(//dc:identifier[starts-with(., 'http') and (not(contains(., '://dx.doi.org/') or contains(., '://doi.org/') or contains(., '://hdl.handle.net/')))])">
|
||||
<datacite:identifier alternateIdentifierType="url">
|
||||
<xsl:value-of select="." />
|
||||
</datacite:identifier>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'http') and contains(., '://hdl.handle.net/')]/substring-after(., 'hdl.handle.net/'))">
|
||||
<datacite:identifier alternateIdentifierType="handle">
|
||||
<xsl:value-of select="." />
|
||||
</datacite:identifier>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'urn:nbn:nl:') or starts-with(., 'URN:NBN:NL:')])">
|
||||
<datacite:identifier alternateIdentifierType='urn'>
|
||||
<xsl:value-of select="." />
|
||||
</datacite:identifier>
|
||||
</xsl:for-each>
|
||||
|
||||
<datacite:identifier alternateIdentifierType="oai-original">
|
||||
<xsl:value-of
|
||||
select="//oai:header/oai:identifier" />
|
||||
</datacite:identifier>
|
||||
|
||||
</datacite:alternateIdentifiers>
|
||||
|
||||
<datacite:relatedIdentifiers />
|
||||
|
||||
|
||||
<xsl:for-each select="//base_dc:typenorm">
|
||||
<datacite:resourceType><xsl:value-of select="vocabulary:clean(., 'base:normalized_types')" /></datacite:resourceType>
|
||||
</xsl:for-each>
|
||||
|
||||
<datacite:titles>
|
||||
<xsl:for-each select="//dc:title">
|
||||
<datacite:title>
|
||||
<xsl:value-of select="normalize-space(.)" />
|
||||
</datacite:title>
|
||||
</xsl:for-each>
|
||||
</datacite:titles>
|
||||
|
||||
<datacite:creators>
|
||||
<xsl:for-each select="//dc:creator">
|
||||
<xsl:variable name="author" select="normalize-space(.)" />
|
||||
<datacite:creator>
|
||||
<datacite:creatorName>
|
||||
<xsl:value-of select="$author" />
|
||||
</datacite:creatorName>
|
||||
<xsl:for-each select="//base_dc:authod_id[normalize-space(./base_dc:creator_name) = $author]/base_dc:creator_id ">
|
||||
<xsl:if test="contains(.,'https://orcid.org/')">
|
||||
<nameIdentifier schemeURI="https://orcid.org/" nameIdentifierScheme="ORCID">
|
||||
<xsl:value-of select="substring-after(., 'https://orcid.org/')" />
|
||||
</nameIdentifier>
|
||||
</xsl:if>
|
||||
</xsl:for-each>
|
||||
</datacite:creator>
|
||||
</xsl:for-each>
|
||||
</datacite:creators>
|
||||
|
||||
<datacite:contributors>
|
||||
<xsl:for-each select="//dc:contributor">
|
||||
<datacite:contributor>
|
||||
<datacite:contributorName>
|
||||
<xsl:value-of select="normalize-space(.)" />
|
||||
</datacite:contributorName>
|
||||
</datacite:contributor>
|
||||
</xsl:for-each>
|
||||
</datacite:contributors>
|
||||
|
||||
<datacite:descriptions>
|
||||
<xsl:for-each select="//dc:description">
|
||||
<datacite:description descriptionType="Abstract">
|
||||
<xsl:value-of select="normalize-space(.)" />
|
||||
</datacite:description>
|
||||
</xsl:for-each>
|
||||
</datacite:descriptions>
|
||||
|
||||
<datacite:subjects>
|
||||
<xsl:for-each select="//dc:subject">
|
||||
<datacite:subject>
|
||||
<xsl:value-of select="normalize-space(.)" />
|
||||
</datacite:subject>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:for-each select="//base_dc:classcode|//base_dc:autoclasscode">
|
||||
<datacite:subject subjectScheme="{@type}" classificationCode="{normalize-space(.)}">
|
||||
<!-- TODO the value should be obtained by the Code -->
|
||||
<xsl:value-of select="normalize-space(.)" />
|
||||
</datacite:subject>
|
||||
</xsl:for-each>
|
||||
</datacite:subjects>
|
||||
|
||||
<xsl:for-each select="//dc:publisher">
|
||||
<datacite:publisher>
|
||||
<xsl:value-of select="normalize-space(.)" />
|
||||
</datacite:publisher>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:for-each select="//base_dc:year">
|
||||
<datacite:publicationYear>
|
||||
<xsl:value-of select="normalize-space(.)" />
|
||||
</datacite:publicationYear>
|
||||
</xsl:for-each>
|
||||
|
||||
<datacite:formats>
|
||||
<xsl:for-each select="//dc:format">
|
||||
<datacite:format>
|
||||
<xsl:value-of select="normalize-space(.)" />
|
||||
</datacite:format>
|
||||
</xsl:for-each>
|
||||
</datacite:formats>
|
||||
|
||||
<datacite:language>
|
||||
<xsl:value-of select="vocabulary:clean( //base_dc:lang, 'dnet:languages')" />
|
||||
</datacite:language>
|
||||
|
||||
<oaf:accessrights>
|
||||
<xsl:if test="//base_dc:oa[.='0']">
|
||||
<datacite:rights rightsURI="http://purl.org/coar/access_right/c_16ec">restricted access</datacite:rights>
|
||||
</xsl:if>
|
||||
<xsl:if test="//base_dc:oa[.='1']">
|
||||
<datacite:rights rightsURI="http://purl.org/coar/access_right/c_abf2">open access</datacite:rights>
|
||||
</xsl:if>
|
||||
<xsl:for-each select="//dc:rights|//base_dc:rightsnorm">
|
||||
<datacite:rights><xsl:value-of select="vocabulary:clean(., 'dnet:access_modes')" /></datacite:rights>
|
||||
</xsl:for-each>
|
||||
</oaf:accessrights>
|
||||
|
||||
</datacite:resource>
|
||||
|
||||
<xsl:for-each select="//dc:relation">
|
||||
<xsl:if test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', 'i')">
|
||||
<oaf:projectid>
|
||||
<xsl:value-of select="concat($varFP7, replace(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', '$2', 'i'))" />
|
||||
</oaf:projectid>
|
||||
</xsl:if>
|
||||
<xsl:if test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/h2020/)(\d\d\d\d\d\d)(.*)', 'i')">
|
||||
<oaf:projectid>
|
||||
<xsl:value-of select="concat($varH2020, replace(normalize-space(.), '(info:eu-repo/grantagreement/ec/h2020/)(\d\d\d\d\d\d)(.*)', '$2', 'i'))" />
|
||||
</oaf:projectid>
|
||||
</xsl:if>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:choose>
|
||||
<!-- I used an inline mapping because the field typenorm could be repeated and I have to specify a list of priority -->
|
||||
|
||||
<!-- Book part -->
|
||||
<xsl:when test="//base_dc:typenorm = '111'">
|
||||
<dr:CobjCategory type="publication">0013</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Book -->
|
||||
<xsl:when test="//base_dc:typenorm = '11'">
|
||||
<dr:CobjCategory type="publication">0002</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Article contribution -->
|
||||
<xsl:when test="//base_dc:typenorm = '121'">
|
||||
<dr:CobjCategory type="publication">0001</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
|
||||
<!-- Journal/Newspaper -->
|
||||
<xsl:when test="//base_dc:typenorm = '12'">
|
||||
<dr:CobjCategory type="publication">0043</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Report -->
|
||||
<xsl:when test="//base_dc:typenorm = '14'">
|
||||
<dr:CobjCategory type="publication">0017</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Review -->
|
||||
<xsl:when test="//base_dc:typenorm = '15'">
|
||||
<dr:CobjCategory type="publication">0015</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Lecture -->
|
||||
<xsl:when test="//base_dc:typenorm = '17'">
|
||||
<dr:CobjCategory type="publication">0010</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Bachelor's thesis -->
|
||||
<xsl:when test="//base_dc:typenorm = '181'">
|
||||
<dr:CobjCategory type="publication">0008</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Master's thesis -->
|
||||
<xsl:when test="//base_dc:typenorm = '182'">
|
||||
<dr:CobjCategory type="publication">0007</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Doctoral and postdoctoral thesis -->
|
||||
<xsl:when test="//base_dc:typenorm = '183'">
|
||||
<dr:CobjCategory type="publication">0006</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Thesis -->
|
||||
<xsl:when test="//base_dc:typenorm = '18'">
|
||||
<dr:CobjCategory type="publication">0044</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Patent -->
|
||||
<xsl:when test="//base_dc:typenorm = '1A'">
|
||||
<dr:CobjCategory type="publication">0019</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Text -->
|
||||
<xsl:when test="//base_dc:typenorm = '1'">
|
||||
<dr:CobjCategory type="publication">0001</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Software -->
|
||||
<xsl:when test="//base_dc:typenorm = '6'">
|
||||
<dr:CobjCategory type="software">0029</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Dataset -->
|
||||
<xsl:when test="//base_dc:typenorm = '7'">
|
||||
<dr:CobjCategory type="dataset">0021</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Still image -->
|
||||
<xsl:when test="//base_dc:typenorm = '51'">
|
||||
<dr:CobjCategory type="other">0025</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Moving image/Video -->
|
||||
<xsl:when test="//base_dc:typenorm = '52'">
|
||||
<dr:CobjCategory type="other">0024</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Image/Video -->
|
||||
<xsl:when test="//base_dc:typenorm = '5'">
|
||||
<dr:CobjCategory type="other">0033</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Audio -->
|
||||
<xsl:when test="//base_dc:typenorm = '4'">
|
||||
<dr:CobjCategory type="other">0030</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Musical notation -->
|
||||
<xsl:when test="//base_dc:typenorm = '2'">
|
||||
<dr:CobjCategory type="other">0020</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Map -->
|
||||
<xsl:when test="//base_dc:typenorm = '3'">
|
||||
<dr:CobjCategory type="other">0020</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Other non-article -->
|
||||
<xsl:when test="//base_dc:typenorm = '122'">
|
||||
<dr:CobjCategory type="publication">0038</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Course material -->
|
||||
<xsl:when test="//base_dc:typenorm = '16'">
|
||||
<dr:CobjCategory type="publication">0038</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Manuscript -->
|
||||
<xsl:when test="//base_dc:typenorm = '19'">
|
||||
<dr:CobjCategory type="publication">0038</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Conference object -->
|
||||
<xsl:when test="//base_dc:typenorm = '13'">
|
||||
<dr:CobjCategory type="publication">0004</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
|
||||
<!-- Unknown -->
|
||||
<xsl:when test="//base_dc:typenorm = 'F'">
|
||||
<dr:CobjCategory type="other">0000</dr:CobjCategory>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<dr:CobjCategory type="other">0000</dr:CobjCategory>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
|
||||
<oaf:accessrights>
|
||||
<xsl:choose>
|
||||
<xsl:when test="//base_dc:oa[.='0']">CLOSED</xsl:when>
|
||||
<xsl:when test="//base_dc:oa[.='1']">OPEN</xsl:when>
|
||||
<xsl:when test="//base_dc:oa[.='2']">UNKNOWN</xsl:when>
|
||||
<xsl:when test="//base_dc:rightsnorm">
|
||||
<xsl:value-of select="vocabulary:clean(//base_dc:rightsnorm, 'dnet:access_modes')" />
|
||||
</xsl:when>
|
||||
<xsl:when test="//dc:rights">
|
||||
<xsl:value-of select="vocabulary:clean( //dc:rights, 'dnet:access_modes')" />
|
||||
</xsl:when>
|
||||
<xsl:otherwise>UNKNOWN</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</oaf:accessrights>
|
||||
|
||||
<xsl:for-each select="//base_dc:doi">
|
||||
<oaf:identifier identifierType="doi">
|
||||
<xsl:value-of select="." />
|
||||
</oaf:identifier>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:for-each
|
||||
select="distinct-values(//dc:identifier[starts-with(., 'http') and ( not(contains(., '://dx.doi.org/') or contains(., '://doi.org/') or contains(., '://hdl.handle.net/')))])">
|
||||
<oaf:identifier identifierType="url">
|
||||
<xsl:value-of select="." />
|
||||
</oaf:identifier>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'http') and contains(., '://hdl.handle.net/')]/substring-after(., 'hdl.handle.net/'))">
|
||||
<oaf:identifier identifierType="handle">
|
||||
<xsl:value-of select="." />
|
||||
</oaf:identifier>
|
||||
</xsl:for-each>
|
||||
|
||||
<xsl:for-each select="distinct-values(//dc:identifier[starts-with(., 'urn:nbn:nl:') or starts-with(., 'URN:NBN:NL:')])">
|
||||
<oaf:identifier identifierType='urn'>
|
||||
<xsl:value-of select="." />
|
||||
</oaf:identifier>
|
||||
</xsl:for-each>
|
||||
|
||||
<oaf:identifier identifierType="oai-original">
|
||||
<xsl:value-of
|
||||
select="//oai:header/oai:identifier" />
|
||||
</oaf:identifier>
|
||||
|
||||
<oaf:hostedBy>
|
||||
<xsl:attribute name="name">
|
||||
<xsl:value-of select="//base_dc:collname" />
|
||||
</xsl:attribute>
|
||||
<xsl:attribute name="id">
|
||||
<xsl:value-of select="concat('opendoar____::', //base_dc:collection/@opendoar_id)" />
|
||||
</xsl:attribute>
|
||||
</oaf:hostedBy>
|
||||
|
||||
<oaf:collectedFrom>
|
||||
<xsl:attribute name="name">
|
||||
<xsl:value-of select="$varOfficialName" />
|
||||
</xsl:attribute>
|
||||
<xsl:attribute name="id">
|
||||
<xsl:value-of select="$varDataSourceId" />
|
||||
</xsl:attribute>
|
||||
</oaf:collectedFrom>
|
||||
|
||||
<oaf:dateAccepted>
|
||||
<xsl:value-of select="dateCleaner:dateISO( //dc:date[1] )" />
|
||||
</oaf:dateAccepted>
|
||||
|
||||
<xsl:if test="//base_dc:oa[.='1']">
|
||||
<xsl:for-each select="//dc:relation[starts-with(., 'http')]">
|
||||
<oaf:fulltext>
|
||||
<xsl:value-of select="normalize-space(.)" />
|
||||
</oaf:fulltext>
|
||||
</xsl:for-each>
|
||||
</xsl:if>
|
||||
|
||||
<xsl:for-each select="//base_dc:collection/@ror_id">
|
||||
<oaf:relation relType="resultOrganization" subRelType="affiliation" relClass="hasAuthorInstitution" targetType="organization">
|
||||
<xsl:choose>
|
||||
<xsl:when test="contains(.,'https://ror.org/')">
|
||||
<xsl:value-of select="concat('ror_________::', normalize-space(.))" />
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<xsl:value-of select="concat('ror_________::https://ror.org/', normalize-space(.))" />
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</oaf:relation>
|
||||
</xsl:for-each>
|
||||
</metadata>
|
||||
<xsl:copy-of select="//*[local-name() = 'about']" />
|
||||
</record>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="//*[local-name() = 'header']">
|
||||
<xsl:if test="//oai:header/@status='deleted'">
|
||||
<xsl:call-template name="terminate" />
|
||||
</xsl:if>
|
||||
<xsl:copy>
|
||||
<xsl:apply-templates select="node()|@*" />
|
||||
<xsl:element name="dr:dateOfTransformation">
|
||||
<xsl:value-of select="$transDate" />
|
||||
</xsl:element>
|
||||
</xsl:copy>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="node()|@*">
|
||||
<xsl:copy>
|
||||
<xsl:apply-templates select="node()|@*" />
|
||||
</xsl:copy>
|
||||
</xsl:template>
|
||||
</xsl:stylesheet>
|
||||
</CODE>
|
||||
</SCRIPT>
|
||||
</CONFIGURATION>
|
||||
<STATUS />
|
||||
<SECURITY_PARAMETERS />
|
||||
</BODY>
|
||||
</RESOURCE_PROFILE>
|
@ -0,0 +1,120 @@
|
||||
package eu.dnetlib.dhp.collection.orcid
|
||||
|
||||
import eu.dnetlib.dhp.application.AbstractScalaApplication
|
||||
import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
|
||||
import org.slf4j.{Logger, LoggerFactory}
|
||||
|
||||
class SparkApplyUpdate(propertyPath: String, args: Array[String], log: Logger)
|
||||
extends AbstractScalaApplication(propertyPath, args, log: Logger) {
|
||||
|
||||
/** Here all the spark applications runs this method
|
||||
* where the whole logic of the spark node is defined
|
||||
*/
|
||||
override def run(): Unit = {
|
||||
|
||||
val graphPath: String = parser.get("graphPath")
|
||||
log.info("found parameters graphPath: {}", graphPath)
|
||||
val updatePath: String = parser.get("updatePath")
|
||||
log.info("found parameters updatePath: {}", updatePath)
|
||||
val targetPath: String = parser.get("targetPath")
|
||||
log.info("found parameters targetPath: {}", targetPath)
|
||||
applyTableUpdate(spark, graphPath, updatePath, targetPath)
|
||||
checkUpdate(spark, graphPath, targetPath)
|
||||
moveTable(spark, graphPath, targetPath)
|
||||
|
||||
}
|
||||
|
||||
private def moveTable(spark: SparkSession, graphPath: String, updatePath: String): Unit = {
|
||||
spark.read
|
||||
.load(s"$updatePath/Authors")
|
||||
.repartition(1000)
|
||||
.write
|
||||
.mode(SaveMode.Overwrite)
|
||||
.save(s"$graphPath/Authors")
|
||||
spark.read
|
||||
.load(s"$updatePath/Works")
|
||||
.repartition(1000)
|
||||
.write
|
||||
.mode(SaveMode.Overwrite)
|
||||
.save(s"$graphPath/Works")
|
||||
spark.read
|
||||
.load(s"$updatePath/Employments")
|
||||
.repartition(1000)
|
||||
.write
|
||||
.mode(SaveMode.Overwrite)
|
||||
.save(s"$graphPath/Employments")
|
||||
|
||||
}
|
||||
|
||||
private def updateDataset(
|
||||
inputDataset: DataFrame,
|
||||
idUpdate: DataFrame,
|
||||
updateDataframe: DataFrame,
|
||||
targetPath: String
|
||||
): Unit = {
|
||||
inputDataset
|
||||
.join(idUpdate, inputDataset("orcid").equalTo(idUpdate("orcid")), "leftanti")
|
||||
.select(inputDataset("*"))
|
||||
.unionByName(updateDataframe)
|
||||
.write
|
||||
.mode(SaveMode.Overwrite)
|
||||
.save(targetPath)
|
||||
}
|
||||
|
||||
private def checkUpdate(spark: SparkSession, graphPath: String, updatePath: String): Unit = {
|
||||
val totalOriginalAuthors = spark.read.load(s"$graphPath/Authors").count
|
||||
val totalOriginalWorks = spark.read.load(s"$graphPath/Works").count
|
||||
val totalOriginalEmployments = spark.read.load(s"$graphPath/Employments").count
|
||||
val totalUpdateAuthors = spark.read.load(s"$updatePath/Authors").count
|
||||
val totalUpdateWorks = spark.read.load(s"$updatePath/Works").count
|
||||
val totalUpdateEmployments = spark.read.load(s"$updatePath/Employments").count
|
||||
|
||||
log.info("totalOriginalAuthors: {}", totalOriginalAuthors)
|
||||
log.info("totalOriginalWorks: {}", totalOriginalWorks)
|
||||
log.info("totalOriginalEmployments: {}", totalOriginalEmployments)
|
||||
log.info("totalUpdateAuthors: {}", totalUpdateAuthors)
|
||||
log.info("totalUpdateWorks: {}", totalUpdateWorks)
|
||||
log.info("totalUpdateEmployments: {}", totalUpdateEmployments)
|
||||
if (
|
||||
totalUpdateAuthors < totalOriginalAuthors || totalUpdateEmployments < totalOriginalEmployments || totalUpdateWorks < totalOriginalWorks
|
||||
)
|
||||
throw new RuntimeException("The updated Graph contains less elements of the original one")
|
||||
|
||||
}
|
||||
|
||||
private def applyTableUpdate(spark: SparkSession, graphPath: String, updatePath: String, targetPath: String): Unit = {
|
||||
val orcidIDUpdate = spark.read.load(s"$updatePath/Authors").select("orcid")
|
||||
updateDataset(
|
||||
spark.read.load(s"$graphPath/Authors"),
|
||||
orcidIDUpdate,
|
||||
spark.read.load(s"$updatePath/Authors"),
|
||||
s"$targetPath/Authors"
|
||||
)
|
||||
updateDataset(
|
||||
spark.read.load(s"$graphPath/Employments"),
|
||||
orcidIDUpdate,
|
||||
spark.read.load(s"$updatePath/Employments"),
|
||||
s"$targetPath/Employments"
|
||||
)
|
||||
updateDataset(
|
||||
spark.read.load(s"$graphPath/Works"),
|
||||
orcidIDUpdate,
|
||||
spark.read.load(s"$updatePath/Works"),
|
||||
s"$targetPath/Works"
|
||||
)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
object SparkApplyUpdate {
|
||||
|
||||
val log: Logger = LoggerFactory.getLogger(SparkGenerateORCIDTable.getClass)
|
||||
|
||||
def main(args: Array[String]): Unit = {
|
||||
|
||||
new SparkApplyUpdate("/eu/dnetlib/dhp/collection/orcid/apply_orcid_table_parameter.json", args, log)
|
||||
.initialize()
|
||||
.run()
|
||||
|
||||
}
|
||||
}
|
@ -0,0 +1,324 @@
|
||||
|
||||
package eu.dnetlib.dhp.actionmanager.transformativeagreement;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.junit.jupiter.api.AfterAll;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.actionmanager.opencitations.CreateActionSetSparkJob;
|
||||
import eu.dnetlib.dhp.actionmanager.opencitations.CreateOpenCitationsASTest;
|
||||
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
||||
|
||||
/**
|
||||
* @author miriam.baglioni
|
||||
* @Date 13/02/24
|
||||
*/
|
||||
public class CreateTAActionSetTest {
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
private static SparkSession spark;
|
||||
|
||||
private static Path workingDir;
|
||||
private static final Logger log = LoggerFactory
|
||||
.getLogger(CreateOpenCitationsASTest.class);
|
||||
|
||||
@BeforeAll
|
||||
public static void beforeAll() throws IOException {
|
||||
workingDir = Files
|
||||
.createTempDirectory(CreateTAActionSetTest.class.getSimpleName());
|
||||
log.info("using work dir {}", workingDir);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
conf.setAppName(CreateTAActionSetTest.class.getSimpleName());
|
||||
|
||||
conf.setMaster("local[*]");
|
||||
conf.set("spark.driver.host", "localhost");
|
||||
conf.set("hive.metastore.local", "true");
|
||||
conf.set("spark.ui.enabled", "false");
|
||||
conf.set("spark.sql.warehouse.dir", workingDir.toString());
|
||||
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
|
||||
|
||||
spark = SparkSession
|
||||
.builder()
|
||||
.appName(CreateTAActionSetTest.class.getSimpleName())
|
||||
.config(conf)
|
||||
.getOrCreate();
|
||||
}
|
||||
|
||||
@AfterAll
|
||||
public static void afterAll() throws IOException {
|
||||
FileUtils.deleteDirectory(workingDir.toFile());
|
||||
spark.stop();
|
||||
}
|
||||
|
||||
@Test
|
||||
void createActionSet() throws Exception {
|
||||
|
||||
String inputPath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/actionmanager/transformativeagreement/facts.json")
|
||||
.getPath();
|
||||
|
||||
eu.dnetlib.dhp.actionmanager.transformativeagreement.CreateActionSetSparkJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isSparkSessionManaged",
|
||||
Boolean.FALSE.toString(),
|
||||
"-inputPath",
|
||||
inputPath,
|
||||
"-outputPath",
|
||||
workingDir.toString() + "/actionSet1"
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
void testNumberofRelations2() throws Exception {
|
||||
|
||||
String inputPath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/actionmanager/opencitations/COCI")
|
||||
.getPath();
|
||||
|
||||
eu.dnetlib.dhp.actionmanager.opencitations.CreateActionSetSparkJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isSparkSessionManaged",
|
||||
Boolean.FALSE.toString(),
|
||||
"-inputPath",
|
||||
inputPath,
|
||||
"-outputPath",
|
||||
workingDir.toString() + "/actionSet2"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Relation> tmp = sc
|
||||
.sequenceFile(workingDir.toString() + "/actionSet2", Text.class, Text.class)
|
||||
.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
|
||||
.map(aa -> ((Relation) aa.getPayload()));
|
||||
|
||||
assertEquals(23, tmp.count());
|
||||
|
||||
// tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r)));
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
void testRelationsCollectedFrom() throws Exception {
|
||||
|
||||
String inputPath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/actionmanager/opencitations/COCI")
|
||||
.getPath();
|
||||
|
||||
eu.dnetlib.dhp.actionmanager.opencitations.CreateActionSetSparkJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isSparkSessionManaged",
|
||||
Boolean.FALSE.toString(),
|
||||
"-inputPath",
|
||||
inputPath,
|
||||
"-outputPath",
|
||||
workingDir.toString() + "/actionSet3"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Relation> tmp = sc
|
||||
.sequenceFile(workingDir.toString() + "/actionSet3", Text.class, Text.class)
|
||||
.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
|
||||
.map(aa -> ((Relation) aa.getPayload()));
|
||||
|
||||
tmp.foreach(r -> {
|
||||
assertEquals(ModelConstants.OPENOCITATIONS_NAME, r.getCollectedfrom().get(0).getValue());
|
||||
assertEquals(ModelConstants.OPENOCITATIONS_ID, r.getCollectedfrom().get(0).getKey());
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
void testRelationsDataInfo() throws Exception {
|
||||
|
||||
String inputPath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/actionmanager/opencitations/COCI")
|
||||
.getPath();
|
||||
|
||||
eu.dnetlib.dhp.actionmanager.opencitations.CreateActionSetSparkJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isSparkSessionManaged",
|
||||
Boolean.FALSE.toString(),
|
||||
"-inputPath",
|
||||
inputPath,
|
||||
"-outputPath",
|
||||
workingDir.toString() + "/actionSet4"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Relation> tmp = sc
|
||||
.sequenceFile(workingDir.toString() + "/actionSet4", Text.class, Text.class)
|
||||
.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
|
||||
.map(aa -> ((Relation) aa.getPayload()));
|
||||
|
||||
tmp.foreach(r -> {
|
||||
assertEquals(false, r.getDataInfo().getInferred());
|
||||
assertEquals(false, r.getDataInfo().getDeletedbyinference());
|
||||
assertEquals("0.91", r.getDataInfo().getTrust());
|
||||
assertEquals(
|
||||
eu.dnetlib.dhp.actionmanager.opencitations.CreateActionSetSparkJob.OPENCITATIONS_CLASSID,
|
||||
r.getDataInfo().getProvenanceaction().getClassid());
|
||||
assertEquals(
|
||||
eu.dnetlib.dhp.actionmanager.opencitations.CreateActionSetSparkJob.OPENCITATIONS_CLASSNAME,
|
||||
r.getDataInfo().getProvenanceaction().getClassname());
|
||||
assertEquals(ModelConstants.DNET_PROVENANCE_ACTIONS, r.getDataInfo().getProvenanceaction().getSchemeid());
|
||||
assertEquals(ModelConstants.DNET_PROVENANCE_ACTIONS, r.getDataInfo().getProvenanceaction().getSchemename());
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
void testRelationsSemantics() throws Exception {
|
||||
|
||||
String inputPath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/actionmanager/opencitations/COCI")
|
||||
.getPath();
|
||||
|
||||
eu.dnetlib.dhp.actionmanager.opencitations.CreateActionSetSparkJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isSparkSessionManaged",
|
||||
Boolean.FALSE.toString(),
|
||||
"-inputPath",
|
||||
inputPath,
|
||||
"-outputPath",
|
||||
workingDir.toString() + "/actionSet5"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Relation> tmp = sc
|
||||
.sequenceFile(workingDir.toString() + "/actionSet5", Text.class, Text.class)
|
||||
.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
|
||||
.map(aa -> ((Relation) aa.getPayload()));
|
||||
|
||||
tmp.foreach(r -> {
|
||||
assertEquals("citation", r.getSubRelType());
|
||||
assertEquals("resultResult", r.getRelType());
|
||||
});
|
||||
assertEquals(23, tmp.filter(r -> r.getRelClass().equals("Cites")).count());
|
||||
assertEquals(0, tmp.filter(r -> r.getRelClass().equals("IsCitedBy")).count());
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
void testRelationsSourceTargetPrefix() throws Exception {
|
||||
|
||||
String inputPath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/actionmanager/opencitations/COCI")
|
||||
.getPath();
|
||||
|
||||
eu.dnetlib.dhp.actionmanager.opencitations.CreateActionSetSparkJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isSparkSessionManaged",
|
||||
Boolean.FALSE.toString(),
|
||||
"-inputPath",
|
||||
inputPath,
|
||||
"-outputPath",
|
||||
workingDir.toString() + "/actionSet6"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Relation> tmp = sc
|
||||
.sequenceFile(workingDir.toString() + "/actionSet6", Text.class, Text.class)
|
||||
.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
|
||||
.map(aa -> ((Relation) aa.getPayload()));
|
||||
|
||||
tmp.foreach(r -> {
|
||||
assertEquals("50|doi_________::", r.getSource().substring(0, 17));
|
||||
assertEquals("50|doi_________::", r.getTarget().substring(0, 17));
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
void testRelationsSourceTargetCouple() throws Exception {
|
||||
final String doi1 = "50|doi_________::"
|
||||
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1007/s10854-015-3684-x"));
|
||||
final String doi2 = "50|doi_________::"
|
||||
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1111/j.1551-2916.2008.02408.x"));
|
||||
final String doi3 = "50|doi_________::"
|
||||
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1007/s10854-014-2114-9"));
|
||||
final String doi4 = "50|doi_________::"
|
||||
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1016/j.ceramint.2013.09.069"));
|
||||
final String doi5 = "50|doi_________::"
|
||||
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1007/s10854-009-9913-4"));
|
||||
final String doi6 = "50|doi_________::"
|
||||
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1016/0038-1098(72)90370-5"));
|
||||
|
||||
String inputPath = getClass()
|
||||
.getResource(
|
||||
"/eu/dnetlib/dhp/actionmanager/opencitations/COCI")
|
||||
.getPath();
|
||||
|
||||
CreateActionSetSparkJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isSparkSessionManaged",
|
||||
Boolean.FALSE.toString(),
|
||||
"-inputPath",
|
||||
inputPath,
|
||||
"-outputPath",
|
||||
workingDir.toString() + "/actionSet7"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Relation> tmp = sc
|
||||
.sequenceFile(workingDir.toString() + "/actionSet7", Text.class, Text.class)
|
||||
.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
|
||||
.map(aa -> ((Relation) aa.getPayload()));
|
||||
|
||||
JavaRDD<Relation> check = tmp.filter(r -> r.getSource().equals(doi1) || r.getTarget().equals(doi1));
|
||||
|
||||
assertEquals(5, check.count());
|
||||
|
||||
// check.foreach(r -> {
|
||||
// if (r.getSource().equals(doi2) || r.getSource().equals(doi3) || r.getSource().equals(doi4) ||
|
||||
// r.getSource().equals(doi5) || r.getSource().equals(doi6)) {
|
||||
// assertEquals(ModelConstants.IS_CITED_BY, r.getRelClass());
|
||||
// assertEquals(doi1, r.getTarget());
|
||||
// }
|
||||
// });
|
||||
|
||||
assertEquals(5, check.filter(r -> r.getSource().equals(doi1)).count());
|
||||
check.filter(r -> r.getSource().equals(doi1)).foreach(r -> assertEquals(ModelConstants.CITES, r.getRelClass()));
|
||||
|
||||
}
|
||||
}
|
@ -0,0 +1,38 @@
|
||||
|
||||
package eu.dnetlib.dhp.collection.plugin.base;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
public class BaseCollectionInfo implements Serializable {
|
||||
|
||||
private static final long serialVersionUID = 5766333937429419647L;
|
||||
|
||||
private String id;
|
||||
private String opendoarId;
|
||||
private String rorId;
|
||||
|
||||
public String getId() {
|
||||
return this.id;
|
||||
}
|
||||
|
||||
public void setId(final String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public String getOpendoarId() {
|
||||
return this.opendoarId;
|
||||
}
|
||||
|
||||
public void setOpendoarId(final String opendoarId) {
|
||||
this.opendoarId = opendoarId;
|
||||
}
|
||||
|
||||
public String getRorId() {
|
||||
return this.rorId;
|
||||
}
|
||||
|
||||
public void setRorId(final String rorId) {
|
||||
this.rorId = rorId;
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,32 @@
|
||||
|
||||
package eu.dnetlib.dhp.collection.plugin.base;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
class BaseCollectorPluginTest {
|
||||
|
||||
@Test
|
||||
void testFilterXml() throws Exception {
|
||||
final String xml = IOUtils.toString(getClass().getResourceAsStream("record.xml"));
|
||||
|
||||
final Set<String> validIds = new HashSet<>(Arrays.asList("opendoar____::1234", "opendoar____::4567"));
|
||||
final Set<String> validTypes = new HashSet<>(Arrays.asList("1", "121"));
|
||||
final Set<String> validTypes2 = new HashSet<>(Arrays.asList("1", "11"));
|
||||
|
||||
assertTrue(BaseCollectorPlugin.filterXml(xml, validIds, validTypes));
|
||||
assertTrue(BaseCollectorPlugin.filterXml(xml, validIds, new HashSet<>()));
|
||||
|
||||
assertFalse(BaseCollectorPlugin.filterXml(xml, new HashSet<>(), validTypes));
|
||||
assertFalse(BaseCollectorPlugin.filterXml(xml, validIds, validTypes2));
|
||||
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,49 @@
|
||||
|
||||
package eu.dnetlib.dhp.collection.plugin.base;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class BaseRecordInfo implements Serializable {
|
||||
|
||||
private static final long serialVersionUID = -8848232018350074593L;
|
||||
|
||||
private String id;
|
||||
private List<BaseCollectionInfo> collections = new ArrayList<>();
|
||||
private List<String> paths = new ArrayList<>();
|
||||
private List<String> types = new ArrayList<>();
|
||||
|
||||
public String getId() {
|
||||
return this.id;
|
||||
}
|
||||
|
||||
public void setId(final String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public List<String> getPaths() {
|
||||
return this.paths;
|
||||
}
|
||||
|
||||
public void setPaths(final List<String> paths) {
|
||||
this.paths = paths;
|
||||
}
|
||||
|
||||
public List<String> getTypes() {
|
||||
return this.types;
|
||||
}
|
||||
|
||||
public void setTypes(final List<String> types) {
|
||||
this.types = types;
|
||||
}
|
||||
|
||||
public List<BaseCollectionInfo> getCollections() {
|
||||
return this.collections;
|
||||
}
|
||||
|
||||
public void setCollections(final List<BaseCollectionInfo> collections) {
|
||||
this.collections = collections;
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,94 @@
|
||||
|
||||
package eu.dnetlib.dhp.collection.plugin.base;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.util.LongAccumulator;
|
||||
import org.dom4j.io.SAXReader;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
|
||||
import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest;
|
||||
import eu.dnetlib.dhp.aggregation.common.AggregationCounter;
|
||||
import eu.dnetlib.dhp.schema.mdstore.MetadataRecord;
|
||||
import eu.dnetlib.dhp.schema.mdstore.Provenance;
|
||||
import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||
|
||||
// @Disabled
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
public class BaseTransfomationTest extends AbstractVocabularyTest {
|
||||
|
||||
private SparkConf sparkConf;
|
||||
|
||||
@BeforeEach
|
||||
public void setUp() throws IOException, ISLookUpException {
|
||||
setUpVocabulary();
|
||||
|
||||
this.sparkConf = new SparkConf();
|
||||
this.sparkConf.setMaster("local[*]");
|
||||
this.sparkConf.set("spark.driver.host", "localhost");
|
||||
this.sparkConf.set("spark.ui.enabled", "false");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testBase2ODF() throws Exception {
|
||||
|
||||
final MetadataRecord mr = new MetadataRecord();
|
||||
mr.setProvenance(new Provenance("DSID", "DSNAME", "PREFIX"));
|
||||
mr.setBody(IOUtils.toString(getClass().getResourceAsStream("record.xml")));
|
||||
|
||||
final XSLTTransformationFunction tr = loadTransformationRule("xml/base2odf.transformationRule.xml");
|
||||
|
||||
final MetadataRecord result = tr.call(mr);
|
||||
|
||||
System.out.println(result.getBody());
|
||||
}
|
||||
|
||||
@Test
|
||||
void testBase2OAF() throws Exception {
|
||||
|
||||
final MetadataRecord mr = new MetadataRecord();
|
||||
mr.setProvenance(new Provenance("DSID", "DSNAME", "PREFIX"));
|
||||
mr.setBody(IOUtils.toString(getClass().getResourceAsStream("record.xml")));
|
||||
|
||||
final XSLTTransformationFunction tr = loadTransformationRule("xml/base2oaf.transformationRule.xml");
|
||||
|
||||
final MetadataRecord result = tr.call(mr);
|
||||
|
||||
System.out.println(result.getBody());
|
||||
}
|
||||
|
||||
@Test
|
||||
void testBase2ODF_wrong_date() throws Exception {
|
||||
|
||||
final MetadataRecord mr = new MetadataRecord();
|
||||
mr.setProvenance(new Provenance("DSID", "DSNAME", "PREFIX"));
|
||||
mr.setBody(IOUtils.toString(getClass().getResourceAsStream("record_wrong_1.xml")));
|
||||
|
||||
final XSLTTransformationFunction tr = loadTransformationRule("xml/base2oaf.transformationRule.xml");
|
||||
|
||||
assertThrows(NullPointerException.class, () -> {
|
||||
final MetadataRecord result = tr.call(mr);
|
||||
System.out.println(result.getBody());
|
||||
});
|
||||
}
|
||||
|
||||
private XSLTTransformationFunction loadTransformationRule(final String path) throws Exception {
|
||||
final String xslt = new SAXReader()
|
||||
.read(this.getClass().getResourceAsStream(path))
|
||||
.selectSingleNode("//CODE/*")
|
||||
.asXML();
|
||||
|
||||
final LongAccumulator la = new LongAccumulator();
|
||||
|
||||
return new XSLTTransformationFunction(new AggregationCounter(la, la, la), xslt, 0, this.vocabularies);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,48 @@
|
||||
|
||||
package eu.dnetlib.dhp.collection.plugin.utils;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
class JsonUtilsTest {
|
||||
|
||||
static private String wrapped(String xml) {
|
||||
return "<?xml version=\"1.0\" encoding=\"UTF-8\"?><recordWrap>" + xml + "</recordWrap>";
|
||||
}
|
||||
|
||||
@Test
|
||||
void keyStartWithDigit() {
|
||||
assertEquals(
|
||||
wrapped("<m_100><n_200v>null</n_200v></m_100>"),
|
||||
JsonUtils.convertToXML("{\"100\" : {\"200v\" : null}}"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void keyStartWithSpecialchars() {
|
||||
assertEquals(
|
||||
wrapped("<_parent><_nest1><_nest2>null</_nest2></_nest1></_parent>"),
|
||||
JsonUtils.convertToXML("{\" parent\" : {\"-nest1\" : {\".nest2\" : null}}}"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void encodeArray() {
|
||||
assertEquals(
|
||||
wrapped("<_parent.child>1</_parent.child><_parent.child>2</_parent.child>"),
|
||||
JsonUtils.convertToXML("{\" parent.child\":[1, 2]}"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void arrayOfObjects() {
|
||||
assertEquals(
|
||||
wrapped("<parent><id>1</id></parent><parent><id>2</id></parent>"),
|
||||
JsonUtils.convertToXML("{\"parent\": [{\"id\": 1}, {\"id\": 2}]}"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void removeControlCharacters() {
|
||||
assertEquals(
|
||||
wrapped("<m_100><n_200v>Test</n_200v></m_100>"),
|
||||
JsonUtils.convertToXML("{\"100\" : {\"200v\" : \"\\u0000\\u000cTest\"}}"));
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,966 @@
|
||||
<record:record path="/0000-0001-6816-8350" xmlns:internal="http://www.orcid.org/ns/internal" xmlns:education="http://www.orcid.org/ns/education" xmlns:distinction="http://www.orcid.org/ns/distinction" xmlns:deprecated="http://www.orcid.org/ns/deprecated" xmlns:other-name="http://www.orcid.org/ns/other-name" xmlns:membership="http://www.orcid.org/ns/membership" xmlns:error="http://www.orcid.org/ns/error" xmlns:common="http://www.orcid.org/ns/common" xmlns:record="http://www.orcid.org/ns/record" xmlns:personal-details="http://www.orcid.org/ns/personal-details" xmlns:keyword="http://www.orcid.org/ns/keyword" xmlns:email="http://www.orcid.org/ns/email" xmlns:external-identifier="http://www.orcid.org/ns/external-identifier" xmlns:funding="http://www.orcid.org/ns/funding" xmlns:preferences="http://www.orcid.org/ns/preferences" xmlns:address="http://www.orcid.org/ns/address" xmlns:invited-position="http://www.orcid.org/ns/invited-position" xmlns:work="http://www.orcid.org/ns/work" xmlns:history="http://www.orcid.org/ns/history" xmlns:employment="http://www.orcid.org/ns/employment" xmlns:qualification="http://www.orcid.org/ns/qualification" xmlns:service="http://www.orcid.org/ns/service" xmlns:person="http://www.orcid.org/ns/person" xmlns:activities="http://www.orcid.org/ns/activities" xmlns:researcher-url="http://www.orcid.org/ns/researcher-url" xmlns:peer-review="http://www.orcid.org/ns/peer-review" xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:research-resource="http://www.orcid.org/ns/research-resource">
|
||||
<common:orcid-identifier>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:orcid-identifier>
|
||||
<preferences:preferences>
|
||||
<preferences:locale>en</preferences:locale>
|
||||
</preferences:preferences>
|
||||
<history:history>
|
||||
<history:creation-method>Direct</history:creation-method>
|
||||
<history:submission-date>2016-01-06T05:08:45.720Z</history:submission-date>
|
||||
<common:last-modified-date>2024-01-02T20:07:05.186Z</common:last-modified-date>
|
||||
<history:claimed>true</history:claimed>
|
||||
<history:verified-email>true</history:verified-email>
|
||||
<history:verified-primary-email>true</history:verified-primary-email>
|
||||
</history:history>
|
||||
<person:person path="/0000-0001-6816-8350/person">
|
||||
<common:last-modified-date>2023-12-02T13:32:05.269Z</common:last-modified-date>
|
||||
<other-name:other-names path="/0000-0001-6816-8350/other-names"/>
|
||||
<researcher-url:researcher-urls path="/0000-0001-6816-8350/researcher-urls">
|
||||
<common:last-modified-date>2016-02-09T09:18:18.417Z</common:last-modified-date>
|
||||
<researcher-url:researcher-url put-code="633431" visibility="public" path="/0000-0001-6816-8350/researcher-urls/633431" display-index="0">
|
||||
<common:created-date>2016-02-09T09:18:18.416Z</common:created-date>
|
||||
<common:last-modified-date>2016-02-09T09:18:18.417Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<researcher-url:url-name>Dr Michael Muchiri</researcher-url:url-name>
|
||||
<researcher-url:url>http://www.rmit.edu.au/contact/staff-contacts/academic-staff/m/muchiri-dr-michael</researcher-url:url>
|
||||
</researcher-url:researcher-url>
|
||||
</researcher-url:researcher-urls>
|
||||
<email:emails path="/0000-0001-6816-8350/email"/>
|
||||
<address:addresses path="/0000-0001-6816-8350/address">
|
||||
<common:last-modified-date>2023-12-02T13:32:05.269Z</common:last-modified-date>
|
||||
<address:address put-code="897528" visibility="public" path="/0000-0001-6816-8350/address/897528" display-index="2">
|
||||
<common:created-date>2018-02-13T02:32:04.094Z</common:created-date>
|
||||
<common:last-modified-date>2023-12-02T13:32:05.269Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<address:country>AU</address:country>
|
||||
</address:address>
|
||||
<address:address put-code="3191142" visibility="public" path="/0000-0001-6816-8350/address/3191142" display-index="1">
|
||||
<common:created-date>2023-12-02T13:32:05.260Z</common:created-date>
|
||||
<common:last-modified-date>2023-12-02T13:32:05.260Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<address:country>SA</address:country>
|
||||
</address:address>
|
||||
</address:addresses>
|
||||
<keyword:keywords path="/0000-0001-6816-8350/keywords">
|
||||
<common:last-modified-date>2023-12-02T13:31:16.269Z</common:last-modified-date>
|
||||
<keyword:keyword put-code="368304" visibility="public" path="/0000-0001-6816-8350/keywords/368304" display-index="4">
|
||||
<common:created-date>2016-02-09T09:16:44.001Z</common:created-date>
|
||||
<common:last-modified-date>2023-12-02T13:31:16.269Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<keyword:content>Organizational Behavior</keyword:content>
|
||||
</keyword:keyword>
|
||||
<keyword:keyword put-code="368303" visibility="public" path="/0000-0001-6816-8350/keywords/368303" display-index="3">
|
||||
<common:created-date>2016-02-09T09:16:27.374Z</common:created-date>
|
||||
<common:last-modified-date>2023-12-02T13:31:16.269Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<keyword:content>Organizational Leadership</keyword:content>
|
||||
</keyword:keyword>
|
||||
<keyword:keyword put-code="368306" visibility="public" path="/0000-0001-6816-8350/keywords/368306" display-index="2">
|
||||
<common:created-date>2016-02-09T09:17:08.998Z</common:created-date>
|
||||
<common:last-modified-date>2023-12-02T13:31:16.269Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<keyword:content>Organizational performance</keyword:content>
|
||||
</keyword:keyword>
|
||||
<keyword:keyword put-code="3590814" visibility="public" path="/0000-0001-6816-8350/keywords/3590814" display-index="1">
|
||||
<common:created-date>2023-12-02T13:31:16.259Z</common:created-date>
|
||||
<common:last-modified-date>2023-12-02T13:31:16.259Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<keyword:content>Thriving at work</keyword:content>
|
||||
</keyword:keyword>
|
||||
</keyword:keywords>
|
||||
<external-identifier:external-identifiers path="/0000-0001-6816-8350/external-identifiers">
|
||||
<common:last-modified-date>2018-04-10T00:49:55.386Z</common:last-modified-date>
|
||||
<external-identifier:external-identifier put-code="998076" visibility="public" path="/0000-0001-6816-8350/external-identifiers/998076" display-index="0">
|
||||
<common:created-date>2018-04-10T00:49:55.385Z</common:created-date>
|
||||
<common:last-modified-date>2018-04-10T00:49:55.386Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-client-id>
|
||||
<common:uri>https://orcid.org/client/0000-0003-1377-5676</common:uri>
|
||||
<common:path>0000-0003-1377-5676</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-client-id>
|
||||
<common:source-name>ResearcherID</common:source-name>
|
||||
<common:assertion-origin-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:assertion-origin-orcid>
|
||||
</common:source>
|
||||
<common:external-id-type>ResearcherID</common:external-id-type>
|
||||
<common:external-id-value>D-1929-2018</common:external-id-value>
|
||||
<common:external-id-url>http://www.researcherid.com/rid/D-1929-2018</common:external-id-url>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</external-identifier:external-identifier>
|
||||
</external-identifier:external-identifiers>
|
||||
</person:person>
|
||||
<activities:activities-summary path="/0000-0001-6816-8350/activities">
|
||||
<common:last-modified-date>2023-12-02T13:28:26.051Z</common:last-modified-date>
|
||||
<activities:distinctions path="/0000-0001-6816-8350/distinctions"/>
|
||||
<activities:educations path="/0000-0001-6816-8350/educations">
|
||||
<common:last-modified-date>2018-02-13T02:33:38.225Z</common:last-modified-date>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2016-02-09T06:55:21.838Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<education:education-summary put-code="1549986" display-index="0" path="/0000-0001-6816-8350/education/1549986" visibility="public">
|
||||
<common:created-date>2016-02-09T06:54:39.199Z</common:created-date>
|
||||
<common:last-modified-date>2016-02-09T06:55:21.838Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<common:department-name>Management</common:department-name>
|
||||
<common:role-title>PhD</common:role-title>
|
||||
<common:start-date>
|
||||
<common:year>2021</common:year>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2022</common:year>
|
||||
<common:month>02</common:month>
|
||||
<common:day>12</common:day>
|
||||
</common:end-date>
|
||||
<common:start-date>
|
||||
<common:year>2003</common:year>
|
||||
<common:month>03</common:month>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2007</common:year>
|
||||
<common:month>03</common:month>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>University of New England</common:name>
|
||||
<common:address>
|
||||
<common:city>Armidale</common:city>
|
||||
<common:region>NSW</common:region>
|
||||
<common:country>AU</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>1319</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</education:education-summary>
|
||||
</activities:affiliation-group>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2018-02-13T02:33:38.225Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<education:education-summary put-code="1549990" display-index="0" path="/0000-0001-6816-8350/education/1549990" visibility="public">
|
||||
<common:created-date>2016-02-09T06:57:04.181Z</common:created-date>
|
||||
<common:last-modified-date>2018-02-13T02:33:38.225Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<common:department-name>Psychology</common:department-name>
|
||||
<common:role-title>Master of Science (Industrial and Organizational) Psychology</common:role-title>
|
||||
<common:start-date>
|
||||
<common:year>1998</common:year>
|
||||
<common:month>01</common:month>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2000</common:year>
|
||||
<common:month>01</common:month>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>Universitas Gadjah Mada</common:name>
|
||||
<common:address>
|
||||
<common:city>Yogyakarta</common:city>
|
||||
<common:region>Daerah Istimewa Yogyakart</common:region>
|
||||
<common:country>ID</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>59166</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</education:education-summary>
|
||||
</activities:affiliation-group>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2018-02-13T02:33:35.821Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<education:education-summary put-code="1549998" display-index="0" path="/0000-0001-6816-8350/education/1549998" visibility="public">
|
||||
<common:created-date>2016-02-09T06:58:59.869Z</common:created-date>
|
||||
<common:last-modified-date>2018-02-13T02:33:35.821Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<common:department-name>Education</common:department-name>
|
||||
<common:role-title>Bachelor of Education (Honors)</common:role-title>
|
||||
<common:start-date>
|
||||
<common:year>1988</common:year>
|
||||
<common:month>03</common:month>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>1991</common:year>
|
||||
<common:month>03</common:month>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>Kenyatta University</common:name>
|
||||
<common:address>
|
||||
<common:city>Nairobi</common:city>
|
||||
<common:region>Nairobi</common:region>
|
||||
<common:country>KE</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>107864</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</education:education-summary>
|
||||
</activities:affiliation-group>
|
||||
</activities:educations>
|
||||
<activities:employments path="/0000-0001-6816-8350/employments">
|
||||
<common:last-modified-date>2023-12-02T13:28:26.051Z</common:last-modified-date>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2023-12-02T13:28:26.051Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<employment:employment-summary put-code="21884863" display-index="1" path="/0000-0001-6816-8350/employment/21884863" visibility="public">
|
||||
<common:created-date>2023-12-02T13:28:26.051Z</common:created-date>
|
||||
<common:last-modified-date>2023-12-02T13:28:26.051Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<common:department-name>Management</common:department-name>
|
||||
<common:role-title>Associate Professor in Management</common:role-title>
|
||||
<common:start-date>
|
||||
<common:year>2023</common:year>
|
||||
<common:month>08</common:month>
|
||||
<common:day>20</common:day>
|
||||
</common:start-date>
|
||||
<common:organization>
|
||||
<common:name>Alfaisal University</common:name>
|
||||
<common:address>
|
||||
<common:city>Riyadh</common:city>
|
||||
<common:country>SA</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>https://ror.org/00cdrtq48</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>ROR</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
<common:url>https://faculty.alfaisal.edu/user/mmuchiri</common:url>
|
||||
</employment:employment-summary>
|
||||
</activities:affiliation-group>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2016-02-09T07:00:06.052Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<employment:employment-summary put-code="1550002" display-index="0" path="/0000-0001-6816-8350/employment/1550002" visibility="public">
|
||||
<common:created-date>2016-02-09T07:00:06.052Z</common:created-date>
|
||||
<common:last-modified-date>2016-02-09T07:00:06.052Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<common:department-name>Management</common:department-name>
|
||||
<common:role-title>Senior Lecturer</common:role-title>
|
||||
<common:start-date>
|
||||
<common:year>2014</common:year>
|
||||
<common:month>02</common:month>
|
||||
</common:start-date>
|
||||
<common:organization>
|
||||
<common:name>RMIT University</common:name>
|
||||
<common:address>
|
||||
<common:city>Melbourne</common:city>
|
||||
<common:region>VIC</common:region>
|
||||
<common:country>AU</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>5376</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</employment:employment-summary>
|
||||
</activities:affiliation-group>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2016-02-09T07:01:08.398Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<employment:employment-summary put-code="1550007" display-index="0" path="/0000-0001-6816-8350/employment/1550007" visibility="public">
|
||||
<common:created-date>2016-02-09T07:01:08.398Z</common:created-date>
|
||||
<common:last-modified-date>2016-02-09T07:01:08.398Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<common:department-name>Management</common:department-name>
|
||||
<common:role-title>Senior Lecturer in Human Resource Management</common:role-title>
|
||||
<common:start-date>
|
||||
<common:year>2010</common:year>
|
||||
<common:month>01</common:month>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2014</common:year>
|
||||
<common:month>02</common:month>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>Central Queensland University</common:name>
|
||||
<common:address>
|
||||
<common:city>Rockhampton</common:city>
|
||||
<common:region>QLD</common:region>
|
||||
<common:country>AU</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>273488</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</employment:employment-summary>
|
||||
</activities:affiliation-group>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2016-02-09T07:01:47.814Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<employment:employment-summary put-code="1550010" display-index="0" path="/0000-0001-6816-8350/employment/1550010" visibility="public">
|
||||
<common:created-date>2016-02-09T07:01:47.814Z</common:created-date>
|
||||
<common:last-modified-date>2016-02-09T07:01:47.814Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<common:department-name>Management</common:department-name>
|
||||
<common:role-title>Lecturer in Management</common:role-title>
|
||||
<common:start-date>
|
||||
<common:year>2007</common:year>
|
||||
<common:month>01</common:month>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2010</common:year>
|
||||
<common:month>01</common:month>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>Central Queensland University</common:name>
|
||||
<common:address>
|
||||
<common:city>Rockhampton</common:city>
|
||||
<common:region>QLD</common:region>
|
||||
<common:country>AU</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>273488</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</employment:employment-summary>
|
||||
</activities:affiliation-group>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2018-02-13T02:33:13.213Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<employment:employment-summary put-code="1550017" display-index="0" path="/0000-0001-6816-8350/employment/1550017" visibility="public">
|
||||
<common:created-date>2016-02-09T07:03:42.180Z</common:created-date>
|
||||
<common:last-modified-date>2018-02-13T02:33:13.213Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<common:department-name>Human Resource Development Division</common:department-name>
|
||||
<common:role-title>Chief Human Resource Development Officer</common:role-title>
|
||||
<common:start-date>
|
||||
<common:year>2005</common:year>
|
||||
<common:month>01</common:month>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2007</common:year>
|
||||
<common:month>01</common:month>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>Government of Kenya Directorate of Personnel Management</common:name>
|
||||
<common:address>
|
||||
<common:city>Nairobi</common:city>
|
||||
<common:region>Nairobi</common:region>
|
||||
<common:country>KE</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>360256</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</employment:employment-summary>
|
||||
</activities:affiliation-group>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2016-02-09T07:05:02.300Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<employment:employment-summary put-code="1550020" display-index="0" path="/0000-0001-6816-8350/employment/1550020" visibility="public">
|
||||
<common:created-date>2016-02-09T07:05:02.300Z</common:created-date>
|
||||
<common:last-modified-date>2016-02-09T07:05:02.300Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<common:department-name>Human Resource Development Division</common:department-name>
|
||||
<common:role-title>Human Resource Development Officer</common:role-title>
|
||||
<common:start-date>
|
||||
<common:year>2001</common:year>
|
||||
<common:month>01</common:month>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2005</common:year>
|
||||
<common:month>01</common:month>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>Government of Kenya Directorate of Personnel Management</common:name>
|
||||
<common:address>
|
||||
<common:city>Nairobi</common:city>
|
||||
<common:region>Nairobi</common:region>
|
||||
<common:country>KE</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>360256</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</employment:employment-summary>
|
||||
</activities:affiliation-group>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2016-02-09T07:36:52.398Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<employment:employment-summary put-code="1550050" display-index="0" path="/0000-0001-6816-8350/employment/1550050" visibility="public">
|
||||
<common:created-date>2016-02-09T07:36:52.398Z</common:created-date>
|
||||
<common:last-modified-date>2016-02-09T07:36:52.398Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<common:department-name>Public Sector Management Technical Assistance Project</common:department-name>
|
||||
<common:role-title>Project Coordinator for Development Learning Centre</common:role-title>
|
||||
<common:start-date>
|
||||
<common:year>2002</common:year>
|
||||
<common:month>08</common:month>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2003</common:year>
|
||||
<common:month>03</common:month>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>Government of Kenya Directorate of Personnel Management</common:name>
|
||||
<common:address>
|
||||
<common:city>Nairobi</common:city>
|
||||
<common:region>Nairobi</common:region>
|
||||
<common:country>KE</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>360256</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</employment:employment-summary>
|
||||
</activities:affiliation-group>
|
||||
</activities:employments>
|
||||
<activities:fundings path="/0000-0001-6816-8350/fundings">
|
||||
<common:last-modified-date>2016-02-09T09:05:27.100Z</common:last-modified-date>
|
||||
<activities:group>
|
||||
<common:last-modified-date>2016-02-09T09:05:27.100Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<funding:funding-summary put-code="150520" path="/0000-0001-6816-8350/funding/150520" visibility="public" display-index="6">
|
||||
<common:created-date>2016-02-09T09:05:27.100Z</common:created-date>
|
||||
<common:last-modified-date>2016-02-09T09:05:27.100Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<funding:title>
|
||||
<common:title>A cross-country examination of Employee Wellbeing, Leadership, High Performance Work Systems and Innovative Behaviours</common:title>
|
||||
</funding:title>
|
||||
<funding:type>grant</funding:type>
|
||||
<common:start-date>
|
||||
<common:year>2016</common:year>
|
||||
<common:month>01</common:month>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2016</common:year>
|
||||
<common:month>12</common:month>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>RMIT University</common:name>
|
||||
<common:address>
|
||||
<common:city>VIC</common:city>
|
||||
<common:region>VIC</common:region>
|
||||
<common:country>AU</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>http://dx.doi.org/10.13039/501100001780</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>FUNDREF</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</funding:funding-summary>
|
||||
</activities:group>
|
||||
<activities:group>
|
||||
<common:last-modified-date>2016-02-09T09:03:51.641Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<funding:funding-summary put-code="150518" path="/0000-0001-6816-8350/funding/150518" visibility="public" display-index="5">
|
||||
<common:created-date>2016-02-09T09:03:51.641Z</common:created-date>
|
||||
<common:last-modified-date>2016-02-09T09:03:51.641Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<funding:title>
|
||||
<common:title>Leading Safe and Thriving Organisations: An Investigation of the Relationships between Leadership, Thriving Behaviour, Authentic Followership and Safety Climate in an Australian Multinational Enterprise</common:title>
|
||||
</funding:title>
|
||||
<funding:type>grant</funding:type>
|
||||
<common:start-date>
|
||||
<common:year>2015</common:year>
|
||||
<common:month>01</common:month>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2015</common:year>
|
||||
<common:month>12</common:month>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>RMIT University</common:name>
|
||||
<common:address>
|
||||
<common:city>VIC</common:city>
|
||||
<common:region>VIC</common:region>
|
||||
<common:country>AU</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>http://dx.doi.org/10.13039/501100001780</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>FUNDREF</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</funding:funding-summary>
|
||||
</activities:group>
|
||||
<activities:group>
|
||||
<common:last-modified-date>2016-02-09T09:02:28.297Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<funding:funding-summary put-code="150516" path="/0000-0001-6816-8350/funding/150516" visibility="public" display-index="4">
|
||||
<common:created-date>2016-02-09T09:02:28.297Z</common:created-date>
|
||||
<common:last-modified-date>2016-02-09T09:02:28.297Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<funding:title>
|
||||
<common:title>A multilevel, cross-country examination of leadership, followership and innovative behaviours in Australia and Indonesia. </common:title>
|
||||
</funding:title>
|
||||
<funding:type>grant</funding:type>
|
||||
<common:start-date>
|
||||
<common:year>2015</common:year>
|
||||
<common:month>01</common:month>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2015</common:year>
|
||||
<common:month>12</common:month>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>RMIT University</common:name>
|
||||
<common:address>
|
||||
<common:city>VIC</common:city>
|
||||
<common:region>VIC</common:region>
|
||||
<common:country>AU</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>http://dx.doi.org/10.13039/501100001780</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>FUNDREF</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</funding:funding-summary>
|
||||
</activities:group>
|
||||
<activities:group>
|
||||
<common:last-modified-date>2016-02-09T09:00:51.749Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<funding:funding-summary put-code="150514" path="/0000-0001-6816-8350/funding/150514" visibility="public" display-index="3">
|
||||
<common:created-date>2016-02-09T09:00:51.749Z</common:created-date>
|
||||
<common:last-modified-date>2016-02-09T09:00:51.749Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<funding:title>
|
||||
<common:title>Workplace safety and positive leadership: Exploring relationships between leader behaviours, organisational climate, safety climate, safety citizenship behaviours and innovative behaviours within city councils in Victoria </common:title>
|
||||
</funding:title>
|
||||
<funding:type>grant</funding:type>
|
||||
<common:start-date>
|
||||
<common:year>2014</common:year>
|
||||
<common:month>01</common:month>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2014</common:year>
|
||||
<common:month>12</common:month>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>RMIT University</common:name>
|
||||
<common:address>
|
||||
<common:city>VIC</common:city>
|
||||
<common:region>VIC</common:region>
|
||||
<common:country>AU</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>http://dx.doi.org/10.13039/501100001780</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>FUNDREF</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</funding:funding-summary>
|
||||
</activities:group>
|
||||
<activities:group>
|
||||
<common:last-modified-date>2016-02-09T07:46:44.919Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<funding:funding-summary put-code="150485" path="/0000-0001-6816-8350/funding/150485" visibility="public" display-index="0">
|
||||
<common:created-date>2016-02-09T07:46:44.919Z</common:created-date>
|
||||
<common:last-modified-date>2016-02-09T07:46:44.919Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0001-6816-8350</common:uri>
|
||||
<common:path>0000-0001-6816-8350</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
</common:source>
|
||||
<funding:title>
|
||||
<common:title>Sustainable Business Model for Central Queensland Regional Information Systems.</common:title>
|
||||
</funding:title>
|
||||
<funding:type>grant</funding:type>
|
||||
<common:start-date>
|
||||
<common:year>2008</common:year>
|
||||
<common:month>01</common:month>
|
||||
</common:start-date>
|
||||
<common:end-date>
|
||||
<common:year>2008</common:year>
|
||||
<common:month>12</common:month>
|
||||
</common:end-date>
|
||||
<common:organization>
|
||||
<common:name>Department of Local Government, Planning, Sport and Recreation, Queensland, Australia </common:name>
|
||||
<common:address>
|
||||
<common:city>Rockhampton</common:city>
|
||||
<common:region>Central Queensland</common:region>
|
||||
<common:country>AU</common:country>
|
||||
</common:address>
|
||||
</common:organization>
|
||||
</funding:funding-summary>
|
||||
</activities:group>
|
||||
</activities:fundings>
|
||||
<activities:invited-positions path="/0000-0001-6816-8350/invited-positions"/>
|
||||
<activities:memberships path="/0000-0001-6816-8350/memberships"/>
|
||||
<activities:peer-reviews path="/0000-0001-6816-8350/peer-reviews">
|
||||
<common:last-modified-date>2023-05-31T05:53:44.542Z</common:last-modified-date>
|
||||
<activities:group>
|
||||
<common:last-modified-date>2023-05-31T05:53:44.542Z</common:last-modified-date>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>peer-review</common:external-id-type>
|
||||
<common:external-id-value>issn:0167-4544</common:external-id-value>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<activities:peer-review-group>
|
||||
<common:last-modified-date>2023-02-28T06:51:52.426Z</common:last-modified-date>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>source-work-id</common:external-id-type>
|
||||
<common:external-id-value>c9bdf086-cfee-4cd9-bcfb-268cc5423248</common:external-id-value>
|
||||
<common:external-id-normalized transient="true">c9bdf086-cfee-4cd9-bcfb-268cc5423248</common:external-id-normalized>
|
||||
<common:external-id-url></common:external-id-url>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<peer-review:peer-review-summary put-code="8741329" path="/0000-0001-6816-8350/peer-review/8741329" visibility="public" display-index="0">
|
||||
<common:created-date>2023-02-28T06:51:52.426Z</common:created-date>
|
||||
<common:last-modified-date>2023-02-28T06:51:52.426Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-client-id>
|
||||
<common:uri>https://orcid.org/client/APP-945VYTN20C7BZXYT</common:uri>
|
||||
<common:path>APP-945VYTN20C7BZXYT</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-client-id>
|
||||
<common:source-name>Springer Nature @ Editorial Manager</common:source-name>
|
||||
</common:source>
|
||||
<peer-review:reviewer-role>reviewer</peer-review:reviewer-role>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>source-work-id</common:external-id-type>
|
||||
<common:external-id-value>c9bdf086-cfee-4cd9-bcfb-268cc5423248</common:external-id-value>
|
||||
<common:external-id-normalized transient="true">c9bdf086-cfee-4cd9-bcfb-268cc5423248</common:external-id-normalized>
|
||||
<common:external-id-url></common:external-id-url>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<peer-review:review-type>review</peer-review:review-type>
|
||||
<peer-review:completion-date>
|
||||
<common:year>2023</common:year>
|
||||
</peer-review:completion-date>
|
||||
<peer-review:review-group-id>issn:0167-4544</peer-review:review-group-id>
|
||||
<peer-review:convening-organization>
|
||||
<common:name>Springer Nature</common:name>
|
||||
<common:address>
|
||||
<common:city>New York</common:city>
|
||||
<common:country>US</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>grid.467660.5</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>GRID</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</peer-review:convening-organization>
|
||||
</peer-review:peer-review-summary>
|
||||
</activities:peer-review-group>
|
||||
<activities:peer-review-group>
|
||||
<common:last-modified-date>2023-05-31T05:53:44.542Z</common:last-modified-date>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>source-work-id</common:external-id-type>
|
||||
<common:external-id-value>c442840b-5807-459d-802a-303d8ba4e25e</common:external-id-value>
|
||||
<common:external-id-normalized transient="true">c442840b-5807-459d-802a-303d8ba4e25e</common:external-id-normalized>
|
||||
<common:external-id-url></common:external-id-url>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<peer-review:peer-review-summary put-code="9680570" path="/0000-0001-6816-8350/peer-review/9680570" visibility="public" display-index="0">
|
||||
<common:created-date>2023-05-31T05:53:44.542Z</common:created-date>
|
||||
<common:last-modified-date>2023-05-31T05:53:44.542Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-client-id>
|
||||
<common:uri>https://orcid.org/client/APP-945VYTN20C7BZXYT</common:uri>
|
||||
<common:path>APP-945VYTN20C7BZXYT</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-client-id>
|
||||
<common:source-name>Springer Nature @ Editorial Manager</common:source-name>
|
||||
</common:source>
|
||||
<peer-review:reviewer-role>reviewer</peer-review:reviewer-role>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>source-work-id</common:external-id-type>
|
||||
<common:external-id-value>c442840b-5807-459d-802a-303d8ba4e25e</common:external-id-value>
|
||||
<common:external-id-normalized transient="true">c442840b-5807-459d-802a-303d8ba4e25e</common:external-id-normalized>
|
||||
<common:external-id-url></common:external-id-url>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<peer-review:review-type>review</peer-review:review-type>
|
||||
<peer-review:completion-date>
|
||||
<common:year>2023</common:year>
|
||||
</peer-review:completion-date>
|
||||
<peer-review:review-group-id>issn:0167-4544</peer-review:review-group-id>
|
||||
<peer-review:convening-organization>
|
||||
<common:name>Springer Nature</common:name>
|
||||
<common:address>
|
||||
<common:city>New York</common:city>
|
||||
<common:country>US</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>grid.467660.5</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>GRID</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</peer-review:convening-organization>
|
||||
</peer-review:peer-review-summary>
|
||||
</activities:peer-review-group>
|
||||
</activities:group>
|
||||
</activities:peer-reviews>
|
||||
<activities:qualifications path="/0000-0001-6816-8350/qualifications"/>
|
||||
<activities:research-resources path="/0000-0001-6816-8350/research-resources"/>
|
||||
<activities:services path="/0000-0001-6816-8350/services"/>
|
||||
<activities:works path="/0000-0001-6816-8350/works">
|
||||
<common:last-modified-date>2023-06-02T20:12:00.338Z</common:last-modified-date>
|
||||
<activities:group>
|
||||
<common:last-modified-date>2023-06-02T20:12:00.338Z</common:last-modified-date>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>doi</common:external-id-type>
|
||||
<common:external-id-value>10.4337/9781800881945.00020</common:external-id-value>
|
||||
<common:external-id-normalized transient="true">10.4337/9781800881945.00020</common:external-id-normalized>
|
||||
<common:external-id-url>https://doi.org/10.4337/9781800881945.00020</common:external-id-url>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<work:work-summary put-code="134891279" path="/0000-0001-6816-8350/work/134891279" visibility="public" display-index="0">
|
||||
<common:created-date>2023-05-11T21:05:54.188Z</common:created-date>
|
||||
<common:last-modified-date>2023-06-02T20:12:00.338Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-client-id>
|
||||
<common:uri>https://orcid.org/client/0000-0001-9884-1913</common:uri>
|
||||
<common:path>0000-0001-9884-1913</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-client-id>
|
||||
<common:source-name>Crossref</common:source-name>
|
||||
</common:source>
|
||||
<work:title>
|
||||
<common:title>Ethical leadership as workplace innovation and enabler for employee commitment and innovative work behaviours in Vietnam</common:title>
|
||||
</work:title>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>doi</common:external-id-type>
|
||||
<common:external-id-value>10.4337/9781800881945.00020</common:external-id-value>
|
||||
<common:external-id-normalized transient="true">10.4337/9781800881945.00020</common:external-id-normalized>
|
||||
<common:external-id-url>https://doi.org/10.4337/9781800881945.00020</common:external-id-url>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<common:url>https://doi.org/10.4337/9781800881945.00020</common:url>
|
||||
<work:type>book-chapter</work:type>
|
||||
<common:publication-date>
|
||||
<common:year>2023</common:year>
|
||||
<common:month>05</common:month>
|
||||
<common:day>26</common:day>
|
||||
</common:publication-date>
|
||||
</work:work-summary>
|
||||
</activities:group>
|
||||
<activities:group>
|
||||
<common:last-modified-date>2023-03-01T11:30:31.972Z</common:last-modified-date>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>doi</common:external-id-type>
|
||||
<common:external-id-value>10.1007/s10551-022-05081-6</common:external-id-value>
|
||||
<common:external-id-normalized transient="true">10.1007/s10551-022-05081-6</common:external-id-normalized>
|
||||
<common:external-id-url>https://doi.org/10.1007/s10551-022-05081-6</common:external-id-url>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<work:work-summary put-code="110048777" path="/0000-0001-6816-8350/work/110048777" visibility="public" display-index="0">
|
||||
<common:created-date>2022-03-18T03:36:55.927Z</common:created-date>
|
||||
<common:last-modified-date>2023-03-01T11:30:31.972Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-client-id>
|
||||
<common:uri>https://orcid.org/client/0000-0001-9884-1913</common:uri>
|
||||
<common:path>0000-0001-9884-1913</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-client-id>
|
||||
<common:source-name>Crossref</common:source-name>
|
||||
</common:source>
|
||||
<work:title>
|
||||
<common:title>Unethical Leadership: Review, Synthesis and Directions for Future Research</common:title>
|
||||
</work:title>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>doi</common:external-id-type>
|
||||
<common:external-id-value>10.1007/s10551-022-05081-6</common:external-id-value>
|
||||
<common:external-id-normalized transient="true">10.1007/s10551-022-05081-6</common:external-id-normalized>
|
||||
<common:external-id-url>https://doi.org/10.1007/s10551-022-05081-6</common:external-id-url>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<common:url>https://doi.org/10.1007/s10551-022-05081-6</common:url>
|
||||
<work:type>journal-article</work:type>
|
||||
<common:publication-date>
|
||||
<common:year>2023</common:year>
|
||||
<common:month>03</common:month>
|
||||
</common:publication-date>
|
||||
<work:journal-title>Journal of Business Ethics</work:journal-title>
|
||||
</work:work-summary>
|
||||
</activities:group>
|
||||
<activities:group>
|
||||
<common:last-modified-date>2022-05-28T18:16:16.575Z</common:last-modified-date>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>doi</common:external-id-type>
|
||||
<common:external-id-value>10.1017/jmo.2019.33</common:external-id-value>
|
||||
<common:external-id-normalized transient="true">10.1017/jmo.2019.33</common:external-id-normalized>
|
||||
<common:external-id-url>https://doi.org/10.1017/jmo.2019.33</common:external-id-url>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<work:work-summary put-code="57272180" path="/0000-0001-6816-8350/work/57272180" visibility="public" display-index="0">
|
||||
<common:created-date>2019-05-10T07:23:14.608Z</common:created-date>
|
||||
<common:last-modified-date>2022-05-28T18:16:16.575Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-client-id>
|
||||
<common:uri>https://orcid.org/client/0000-0001-9884-1913</common:uri>
|
||||
<common:path>0000-0001-9884-1913</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-client-id>
|
||||
<common:source-name>Crossref</common:source-name>
|
||||
</common:source>
|
||||
<work:title>
|
||||
<common:title>And now for something completely different: Reframing social processes of leadership theory using positive organisational behaviour</common:title>
|
||||
</work:title>
|
||||
<common:external-ids>
|
||||
<common:external-id>
|
||||
<common:external-id-type>doi</common:external-id-type>
|
||||
<common:external-id-value>10.1017/jmo.2019.33</common:external-id-value>
|
||||
<common:external-id-normalized transient="true">10.1017/jmo.2019.33</common:external-id-normalized>
|
||||
<common:external-id-url>https://doi.org/10.1017/jmo.2019.33</common:external-id-url>
|
||||
<common:external-id-relationship>self</common:external-id-relationship>
|
||||
</common:external-id>
|
||||
</common:external-ids>
|
||||
<common:url>https://doi.org/10.1017/jmo.2019.33</common:url>
|
||||
<work:type>journal-article</work:type>
|
||||
<common:publication-date>
|
||||
<common:year>2019</common:year>
|
||||
<common:month>05</common:month>
|
||||
<common:day>09</common:day>
|
||||
</common:publication-date>
|
||||
<work:journal-title>Journal of Management & Organization</work:journal-title>
|
||||
</work:work-summary>
|
||||
</activities:group>
|
||||
</activities:works>
|
||||
</activities:activities-summary>
|
||||
</record:record>
|
@ -0,0 +1,60 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<activities:employments path="/0000-0002-0456-1185/employments" xmlns:internal="http://www.orcid.org/ns/internal" xmlns:education="http://www.orcid.org/ns/education" xmlns:distinction="http://www.orcid.org/ns/distinction" xmlns:deprecated="http://www.orcid.org/ns/deprecated" xmlns:other-name="http://www.orcid.org/ns/other-name" xmlns:membership="http://www.orcid.org/ns/membership" xmlns:error="http://www.orcid.org/ns/error" xmlns:common="http://www.orcid.org/ns/common" xmlns:record="http://www.orcid.org/ns/record" xmlns:personal-details="http://www.orcid.org/ns/personal-details" xmlns:keyword="http://www.orcid.org/ns/keyword" xmlns:email="http://www.orcid.org/ns/email" xmlns:external-identifier="http://www.orcid.org/ns/external-identifier" xmlns:funding="http://www.orcid.org/ns/funding" xmlns:preferences="http://www.orcid.org/ns/preferences" xmlns:address="http://www.orcid.org/ns/address" xmlns:invited-position="http://www.orcid.org/ns/invited-position" xmlns:work="http://www.orcid.org/ns/work" xmlns:history="http://www.orcid.org/ns/history" xmlns:employment="http://www.orcid.org/ns/employment" xmlns:qualification="http://www.orcid.org/ns/qualification" xmlns:service="http://www.orcid.org/ns/service" xmlns:person="http://www.orcid.org/ns/person" xmlns:activities="http://www.orcid.org/ns/activities" xmlns:researcher-url="http://www.orcid.org/ns/researcher-url" xmlns:peer-review="http://www.orcid.org/ns/peer-review" xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:research-resource="http://www.orcid.org/ns/research-resource">
|
||||
<common:last-modified-date>2024-01-07T23:59:38.869Z</common:last-modified-date>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2024-01-07T23:59:38.869Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<employment:employment-summary put-code="22127142" display-index="0" path="/0000-0002-0456-1185/employment/22127142" visibility="public">
|
||||
<common:created-date>2024-01-07T23:59:38.869Z</common:created-date>
|
||||
<common:last-modified-date>2024-01-07T23:59:38.869Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-client-id>
|
||||
<common:uri>https://orcid.org/client/APP-N0TAO4G9BBK9PWHT</common:uri>
|
||||
<common:path>APP-N0TAO4G9BBK9PWHT</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-client-id>
|
||||
<common:source-name>Tampere University</common:source-name>
|
||||
</common:source>
|
||||
<common:organization>
|
||||
<common:name>Tampere University</common:name>
|
||||
<common:address>
|
||||
<common:city>Tampere</common:city>
|
||||
<common:country>FI</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>https://ror.org/033003e23</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>ROR</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</employment:employment-summary>
|
||||
</activities:affiliation-group>
|
||||
<activities:affiliation-group>
|
||||
<common:last-modified-date>2019-01-03T17:00:05.658Z</common:last-modified-date>
|
||||
<common:external-ids/>
|
||||
<employment:employment-summary put-code="3291239" display-index="1" path="/0000-0002-0456-1185/employment/3291239" visibility="public">
|
||||
<common:created-date>2017-02-26T04:46:20.917Z</common:created-date>
|
||||
<common:last-modified-date>2019-01-03T17:00:05.658Z</common:last-modified-date>
|
||||
<common:source>
|
||||
<common:source-orcid>
|
||||
<common:uri>https://orcid.org/0000-0002-0456-1185</common:uri>
|
||||
<common:path>0000-0002-0456-1185</common:path>
|
||||
<common:host>orcid.org</common:host>
|
||||
</common:source-orcid>
|
||||
<common:source-name>Tiina Manninen</common:source-name>
|
||||
</common:source>
|
||||
<common:department-name> Faculty of Medicine and Health Technology</common:department-name>
|
||||
<common:role-title>Academy Research Fellow</common:role-title>
|
||||
<common:organization>
|
||||
<common:name>Tampere University</common:name>
|
||||
<common:address>
|
||||
<common:city>Tampere</common:city>
|
||||
<common:country>FI</common:country>
|
||||
</common:address>
|
||||
<common:disambiguated-organization>
|
||||
<common:disambiguated-organization-identifier>7839</common:disambiguated-organization-identifier>
|
||||
<common:disambiguation-source>RINGGOLD</common:disambiguation-source>
|
||||
</common:disambiguated-organization>
|
||||
</common:organization>
|
||||
</employment:employment-summary>
|
||||
</activities:affiliation-group>
|
||||
</activities:employments>
|
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@ -0,0 +1,58 @@
|
||||
<record>
|
||||
<header xmlns="http://www.openarchives.org/OAI/2.0/">
|
||||
<identifier>ftdoajarticles:oai:doaj.org/article:e2d5b5126b2d4e479933cc7f9a9ae0c1</identifier>
|
||||
<datestamp>2022-12-31T11:48:55Z</datestamp>
|
||||
</header>
|
||||
<metadata xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:base_dc="http://oai.base-search.net/base_dc/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dc="http://purl.org/dc/elements/1.1/">
|
||||
<base_dc:dc xsi:schemaLocation="http://oai.base-search.net/base_dc/ http://oai.base-search.net/base_dc/base_dc.xsd">
|
||||
<base_dc:global_id>ftdoajarticles:oai:doaj.org/article:e2d5b5126b2d4e479933cc7f9a9ae0c1</base_dc:global_id>
|
||||
<base_dc:continent>cww</base_dc:continent>
|
||||
<base_dc:country>org</base_dc:country>
|
||||
<base_dc:collection opendoar_id="1234" ror_id="ror1234">ftdoajarticles</base_dc:collection>
|
||||
<base_dc:collname>TEST REPO</base_dc:collname>
|
||||
<dc:title>Assessment of cultural heritage: the legislative and methodological framework of Russian Federation</dc:title>
|
||||
<dc:creator>ALBU, Svetlana</dc:creator>
|
||||
<dc:creator>LEȘAN, Anna</dc:creator>
|
||||
<dc:subject>architectural heritage</dc:subject>
|
||||
<dc:subject>evaluation of architectural heritage</dc:subject>
|
||||
<dc:subject>types of values</dc:subject>
|
||||
<dc:subject>experience of russian federation</dc:subject>
|
||||
<dc:subject>Social Sciences</dc:subject>
|
||||
<dc:subject>H</dc:subject>
|
||||
<dc:description>Architectural heritage is the real estate inheritance by population of a country becoming an extremely valuable and specific category, preserving and capitalizing on those assets requires considerable effort. The state does not have sufficient means to maintain and preserve cultural heritage, as a result it is included in the civil circuit. The transfer of property right or of some partial rights over the architectural patrimony is accompanied by the necessity to estimate the value of goods. In this article, the authors examine the experience of Russian Federation (one of the largest countries with a huge architectural heritage) on the legislative framework of architectural and methodological heritage of architectural heritage assessment. The particularities of cultural assets valuation compared to other categories of real estate are examined, as well as the methodological aspects (types of values, methods applied in valuation, approaches according to the purpose of valuation) regarding the valuation of real estate with architectural value in Russian Federation.</dc:description>
|
||||
<dc:publisher>Technical University of Moldova</dc:publisher>
|
||||
<dc:date>2020-09-01T00:00:00Z</dc:date>
|
||||
<base_dc:year>2020</base_dc:year>
|
||||
<dc:type>article</dc:type>
|
||||
<base_dc:typenorm>121</base_dc:typenorm>
|
||||
<dc:identifier>https://doi.org/10.5281/zenodo.3971988</dc:identifier>
|
||||
<dc:identifier>https://doaj.org/article/e2d5b5126b2d4e479933cc7f9a9ae0c1</dc:identifier>
|
||||
<base_dc:link>https://doi.org/10.5281/zenodo.3971988</base_dc:link>
|
||||
<dc:source>Journal of Social Sciences, Vol 3, Iss 3, Pp 134-143 (2020)</dc:source>
|
||||
<dc:language>EN</dc:language>
|
||||
<dc:language>FR</dc:language>
|
||||
<dc:language>RO</dc:language>
|
||||
<dc:relation>http://ibn.idsi.md/sites/default/files/imag_file/JSS-3-2020_134-143.pdf</dc:relation>
|
||||
<dc:relation>https://doaj.org/toc/2587-3490</dc:relation>
|
||||
<dc:relation>https://doaj.org/toc/2587-3504</dc:relation>
|
||||
<dc:relation>doi:10.5281/zenodo.3971988</dc:relation>
|
||||
<dc:relation>2587-3490</dc:relation>
|
||||
<dc:relation>2587-3504</dc:relation>
|
||||
<dc:relation>https://doaj.org/article/e2d5b5126b2d4e479933cc7f9a9ae0c1</dc:relation>
|
||||
<base_dc:autoclasscode type="ddc">720</base_dc:autoclasscode>
|
||||
<base_dc:authod_id>
|
||||
<base_dc:creator_name>ALBU, Svetlana</base_dc:creator_name>
|
||||
<base_dc:creator_id>https://orcid.org/0000-0002-8648-950X</base_dc:creator_id>
|
||||
</base_dc:authod_id>
|
||||
<base_dc:authod_id>
|
||||
<base_dc:creator_name>LEȘAN, Anna</base_dc:creator_name>
|
||||
<base_dc:creator_id>https://orcid.org/0000-0003-3284-0525</base_dc:creator_id>
|
||||
</base_dc:authod_id>
|
||||
<base_dc:doi>https://doi.org/10.5281/zenodo.3971988</base_dc:doi>
|
||||
<base_dc:oa>1</base_dc:oa>
|
||||
<base_dc:lang>eng</base_dc:lang>
|
||||
<base_dc:lang>fre</base_dc:lang>
|
||||
<base_dc:lang>rum</base_dc:lang>
|
||||
</base_dc:dc>
|
||||
</metadata>
|
||||
</record>
|
@ -0,0 +1,36 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<record>
|
||||
<header xmlns="http://www.openarchives.org/OAI/2.0/">
|
||||
<identifier>ftunivminnesdc:oai:conservancy.umn.edu:11299/109914</identifier>
|
||||
<datestamp>2023-07-18T20:05:40Z</datestamp>
|
||||
</header>
|
||||
<metadata xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:base_dc="http://oai.base-search.net/base_dc/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dc="http://purl.org/dc/elements/1.1/">
|
||||
<base_dc:dc xsi:schemaLocation="http://oai.base-search.net/base_dc/ http://oai.base-search.net/base_dc/base_dc.xsd">
|
||||
<base_dc:global_id>ftunivminnesdc:oai:conservancy.umn.edu:11299/109914</base_dc:global_id>
|
||||
<base_dc:continent>cna</base_dc:continent>
|
||||
<base_dc:country>us</base_dc:country>
|
||||
<base_dc:collection opendoar_id="1008">ftunivminnesdc</base_dc:collection>
|
||||
<base_dc:collname>University of Minnesota Digital Conservancy</base_dc:collname>
|
||||
<dc:title>An Experimental Investigation of the influence of an air bubble layer on radiated noise and surface pressure fluctuations in a turbulent boundary layer</dc:title>
|
||||
<dc:creator>Killen, John M.</dc:creator>
|
||||
<dc:subject>Boundary layer noise</dc:subject>
|
||||
<dc:subject>Kinetic energy dissipation</dc:subject>
|
||||
<dc:subject>Air bubble layer</dc:subject>
|
||||
<dc:subject>Maximum stable bubble size</dc:subject>
|
||||
<dc:subject>bubble noise spectra</dc:subject>
|
||||
<dc:description>between the noise spectra of a layer of air bubbles in a turbulent flow and' a maximum stable bubble size which can exist in the same flow. An air bubbie layer with individual bubble sizes greater than a maximum stable size was introduced into the boundary layer of water flowing along a smooth flat plate. It was found that the maximum stable bubble size was related to the turbulent kinetic energy dissipation as had been previously shown by other investigators ; Office of Naval Research Arlington, Virginia 22217</dc:description>
|
||||
<dc:date>1981-09-31T21:42:52Z</dc:date>
|
||||
<base_dc:year>1981</base_dc:year>
|
||||
<dc:type>Report</dc:type>
|
||||
<base_dc:typenorm>14</base_dc:typenorm>
|
||||
<dc:identifier>http://purl.umn.edu/109914</dc:identifier>
|
||||
<base_dc:link>http://purl.umn.edu/109914</base_dc:link>
|
||||
<dc:language>en_US</dc:language>
|
||||
<dc:relation>Project Reports</dc:relation>
|
||||
<dc:relation>202</dc:relation>
|
||||
<dc:relation>http://purl.umn.edu/109914</dc:relation>
|
||||
<base_dc:oa>2</base_dc:oa>
|
||||
<base_dc:lang>eng</base_dc:lang>
|
||||
</base_dc:dc>
|
||||
</metadata>
|
||||
</record>
|
@ -1,40 +0,0 @@
|
||||
package eu.dnetlib.dhp.enrich.orcid
|
||||
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants
|
||||
import eu.dnetlib.dhp.schema.oaf.{Author, Publication}
|
||||
import eu.dnetlib.dhp.schema.sx.OafUtils
|
||||
import org.apache.spark.sql.Row
|
||||
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
object AuthorEnricher extends Serializable {
|
||||
|
||||
def createAuthor(givenName: String, familyName: String, orcid: String): Author = {
|
||||
val a = new Author
|
||||
a.setName(givenName)
|
||||
a.setSurname(familyName)
|
||||
a.setFullname(s"$givenName $familyName")
|
||||
val pid = OafUtils.createSP(orcid, ModelConstants.ORCID, ModelConstants.ORCID)
|
||||
pid.setDataInfo(OafUtils.generateDataInfo())
|
||||
pid.getDataInfo.setProvenanceaction(OafUtils.createQualifier("ORCID_ENRICHMENT", "ORCID_ENRICHMENT"))
|
||||
a.setPid(List(pid).asJava)
|
||||
a
|
||||
}
|
||||
|
||||
def toOAFAuthor(r: Row): java.util.List[Author] = {
|
||||
r.getList[Row](1)
|
||||
.asScala
|
||||
.map(s => createAuthor(s.getAs[String]("givenName"), s.getAs[String]("familyName"), s.getAs[String]("orcid")))
|
||||
.toList
|
||||
.asJava
|
||||
}
|
||||
|
||||
// def enrichAuthor(p:Publication,r:Row): Unit = {
|
||||
// val k:Map[String, OAuthor] =r.getList[Row](1).asScala.map(s => (s.getAs[String]("orcid"), OAuthor(s.getAs[String]("givenName") ,s.getAs[String]("familyName") ))).groupBy(_._1).mapValues(_.map(_._2).head)
|
||||
// println(k)
|
||||
//
|
||||
//
|
||||
//
|
||||
// }
|
||||
|
||||
}
|
@ -0,0 +1,128 @@
|
||||
package eu.dnetlib.dhp.enrich.orcid
|
||||
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants
|
||||
import eu.dnetlib.dhp.schema.oaf.{Author, StructuredProperty}
|
||||
import eu.dnetlib.dhp.schema.sx.OafUtils
|
||||
|
||||
import java.util
|
||||
import scala.beans.BeanProperty
|
||||
import scala.collection.JavaConverters._
|
||||
import scala.util.control.Breaks.{break, breakable}
|
||||
|
||||
case class ORCIDAuthorEnricherResult(
|
||||
@BeanProperty var id: String,
|
||||
@BeanProperty var enriched_author: java.util.List[Author],
|
||||
@BeanProperty var author_matched: java.util.List[MatchedAuthors],
|
||||
@BeanProperty var author_unmatched: java.util.List[Author],
|
||||
@BeanProperty var orcid_unmatched: java.util.List[OrcidAutor]
|
||||
)
|
||||
|
||||
object ORCIDAuthorEnricher extends Serializable {
|
||||
|
||||
def enrichOrcid(
|
||||
id: String,
|
||||
graph_authors: java.util.List[Author],
|
||||
orcid_authors: java.util.List[OrcidAutor]
|
||||
): ORCIDAuthorEnricherResult = {
|
||||
// Author enriching strategy:
|
||||
// 1) create a copy of graph author list in unmatched_authors
|
||||
// 2) find best match in unmatched_authors, remove it from unmatched_authors and enrich it so
|
||||
// that the enrichment is reflected in graph_authors (they share author instances)
|
||||
// 3) repeat (2) till the end of the list and then with different matching algorithms that have decreasing
|
||||
// trust in their output
|
||||
// At the end unmatched_authors will contain authors not matched with any of the matching algos
|
||||
val unmatched_authors = new util.ArrayList[Author](graph_authors)
|
||||
|
||||
val matches = {
|
||||
// Look after exact fullname match, reconstruct ORCID fullname as givenName + familyName
|
||||
extractAndEnrichMatches(
|
||||
unmatched_authors,
|
||||
orcid_authors,
|
||||
(author, orcid) =>
|
||||
ORCIDAuthorMatchers.matchEqualsIgnoreCase(author.getFullname, orcid.givenName + " " + orcid.familyName),
|
||||
"fullName"
|
||||
) ++
|
||||
// Look after exact reversed fullname match, reconstruct ORCID fullname as familyName + givenName
|
||||
extractAndEnrichMatches(
|
||||
unmatched_authors,
|
||||
orcid_authors,
|
||||
(author, orcid) =>
|
||||
ORCIDAuthorMatchers.matchEqualsIgnoreCase(author.getFullname, orcid.familyName + " " + orcid.givenName),
|
||||
"reversedFullName"
|
||||
) ++
|
||||
// split author names in tokens, order the tokens, then check for matches of full tokens or abbreviations
|
||||
extractAndEnrichMatches(
|
||||
unmatched_authors,
|
||||
orcid_authors,
|
||||
(author, orcid) =>
|
||||
ORCIDAuthorMatchers
|
||||
.matchOrderedTokenAndAbbreviations(author.getFullname, orcid.givenName + " " + orcid.familyName),
|
||||
"orderedTokens"
|
||||
) ++
|
||||
// look after exact matches of ORCID creditName
|
||||
extractAndEnrichMatches(
|
||||
unmatched_authors,
|
||||
orcid_authors,
|
||||
(author, orcid) => ORCIDAuthorMatchers.matchEqualsIgnoreCase(author.getFullname, orcid.creditName),
|
||||
"creditName"
|
||||
) ++
|
||||
// look after exact matches in ORCID otherNames
|
||||
extractAndEnrichMatches(
|
||||
unmatched_authors,
|
||||
orcid_authors,
|
||||
(author, orcid) =>
|
||||
orcid.otherNames != null && ORCIDAuthorMatchers.matchOtherNames(author.getFullname, orcid.otherNames.asScala),
|
||||
"otherNames"
|
||||
)
|
||||
}
|
||||
|
||||
ORCIDAuthorEnricherResult(id, graph_authors, matches.asJava, unmatched_authors, orcid_authors)
|
||||
}
|
||||
|
||||
private def extractAndEnrichMatches(
|
||||
graph_authors: java.util.List[Author],
|
||||
orcid_authors: java.util.List[OrcidAutor],
|
||||
matchingFunc: (Author, OrcidAutor) => Boolean,
|
||||
matchName: String
|
||||
) = {
|
||||
val matched = scala.collection.mutable.ArrayBuffer.empty[MatchedAuthors]
|
||||
|
||||
if (graph_authors != null && !graph_authors.isEmpty) {
|
||||
val ait = graph_authors.iterator
|
||||
|
||||
while (ait.hasNext) {
|
||||
val author = ait.next()
|
||||
val oit = orcid_authors.iterator
|
||||
|
||||
breakable {
|
||||
while (oit.hasNext) {
|
||||
val orcid = oit.next()
|
||||
|
||||
if (matchingFunc(author, orcid)) {
|
||||
ait.remove()
|
||||
oit.remove()
|
||||
matched += MatchedAuthors(author, orcid, matchName)
|
||||
|
||||
if (author.getPid == null) {
|
||||
author.setPid(new util.ArrayList[StructuredProperty]())
|
||||
}
|
||||
|
||||
val orcidPID = OafUtils.createSP(orcid.orcid, ModelConstants.ORCID, ModelConstants.ORCID)
|
||||
orcidPID.setDataInfo(OafUtils.generateDataInfo())
|
||||
orcidPID.getDataInfo.setProvenanceaction(
|
||||
OafUtils.createQualifier("ORCID_ENRICHMENT", "ORCID_ENRICHMENT")
|
||||
)
|
||||
|
||||
author.getPid.add(orcidPID)
|
||||
|
||||
break()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
matched
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,65 @@
|
||||
package eu.dnetlib.dhp.enrich.orcid
|
||||
|
||||
import java.util.Locale
|
||||
import java.util.regex.Pattern
|
||||
|
||||
object ORCIDAuthorMatchers {
|
||||
val SPLIT_REGEX = Pattern.compile("[\\s,\\.]+")
|
||||
|
||||
val WORD_DIFF = 2
|
||||
|
||||
def matchEqualsIgnoreCase(a1: String, a2: String): Boolean = {
|
||||
if (a1 == null || a2 == null)
|
||||
false
|
||||
else
|
||||
a1 == a2 || a1.toLowerCase(Locale.ROOT).equals(a2.toLowerCase(Locale.ROOT))
|
||||
}
|
||||
|
||||
def matchOtherNames(fullName: String, otherNames: Seq[String]): Boolean = {
|
||||
if (otherNames != null) {
|
||||
otherNames.exists(matchEqualsIgnoreCase(fullName, _))
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
def matchOrderedTokenAndAbbreviations(a1: String, a2: String): Boolean = {
|
||||
val p1: Array[String] = SPLIT_REGEX.split(a1.trim.toLowerCase(Locale.ROOT)).filter(_.nonEmpty).sorted
|
||||
val p2: Array[String] = SPLIT_REGEX.split(a2.trim.toLowerCase(Locale.ROOT)).filter(_.nonEmpty).sorted
|
||||
|
||||
if (p1.length < 2 || p2.length < 2) return false
|
||||
if (Math.abs(p1.length - p2.length) > WORD_DIFF) return false // use alternative comparison algo
|
||||
|
||||
var p1Idx: Int = 0
|
||||
var p2Idx: Int = 0
|
||||
var shortMatches: Int = 0
|
||||
var longMatches: Int = 0
|
||||
while (p1Idx < p1.length && p2Idx < p2.length) {
|
||||
val e1: String = p1(p1Idx)
|
||||
val c1: Char = e1.charAt(0)
|
||||
val e2: String = p2(p2Idx)
|
||||
val c2: Char = e2.charAt(0)
|
||||
if (c1 < c2) p1Idx += 1
|
||||
else if (c1 > c2) p2Idx += 1
|
||||
else {
|
||||
var res: Boolean = false
|
||||
if (e1.length != 1 && e2.length != 1) {
|
||||
res = e1 == e2
|
||||
longMatches += 1
|
||||
} else {
|
||||
res = true
|
||||
shortMatches += 1
|
||||
}
|
||||
if (res) {
|
||||
p1Idx += 1
|
||||
p2Idx += 1
|
||||
} else {
|
||||
val diff: Int = e1.compareTo(e2)
|
||||
if (diff < 0) p1Idx += 1
|
||||
else if (diff > 0) p2Idx += 1
|
||||
}
|
||||
}
|
||||
}
|
||||
longMatches > 0 && (shortMatches + longMatches) == Math.min(p1.length, p2.length)
|
||||
}
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue