forked from D-Net/dnet-hadoop
Compare commits
4 Commits
master
...
doiboost_r
Author | SHA1 | Date |
---|---|---|
Sandro La Bruzzo | c8ddb527b3 | |
Sandro La Bruzzo | 27fbc9b385 | |
Sandro La Bruzzo | e517f52e30 | |
Sandro La Bruzzo | 8b9f70d977 |
|
@ -0,0 +1,27 @@
|
||||||
|
package eu.dnetlib.dhp.common.test;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
public class TestUtils {
|
||||||
|
|
||||||
|
public static List<String> getVocabulariesMock() throws IOException {
|
||||||
|
return IOUtils
|
||||||
|
.readLines(
|
||||||
|
Objects
|
||||||
|
.requireNonNull(
|
||||||
|
TestUtils.class.getResourceAsStream("/eu/dnetlib/dhp/vocabulary/terms.txt")));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static List<String> getSynonymsMock() throws IOException {
|
||||||
|
return IOUtils
|
||||||
|
.readLines(
|
||||||
|
Objects
|
||||||
|
.requireNonNull(
|
||||||
|
TestUtils.class.getResourceAsStream("/eu/dnetlib/dhp/vocabulary/synonyms.txt")));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,74 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
|
public class CleaningFunctions {
|
||||||
|
|
||||||
|
public static final String DOI_PREFIX_REGEX = "(^10\\.|\\/10\\.)";
|
||||||
|
public static final String DOI_PREFIX = "10.";
|
||||||
|
|
||||||
|
public static final Set<String> PID_BLACKLIST = new HashSet<>();
|
||||||
|
|
||||||
|
static {
|
||||||
|
PID_BLACKLIST.add("none");
|
||||||
|
PID_BLACKLIST.add("na");
|
||||||
|
}
|
||||||
|
|
||||||
|
public CleaningFunctions() {}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Utility method that filter PID values on a per-type basis.
|
||||||
|
* @param s the PID whose value will be checked.
|
||||||
|
* @return false if the pid matches the filter criteria, true otherwise.
|
||||||
|
*/
|
||||||
|
public static boolean pidFilter(StructuredProperty s) {
|
||||||
|
final String pidValue = s.getValue();
|
||||||
|
if (Objects.isNull(s.getQualifier()) ||
|
||||||
|
StringUtils.isBlank(pidValue) ||
|
||||||
|
StringUtils.isBlank(pidValue.replaceAll("(?:\\n|\\r|\\t|\\s)", ""))) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (CleaningFunctions.PID_BLACKLIST.contains(pidValue)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return !PidBlacklistProvider.getBlacklist(s.getQualifier().getClassid()).contains(pidValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Utility method that normalises PID values on a per-type basis.
|
||||||
|
* @param pid the PID whose value will be normalised.
|
||||||
|
* @return the PID containing the normalised value.
|
||||||
|
*/
|
||||||
|
public static StructuredProperty normalizePidValue(StructuredProperty pid) {
|
||||||
|
pid.setValue(
|
||||||
|
normalizePidValue(
|
||||||
|
pid.getQualifier().getClassid(),
|
||||||
|
pid.getValue()));
|
||||||
|
|
||||||
|
return pid;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String normalizePidValue(String pidType, String pidValue) {
|
||||||
|
String value = Optional
|
||||||
|
.ofNullable(pidValue)
|
||||||
|
.map(String::trim)
|
||||||
|
.orElseThrow(() -> new IllegalArgumentException("PID value cannot be empty"));
|
||||||
|
|
||||||
|
switch (pidType) {
|
||||||
|
|
||||||
|
// TODO add cleaning for more PID types as needed
|
||||||
|
case "doi":
|
||||||
|
return value.toLowerCase().replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX);
|
||||||
|
}
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,283 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import static com.google.common.base.Preconditions.checkArgument;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.security.MessageDigest;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.function.Function;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
|
import org.apache.commons.codec.binary.Hex;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
import com.google.common.collect.HashBiMap;
|
||||||
|
import com.google.common.collect.Maps;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory class for OpenAIRE identifiers in the Graph
|
||||||
|
*/
|
||||||
|
public class IdentifierFactory implements Serializable {
|
||||||
|
|
||||||
|
public static final String ID_SEPARATOR = "::";
|
||||||
|
public static final String ID_PREFIX_SEPARATOR = "|";
|
||||||
|
|
||||||
|
public static final int ID_PREFIX_LEN = 12;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Declares the associations PID_TYPE -> [DATASOURCE ID, NAME] considered authoritative for that PID_TYPE
|
||||||
|
*/
|
||||||
|
public static final Map<PidType, HashBiMap<String, String>> PID_AUTHORITY = Maps.newHashMap();
|
||||||
|
|
||||||
|
static {
|
||||||
|
PID_AUTHORITY.put(PidType.doi, HashBiMap.create());
|
||||||
|
PID_AUTHORITY.get(PidType.doi).put(CROSSREF_ID, "Crossref");
|
||||||
|
PID_AUTHORITY.get(PidType.doi).put(DATACITE_ID, "Datacite");
|
||||||
|
PID_AUTHORITY.get(PidType.doi).put(ZENODO_OD_ID, "ZENODO");
|
||||||
|
PID_AUTHORITY.get(PidType.doi).put(ZENODO_R3_ID, "Zenodo");
|
||||||
|
|
||||||
|
PID_AUTHORITY.put(PidType.pmc, HashBiMap.create());
|
||||||
|
PID_AUTHORITY.get(PidType.pmc).put(EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central");
|
||||||
|
PID_AUTHORITY.get(PidType.pmc).put(PUBMED_CENTRAL_ID, "PubMed Central");
|
||||||
|
|
||||||
|
PID_AUTHORITY.put(PidType.pmid, HashBiMap.create());
|
||||||
|
PID_AUTHORITY.get(PidType.pmid).put(EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central");
|
||||||
|
PID_AUTHORITY.get(PidType.pmid).put(PUBMED_CENTRAL_ID, "PubMed Central");
|
||||||
|
|
||||||
|
PID_AUTHORITY.put(PidType.arXiv, HashBiMap.create());
|
||||||
|
PID_AUTHORITY.get(PidType.arXiv).put(ARXIV_ID, "arXiv.org e-Print Archive");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Declares the associations PID_TYPE -> [DATASOURCE ID, PID SUBSTRING] considered as delegated authority for that
|
||||||
|
* PID_TYPE. Example, Zenodo is delegated to forge DOIs that contain the 'zenodo' word.
|
||||||
|
*/
|
||||||
|
public static final Map<PidType, Map<String, String>> DELEGATED_PID_AUTHORITY = Maps.newHashMap();
|
||||||
|
|
||||||
|
static {
|
||||||
|
DELEGATED_PID_AUTHORITY.put(PidType.doi, new HashMap<>());
|
||||||
|
DELEGATED_PID_AUTHORITY.get(PidType.doi).put(ZENODO_OD_ID, "zenodo");
|
||||||
|
DELEGATED_PID_AUTHORITY.get(PidType.doi).put(ZENODO_R3_ID, "zenodo");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Declares the associations PID_TYPE -> [DATASOURCE ID, NAME] whose records are considered enrichment for the graph.
|
||||||
|
* Their OpenAIRE ID is built from the declared PID type. Are merged with their corresponding record, identified by
|
||||||
|
* the same OpenAIRE id.
|
||||||
|
*/
|
||||||
|
public static final Map<PidType, HashBiMap<String, String>> ENRICHMENT_PROVIDER = Maps.newHashMap();
|
||||||
|
|
||||||
|
static {
|
||||||
|
ENRICHMENT_PROVIDER.put(PidType.doi, HashBiMap.create());
|
||||||
|
ENRICHMENT_PROVIDER.get(PidType.doi).put(OPEN_APC_ID, OPEN_APC_NAME);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Set<String> delegatedAuthorityDatasourceIds() {
|
||||||
|
return DELEGATED_PID_AUTHORITY.values()
|
||||||
|
.stream()
|
||||||
|
.flatMap(m -> m.keySet().stream())
|
||||||
|
.collect(Collectors.toCollection(HashSet::new));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static List<StructuredProperty> getPids(List<StructuredProperty> pid, KeyValue collectedFrom) {
|
||||||
|
return pidFromInstance(pid, collectedFrom, true).distinct().collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
|
public static <T extends Result> String createDOIBoostIdentifier(T entity) {
|
||||||
|
if (entity == null)
|
||||||
|
return null;
|
||||||
|
|
||||||
|
StructuredProperty pid = null;
|
||||||
|
if (entity.getPid() != null) {
|
||||||
|
pid = entity
|
||||||
|
.getPid()
|
||||||
|
.stream()
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.filter(s -> s.getQualifier() != null && "doi".equalsIgnoreCase(s.getQualifier().getClassid()))
|
||||||
|
.filter(CleaningFunctions::pidFilter)
|
||||||
|
.findAny()
|
||||||
|
.orElse(null);
|
||||||
|
} else {
|
||||||
|
if (entity.getInstance() != null) {
|
||||||
|
pid = entity
|
||||||
|
.getInstance()
|
||||||
|
.stream()
|
||||||
|
.filter(i -> i.getPid() != null)
|
||||||
|
.flatMap(i -> i.getPid().stream())
|
||||||
|
.filter(CleaningFunctions::pidFilter)
|
||||||
|
.findAny()
|
||||||
|
.orElse(null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (pid != null)
|
||||||
|
return idFromPid(entity, pid, true);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates an identifier from the most relevant PID (if available) provided by a known PID authority in the given
|
||||||
|
* entity T. Returns entity.id when none of the PIDs meet the selection criteria is available.
|
||||||
|
*
|
||||||
|
* @param entity the entity providing PIDs and a default ID.
|
||||||
|
* @param <T> the specific entity type. Currently Organization and Result subclasses are supported.
|
||||||
|
* @param md5 indicates whether should hash the PID value or not.
|
||||||
|
* @return an identifier from the most relevant PID, entity.id otherwise
|
||||||
|
*/
|
||||||
|
public static <T extends OafEntity> String createIdentifier(T entity, boolean md5) {
|
||||||
|
|
||||||
|
checkArgument(StringUtils.isNoneBlank(entity.getId()), "missing entity identifier");
|
||||||
|
|
||||||
|
final Map<String, Set<StructuredProperty>> pids = extractPids(entity);
|
||||||
|
|
||||||
|
return pids
|
||||||
|
.values()
|
||||||
|
.stream()
|
||||||
|
.flatMap(Set::stream)
|
||||||
|
.min(new PidComparator<>(entity))
|
||||||
|
.map(
|
||||||
|
min -> Optional
|
||||||
|
.ofNullable(pids.get(min.getQualifier().getClassid()))
|
||||||
|
.map(
|
||||||
|
p -> p
|
||||||
|
.stream()
|
||||||
|
.sorted(new PidValueComparator())
|
||||||
|
.findFirst()
|
||||||
|
.map(s -> idFromPid(entity, s, md5))
|
||||||
|
.orElseGet(entity::getId))
|
||||||
|
.orElseGet(entity::getId))
|
||||||
|
.orElseGet(entity::getId);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <T extends OafEntity> Map<String, Set<StructuredProperty>> extractPids(T entity) {
|
||||||
|
if (entity instanceof Result) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(((Result) entity).getInstance())
|
||||||
|
.map(IdentifierFactory::mapPids)
|
||||||
|
.orElse(new HashMap<>());
|
||||||
|
} else {
|
||||||
|
return entity
|
||||||
|
.getPid()
|
||||||
|
.stream()
|
||||||
|
.map(CleaningFunctions::normalizePidValue)
|
||||||
|
.filter(CleaningFunctions::pidFilter)
|
||||||
|
.collect(
|
||||||
|
Collectors
|
||||||
|
.groupingBy(
|
||||||
|
p -> p.getQualifier().getClassid(),
|
||||||
|
Collectors.mapping(p -> p, Collectors.toCollection(HashSet::new))));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Map<String, Set<StructuredProperty>> mapPids(List<Instance> instance) {
|
||||||
|
return instance
|
||||||
|
.stream()
|
||||||
|
.map(i -> pidFromInstance(i.getPid(), i.getCollectedfrom(), false))
|
||||||
|
.flatMap(Function.identity())
|
||||||
|
.collect(
|
||||||
|
Collectors
|
||||||
|
.groupingBy(
|
||||||
|
p -> p.getQualifier().getClassid(),
|
||||||
|
Collectors.mapping(p -> p, Collectors.toCollection(HashSet::new))));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Stream<StructuredProperty> pidFromInstance(List<StructuredProperty> pid, KeyValue collectedFrom,
|
||||||
|
boolean mapHandles) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(pid)
|
||||||
|
.map(
|
||||||
|
pp -> pp
|
||||||
|
.stream()
|
||||||
|
// filter away PIDs provided by a DS that is not considered an authority for the
|
||||||
|
// given PID Type
|
||||||
|
.filter(p -> shouldFilterPidByCriteria(collectedFrom, p, mapHandles))
|
||||||
|
.map(CleaningFunctions::normalizePidValue)
|
||||||
|
.filter(p -> isNotFromDelegatedAuthority(collectedFrom, p))
|
||||||
|
.filter(CleaningFunctions::pidFilter))
|
||||||
|
.orElse(Stream.empty());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static boolean shouldFilterPidByCriteria(KeyValue collectedFrom, StructuredProperty p, boolean mapHandles) {
|
||||||
|
final PidType pType = PidType.tryValueOf(p.getQualifier().getClassid());
|
||||||
|
|
||||||
|
if (Objects.isNull(collectedFrom)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean isEnrich = Optional
|
||||||
|
.ofNullable(ENRICHMENT_PROVIDER.get(pType))
|
||||||
|
.map(enrich -> enrich.containsKey(collectedFrom.getKey())
|
||||||
|
|| enrich.containsValue(collectedFrom.getValue()))
|
||||||
|
.orElse(false);
|
||||||
|
|
||||||
|
boolean isAuthority = Optional
|
||||||
|
.ofNullable(PID_AUTHORITY.get(pType))
|
||||||
|
.map(authorities -> authorities.containsKey(collectedFrom.getKey())
|
||||||
|
|| authorities.containsValue(collectedFrom.getValue()))
|
||||||
|
.orElse(false);
|
||||||
|
|
||||||
|
return (mapHandles && pType.equals(PidType.handle)) || isEnrich || isAuthority;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isNotFromDelegatedAuthority(KeyValue collectedFrom, StructuredProperty p) {
|
||||||
|
final PidType pType = PidType.tryValueOf(p.getQualifier().getClassid());
|
||||||
|
|
||||||
|
final Map<String, String> da = DELEGATED_PID_AUTHORITY.get(pType);
|
||||||
|
if (Objects.isNull(da)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (!da.containsKey(collectedFrom.getKey())) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return StringUtils.contains(p.getValue(), da.get(collectedFrom.getKey()));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @see {@link IdentifierFactory#createIdentifier(OafEntity, boolean)}
|
||||||
|
*/
|
||||||
|
public static <T extends OafEntity> String createIdentifier(T entity) {
|
||||||
|
|
||||||
|
return createIdentifier(entity, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <T extends OafEntity> String idFromPid(T entity, StructuredProperty s, boolean md5) {
|
||||||
|
return idFromPid(ModelSupport.getIdPrefix(entity.getClass()), s.getQualifier().getClassid(), s.getValue(), md5);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String idFromPid(String numericPrefix, String pidType, String pidValue, boolean md5) {
|
||||||
|
return new StringBuilder()
|
||||||
|
.append(numericPrefix)
|
||||||
|
.append(ID_PREFIX_SEPARATOR)
|
||||||
|
.append(createPrefix(pidType))
|
||||||
|
.append(ID_SEPARATOR)
|
||||||
|
.append(md5 ? md5(pidValue) : pidValue)
|
||||||
|
.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
// create the prefix (length = 12)
|
||||||
|
private static String createPrefix(String pidType) {
|
||||||
|
StringBuilder prefix = new StringBuilder(StringUtils.left(pidType, ID_PREFIX_LEN));
|
||||||
|
while (prefix.length() < ID_PREFIX_LEN) {
|
||||||
|
prefix.append("_");
|
||||||
|
}
|
||||||
|
return prefix.substring(0, ID_PREFIX_LEN);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String md5(final String s) {
|
||||||
|
try {
|
||||||
|
final MessageDigest md = MessageDigest.getInstance("MD5");
|
||||||
|
md.update(s.getBytes(StandardCharsets.UTF_8));
|
||||||
|
return new String(Hex.encodeHex(md.digest()));
|
||||||
|
} catch (final Exception e) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,24 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
/**
 * Constants declaring the hard limits of the model and the naming of the collections.
 */
public class ModelHardLimits {

	public static final String LAYOUT = "index";
	public static final String INTERPRETATION = "openaire";
	public static final String SEPARATOR = "-";

	public static final int MAX_EXTERNAL_ENTITIES = 50;
	public static final int MAX_AUTHORS = 200;
	public static final int MAX_AUTHOR_FULLNAME_LENGTH = 1000;
	public static final int MAX_TITLE_LENGTH = 5000;
	public static final int MAX_TITLES = 10;
	public static final int MAX_ABSTRACT_LENGTH = 150000;
	public static final int MAX_INSTANCES = 10;

	/** Constants holder, not meant to be instantiated. */
	private ModelHardLimits() {
	}

	/**
	 * Builds the collection name as {@code <format>-<LAYOUT>-<INTERPRETATION>}.
	 *
	 * @param format the metadata format
	 * @return the collection name
	 */
	public static String getCollectionName(String format) {
		return String.join(SEPARATOR, format, LAYOUT, INTERPRETATION);
	}

}
|
|
@ -26,26 +26,21 @@ public class OafMapperUtils {
|
||||||
if (ModelSupport.isSubClass(left, OafEntity.class)) {
|
if (ModelSupport.isSubClass(left, OafEntity.class)) {
|
||||||
return mergeEntities((OafEntity) left, (OafEntity) right);
|
return mergeEntities((OafEntity) left, (OafEntity) right);
|
||||||
} else if (ModelSupport.isSubClass(left, Relation.class)) {
|
} else if (ModelSupport.isSubClass(left, Relation.class)) {
|
||||||
((Relation) left).mergeFrom((Relation) right);
|
return OafUtils.mergeOAF(left, right);
|
||||||
} else {
|
} else {
|
||||||
throw new IllegalArgumentException("invalid Oaf type:" + left.getClass().getCanonicalName());
|
throw new IllegalArgumentException("invalid Oaf type:" + left.getClass().getCanonicalName());
|
||||||
}
|
}
|
||||||
return left;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static OafEntity mergeEntities(OafEntity left, OafEntity right) {
|
public static OafEntity mergeEntities(OafEntity left, OafEntity right) {
|
||||||
if (ModelSupport.isSubClass(left, Result.class)) {
|
if (ModelSupport.isSubClass(left, Result.class)) {
|
||||||
return mergeResults((Result) left, (Result) right);
|
return mergeResults((Result) left, (Result) right);
|
||||||
} else if (ModelSupport.isSubClass(left, Datasource.class)) {
|
} else if (ModelSupport.isSubClass(left, Datasource.class) || ModelSupport.isSubClass(left, Organization.class) || ModelSupport.isSubClass(left, Project.class))
|
||||||
left.mergeFrom(right);
|
{
|
||||||
} else if (ModelSupport.isSubClass(left, Organization.class)) {
|
return OafUtils.mergeOAF(left, right);
|
||||||
left.mergeFrom(right);
|
|
||||||
} else if (ModelSupport.isSubClass(left, Project.class)) {
|
|
||||||
left.mergeFrom(right);
|
|
||||||
} else {
|
} else {
|
||||||
throw new IllegalArgumentException("invalid OafEntity subtype:" + left.getClass().getCanonicalName());
|
throw new IllegalArgumentException("invalid OafEntity subtype:" + left.getClass().getCanonicalName());
|
||||||
}
|
}
|
||||||
return left;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Result mergeResults(Result left, Result right) {
|
public static Result mergeResults(Result left, Result right) {
|
||||||
|
@ -59,13 +54,11 @@ public class OafMapperUtils {
|
||||||
if (!leftFromDelegatedAuthority && rightFromDelegatedAuthority) {
|
if (!leftFromDelegatedAuthority && rightFromDelegatedAuthority) {
|
||||||
return right;
|
return right;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (new ResultTypeComparator().compare(left, right) < 0) {
|
if (new ResultTypeComparator().compare(left, right) < 0) {
|
||||||
left.mergeFrom(right);
|
return OafUtils.mergeOAF(left, right);
|
||||||
return left;
|
|
||||||
} else {
|
} else {
|
||||||
right.mergeFrom(left);
|
return OafUtils.mergeOAF(right, left);
|
||||||
return right;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -251,6 +244,9 @@ public class OafMapperUtils {
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
public static Journal journal(
|
public static Journal journal(
|
||||||
final String name,
|
final String name,
|
||||||
final String issnPrinted,
|
final String issnPrinted,
|
||||||
|
@ -416,7 +412,7 @@ public class OafMapperUtils {
|
||||||
public static Measure newMeasureInstance(String id, String value, String key, DataInfo dataInfo) {
|
public static Measure newMeasureInstance(String id, String value, String key, DataInfo dataInfo) {
|
||||||
Measure m = new Measure();
|
Measure m = new Measure();
|
||||||
m.setId(id);
|
m.setId(id);
|
||||||
m.setUnit(Arrays.asList(newKeyValueInstance(key, value, dataInfo)));
|
m.setUnit(Collections.singletonList(newKeyValueInstance(key, value, dataInfo)));
|
||||||
return m;
|
return m;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,761 @@
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
|
import eu.dnetlib.dhp.common.api.zenodo.File;
|
||||||
|
import eu.dnetlib.dhp.schema.common.AccessRightComparator;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.commons.lang3.tuple.ImmutablePair;
|
||||||
|
import org.apache.commons.lang3.tuple.Pair;
|
||||||
|
import static com.google.common.base.Preconditions.checkArgument;
|
||||||
|
import static com.ibm.icu.text.PluralRules.Operand.e;
|
||||||
|
|
||||||
|
import java.text.ParseException;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The type Oaf utils class groups all
|
||||||
|
* the utilities method used for OAF Generation
|
||||||
|
*/
|
||||||
|
public class OafUtils {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Normalize pid string.
|
||||||
|
*
|
||||||
|
* @param pid the pid
|
||||||
|
* @return the string
|
||||||
|
*/
|
||||||
|
public static String extractKeyFromPid(final StructuredProperty pid) {
|
||||||
|
if (pid == null)
|
||||||
|
return null;
|
||||||
|
final StructuredProperty normalizedPid = CleaningFunctions.normalizePidValue(pid);
|
||||||
|
|
||||||
|
return String.format("%s::%s", normalizedPid.getQualifier().getClassid(), normalizedPid.getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This method converts the list of instance enrichments
|
||||||
|
* into a Map where the key is the normalized identifier
|
||||||
|
* and the value is the instance itself
|
||||||
|
*
|
||||||
|
* @param ri the list of enrichment instances
|
||||||
|
* @return the result map
|
||||||
|
*/
|
||||||
|
public static Map<String, Instance> toInstanceMap(final List<Instance> ri) {
|
||||||
|
|
||||||
|
|
||||||
|
return ri
|
||||||
|
.stream()
|
||||||
|
.filter(i -> i.getPid() != null || i.getAlternateIdentifier() != null)
|
||||||
|
.flatMap(i -> {
|
||||||
|
final List<Pair<String, Instance>> result = new ArrayList<>();
|
||||||
|
if (i.getPid() != null)
|
||||||
|
i.getPid().stream().filter(OafUtils::validPid).forEach(p -> result.add(new ImmutablePair<>(extractKeyFromPid(p), i)));
|
||||||
|
if (i.getAlternateIdentifier() != null)
|
||||||
|
i.getAlternateIdentifier().stream().filter(OafUtils::validPid).forEach(p -> result.add(new ImmutablePair<>(extractKeyFromPid(p), i)));
|
||||||
|
return result.stream();
|
||||||
|
}).collect(Collectors.toMap(
|
||||||
|
Pair::getLeft,
|
||||||
|
Pair::getRight,
|
||||||
|
(a, b) -> a
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Valid pid boolean.
|
||||||
|
*
|
||||||
|
* @param p the p
|
||||||
|
* @return the boolean
|
||||||
|
*/
|
||||||
|
private static boolean validPid(final StructuredProperty p) {
|
||||||
|
return p.getValue()!= null && p.getQualifier()!= null && p.getQualifier().getClassid()!=null;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This utility method finds the list of enrichment instances
|
||||||
|
* that match one or more PIDs in the input list
|
||||||
|
*
|
||||||
|
* @param pids the list of PIDs
|
||||||
|
* @param enrichments the List of enrichment instances having the same pid
|
||||||
|
* @return the list
|
||||||
|
*/
|
||||||
|
private static List<Instance> findEnrichmentsByPID(final List<StructuredProperty> pids, final Map<String,Instance> enrichments) {
|
||||||
|
if (pids == null || enrichments == null)
|
||||||
|
return null;
|
||||||
|
return pids
|
||||||
|
.stream()
|
||||||
|
.map(OafUtils::extractKeyFromPid)
|
||||||
|
.map(enrichments::get)
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Is an enrichment boolean.
|
||||||
|
*
|
||||||
|
* @param e the e
|
||||||
|
* @return the boolean
|
||||||
|
*/
|
||||||
|
public static boolean isAnEnrichment(OafEntity e) {
|
||||||
|
return e.getDataInfo()!= null &&
|
||||||
|
e.getDataInfo().getProvenanceaction()!= null
|
||||||
|
&& ModelConstants.PROVENANCE_ENRICH.equalsIgnoreCase(e.getDataInfo().getProvenanceaction().getClassid());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This method apply enrichment on a single instance
|
||||||
|
* The enrichment consists of replacing values on
|
||||||
|
* single attribute only if in the current instance is missing
|
||||||
|
* The only repeatable field enriched is measures
|
||||||
|
*
|
||||||
|
* @param currentInstance the current instance
|
||||||
|
* @param enrichment the enrichment instance
|
||||||
|
*/
|
||||||
|
private static void applyEnrichment(final Instance currentInstance, final Instance enrichment) {
|
||||||
|
if (currentInstance == null || enrichment == null)
|
||||||
|
return;
|
||||||
|
|
||||||
|
//ENRICH accessright
|
||||||
|
if (enrichment.getAccessright()!=null && currentInstance.getAccessright() == null)
|
||||||
|
currentInstance.setAccessright(enrichment.getAccessright());
|
||||||
|
|
||||||
|
//ENRICH license
|
||||||
|
if (enrichment.getLicense()!=null && currentInstance.getLicense() == null)
|
||||||
|
currentInstance.setLicense(enrichment.getLicense());
|
||||||
|
|
||||||
|
//ENRICH instanceType
|
||||||
|
if (enrichment.getInstancetype()!=null && currentInstance.getInstancetype() == null)
|
||||||
|
currentInstance.setInstancetype(enrichment.getInstancetype());
|
||||||
|
|
||||||
|
//ENRICH hostedby
|
||||||
|
if (enrichment.getHostedby()!=null && currentInstance.getHostedby() == null)
|
||||||
|
currentInstance.setHostedby(enrichment.getHostedby());
|
||||||
|
|
||||||
|
//ENRICH distributionlocation
|
||||||
|
if (enrichment.getDistributionlocation()!=null && currentInstance.getDistributionlocation() == null)
|
||||||
|
currentInstance.setDistributionlocation(enrichment.getDistributionlocation());
|
||||||
|
|
||||||
|
//ENRICH collectedfrom
|
||||||
|
if (enrichment.getCollectedfrom()!=null && currentInstance.getCollectedfrom() == null)
|
||||||
|
currentInstance.setCollectedfrom(enrichment.getCollectedfrom());
|
||||||
|
|
||||||
|
//ENRICH dateofacceptance
|
||||||
|
if (enrichment.getDateofacceptance()!=null && currentInstance.getDateofacceptance() == null)
|
||||||
|
currentInstance.setDateofacceptance(enrichment.getDateofacceptance());
|
||||||
|
|
||||||
|
//ENRICH processingchargeamount
|
||||||
|
if (enrichment.getProcessingchargeamount()!=null && currentInstance.getProcessingchargeamount() == null)
|
||||||
|
currentInstance.setProcessingchargeamount(enrichment.getProcessingchargeamount());
|
||||||
|
|
||||||
|
//ENRICH refereed
|
||||||
|
if (enrichment.getRefereed()!=null && currentInstance.getRefereed() == null)
|
||||||
|
currentInstance.setRefereed(enrichment.getRefereed());
|
||||||
|
|
||||||
|
//ENRICH measures
|
||||||
|
if (enrichment.getMeasures()!=null)
|
||||||
|
if (currentInstance.getMeasures() == null)
|
||||||
|
currentInstance.setMeasures(enrichment.getMeasures());
|
||||||
|
else
|
||||||
|
enrichment.getMeasures().forEach(currentInstance.getMeasures()::add);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This main method apply the enrichment of the instances
|
||||||
|
*
|
||||||
|
* @param toEnrichInstances the instances that could be enriched
|
||||||
|
* @param enrichmentInstances the enrichment instances
|
||||||
|
* @return list of instances possibly enriched
|
||||||
|
*/
|
||||||
|
private static List<Instance> enrichInstances(final List<Instance> toEnrichInstances,final List<Instance> enrichmentInstances) {
|
||||||
|
final List<Instance> enrichmentResult = new ArrayList<>();
|
||||||
|
|
||||||
|
if (toEnrichInstances == null) {
|
||||||
|
return enrichmentResult;
|
||||||
|
}
|
||||||
|
if (enrichmentInstances == null) {
|
||||||
|
return enrichmentResult;
|
||||||
|
}
|
||||||
|
Map<String, Instance> ri = toInstanceMap(enrichmentInstances);
|
||||||
|
|
||||||
|
toEnrichInstances.forEach(i -> {
|
||||||
|
final List<Instance> e = findEnrichmentsByPID(i.getPid(), ri);
|
||||||
|
if (e!= null && e.size()> 0) {
|
||||||
|
e.forEach(enr -> applyEnrichment(i, enr));
|
||||||
|
} else {
|
||||||
|
final List<Instance> a = findEnrichmentsByPID(i.getAlternateIdentifier(), ri);
|
||||||
|
if (a!= null && a.size()> 0) {
|
||||||
|
a.forEach(enr -> applyEnrichment(i, enr));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
enrichmentResult.add(i);
|
||||||
|
});
|
||||||
|
return enrichmentResult;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Longest lists list.
|
||||||
|
*
|
||||||
|
* @param a the a
|
||||||
|
* @param b the b
|
||||||
|
* @return the list
|
||||||
|
*/
|
||||||
|
public static List<Field<String>> longestLists(List<Field<String>> a, List<Field<String>> b) {
|
||||||
|
if (a == null || b == null)
|
||||||
|
return a == null ? b : a;
|
||||||
|
if (a.size() == b.size()) {
|
||||||
|
int msa = a
|
||||||
|
.stream()
|
||||||
|
.filter(i -> i != null && i.getValue() != null)
|
||||||
|
.map(i -> i.getValue().length())
|
||||||
|
.max(Comparator.naturalOrder())
|
||||||
|
.orElse(0);
|
||||||
|
int msb = b
|
||||||
|
.stream()
|
||||||
|
.filter(i -> i != null && i.getValue() != null)
|
||||||
|
.map(i -> i.getValue().length())
|
||||||
|
.max(Comparator.naturalOrder())
|
||||||
|
.orElse(0);
|
||||||
|
return msa > msb ? a : b;
|
||||||
|
}
|
||||||
|
return a.size() > b.size() ? a : b;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This method merge two oaf object of the same type
|
||||||
|
* It should replace the mergeFrom utility inside the oaf
|
||||||
|
* class.
|
||||||
|
*
|
||||||
|
* @param original First object
|
||||||
|
* @param enrich second object
|
||||||
|
* @return the merged object
|
||||||
|
*/
|
||||||
|
public static <T extends Oaf> T mergeOAF(T original, T enrich) {
|
||||||
|
if (original == null)
|
||||||
|
return enrich;
|
||||||
|
if (enrich == null)
|
||||||
|
return original;
|
||||||
|
|
||||||
|
if (original.getClass() != enrich.getClass())
|
||||||
|
throw new RuntimeException(String.format("Trying to merge different types of object %s, and %s",
|
||||||
|
original.getClass().getCanonicalName(),
|
||||||
|
enrich.getClass().getCanonicalName()));
|
||||||
|
|
||||||
|
Oaf result ;
|
||||||
|
|
||||||
|
switch (original.getClass().getSimpleName().toLowerCase()) {
|
||||||
|
case "relation":
|
||||||
|
result = mergeOAFRelation((Relation) original, (Relation) enrich);
|
||||||
|
break;
|
||||||
|
case "publication":
|
||||||
|
result = mergeOAFPublication((Publication) original, (Publication) enrich);
|
||||||
|
break;
|
||||||
|
case "dataset":
|
||||||
|
result = mergeOAFDataset((Dataset) original, (Dataset) enrich);
|
||||||
|
break;
|
||||||
|
case "software":
|
||||||
|
result = mergeOAFSoftware((Software) original, (Software) enrich);
|
||||||
|
break;
|
||||||
|
case "otherresearchproduct":
|
||||||
|
result = mergeOAFOtherResearchProduct((OtherResearchProduct) original, (OtherResearchProduct) enrich);
|
||||||
|
break;
|
||||||
|
case "project":
|
||||||
|
result = mergeOAFProject((Project) original, (Project)enrich);
|
||||||
|
break;
|
||||||
|
case "organization":
|
||||||
|
result = mergeOAFOrganization((Organization) original, (Organization)enrich);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new RuntimeException("Trying to merge an unsupported class");
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
result.setCollectedfrom(
|
||||||
|
Stream
|
||||||
|
.concat(
|
||||||
|
Optional
|
||||||
|
.ofNullable(original.getCollectedfrom())
|
||||||
|
.map(Collection::stream)
|
||||||
|
.orElse(Stream.empty()),
|
||||||
|
Optional
|
||||||
|
.ofNullable(enrich.getCollectedfrom())
|
||||||
|
.map(Collection::stream)
|
||||||
|
.orElse(Stream.empty()))
|
||||||
|
.distinct() // relies on KeyValue.equals
|
||||||
|
.collect(Collectors.toList()));
|
||||||
|
|
||||||
|
result.setLastupdatetimestamp(
|
||||||
|
Math
|
||||||
|
.max(
|
||||||
|
Optional.ofNullable(original.getLastupdatetimestamp()).orElse(0L),
|
||||||
|
Optional.ofNullable(enrich.getLastupdatetimestamp()).orElse(0L)));
|
||||||
|
|
||||||
|
return (T) result;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@SafeVarargs
|
||||||
|
public static <T> List<T> mergeLists(final List<T>... lists) {
|
||||||
|
|
||||||
|
return Arrays
|
||||||
|
.stream(lists)
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.flatMap(List::stream)
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.distinct()
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static int compareTrust(Oaf a, Oaf b) {
|
||||||
|
return extractTrust(a).compareTo(extractTrust(b));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String extractTrust(Oaf e) {
|
||||||
|
if (e == null || e.getDataInfo() == null || e.getDataInfo().getTrust() == null)
|
||||||
|
return "0.0";
|
||||||
|
return e.getDataInfo().getTrust();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static void mergeOAFDataInfo(Oaf from, Oaf to) {
|
||||||
|
Optional.ofNullable(to)
|
||||||
|
.ifPresent(other -> Optional.ofNullable(other.getDataInfo())
|
||||||
|
.ifPresent(otherDataInfo -> Optional.ofNullable(from.getDataInfo())
|
||||||
|
.ifPresent(thisDataInfo -> {
|
||||||
|
if (compareTrust(from, other) < 0 || thisDataInfo.getInvisible()) {
|
||||||
|
from.setDataInfo(otherDataInfo);
|
||||||
|
}
|
||||||
|
})));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets main title.
|
||||||
|
*
|
||||||
|
* @param titles the titles
|
||||||
|
* @return the main title
|
||||||
|
*/
|
||||||
|
private static StructuredProperty getMainTitle(final List<StructuredProperty> titles) {
|
||||||
|
// need to check if the list of titles contains more than 1 main title? (in that case, we should chose which
|
||||||
|
// main title select in the list)
|
||||||
|
for (StructuredProperty t : titles) {
|
||||||
|
if (t.getQualifier() != null && t.getQualifier().getClassid() != null)
|
||||||
|
if (t.getQualifier().getClassid().equals("main title"))
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static OafEntity mergeEntity(final OafEntity original,final OafEntity enrich) {
|
||||||
|
|
||||||
|
final OafEntity mergedEntity = original;
|
||||||
|
|
||||||
|
|
||||||
|
mergedEntity.setOriginalId(mergeLists(mergedEntity.getOriginalId(), enrich.getOriginalId()));
|
||||||
|
|
||||||
|
mergedEntity.setPid(mergeLists(mergedEntity.getPid(), enrich.getPid()));
|
||||||
|
|
||||||
|
|
||||||
|
if (enrich.getDateofcollection() != null && compareTrust(mergedEntity, enrich) < 0)
|
||||||
|
mergedEntity.setDateofcollection(enrich.getDateofcollection());
|
||||||
|
|
||||||
|
if (enrich.getDateoftransformation() != null && compareTrust(mergedEntity, enrich) < 0)
|
||||||
|
mergedEntity.setDateoftransformation(enrich.getDateoftransformation());
|
||||||
|
|
||||||
|
mergedEntity.setExtraInfo(mergeLists(mergedEntity.getExtraInfo(), enrich.getExtraInfo()));
|
||||||
|
|
||||||
|
if (enrich.getOaiprovenance() != null && compareTrust(mergedEntity, enrich) < 0)
|
||||||
|
mergedEntity.setOaiprovenance(enrich.getOaiprovenance());
|
||||||
|
|
||||||
|
return mergedEntity;
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Result mergeOAFResult(final Result original,final Result enrich) {
|
||||||
|
|
||||||
|
|
||||||
|
final Result mergedResult = (Result) mergeEntity(original, enrich);
|
||||||
|
|
||||||
|
if(mergedResult.getProcessingchargeamount() == null || StringUtils.isBlank(mergedResult.getProcessingchargeamount().getValue() )){
|
||||||
|
mergedResult.setProcessingchargeamount(enrich.getProcessingchargeamount());
|
||||||
|
mergedResult.setProcessingchargecurrency(enrich.getProcessingchargecurrency());
|
||||||
|
}
|
||||||
|
|
||||||
|
mergedResult.setMeasures(mergeLists(mergedResult.getMeasures(), enrich.getMeasures()));
|
||||||
|
|
||||||
|
if( !isAnEnrichment(mergedResult) && !isAnEnrichment(enrich))
|
||||||
|
mergedResult.setInstance(mergeLists(mergedResult.getInstance(), enrich.getInstance()));
|
||||||
|
else {
|
||||||
|
final List<Instance> enrichmentInstances = isAnEnrichment(mergedResult) ? mergedResult.getInstance() : enrich.getInstance();
|
||||||
|
final List<Instance> enrichedInstances= isAnEnrichment(mergedResult) ? enrich.getInstance(): mergedResult.getInstance();
|
||||||
|
if (isAnEnrichment(mergedResult))
|
||||||
|
mergedResult.setDataInfo(enrich.getDataInfo());
|
||||||
|
mergedResult.setInstance(enrichInstances(enrichedInstances,enrichmentInstances));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (enrich.getBestaccessright() != null
|
||||||
|
&& new AccessRightComparator().compare(enrich.getBestaccessright(), mergedResult.getBestaccessright()) < 0)
|
||||||
|
mergedResult.setBestaccessright(enrich.getBestaccessright());
|
||||||
|
|
||||||
|
if (enrich.getResulttype() != null && compareTrust(mergedResult, enrich) < 0)
|
||||||
|
mergedResult.setResulttype(enrich.getResulttype());
|
||||||
|
|
||||||
|
if (enrich.getLanguage() != null && compareTrust(mergedResult, enrich) < 0)
|
||||||
|
mergedResult.setLanguage(enrich.getLanguage());
|
||||||
|
|
||||||
|
if (Objects.nonNull(enrich.getDateofacceptance())) {
|
||||||
|
if (Objects.isNull(mergedResult.getDateofacceptance())) {
|
||||||
|
mergedResult.setDateofacceptance(enrich.getDateofacceptance());
|
||||||
|
} else if (compareTrust(mergedResult, enrich) < 0) {
|
||||||
|
mergedResult.setDateofacceptance(enrich.getDateofacceptance());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
mergedResult.setCountry(mergeLists(mergedResult.getCountry(), enrich.getCountry()));
|
||||||
|
|
||||||
|
mergedResult.setSubject(mergeLists(mergedResult.getSubject(), enrich.getSubject()));
|
||||||
|
|
||||||
|
// merge title lists: main title with higher trust and distinct between the others
|
||||||
|
StructuredProperty baseMainTitle = null;
|
||||||
|
if (mergedResult.getTitle() != null) {
|
||||||
|
baseMainTitle = getMainTitle(mergedResult.getTitle());
|
||||||
|
if (baseMainTitle != null) {
|
||||||
|
final StructuredProperty p = baseMainTitle;
|
||||||
|
mergedResult.setTitle(mergedResult.getTitle().stream().filter(t -> t != p).collect(Collectors.toList()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
StructuredProperty newMainTitle = null;
|
||||||
|
if (enrich.getTitle() != null) {
|
||||||
|
newMainTitle = getMainTitle(enrich.getTitle());
|
||||||
|
if (newMainTitle != null) {
|
||||||
|
final StructuredProperty p = newMainTitle;
|
||||||
|
enrich.setTitle(enrich.getTitle().stream().filter(t -> t != p).collect(Collectors.toList()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (newMainTitle != null && compareTrust(mergedResult, enrich) < 0) {
|
||||||
|
baseMainTitle = newMainTitle;
|
||||||
|
}
|
||||||
|
|
||||||
|
mergedResult.setTitle(mergeLists(mergedResult.getTitle(), enrich.getTitle()));
|
||||||
|
if (mergedResult.getTitle() != null && baseMainTitle != null) {
|
||||||
|
mergedResult.getTitle().add(baseMainTitle);
|
||||||
|
}
|
||||||
|
|
||||||
|
mergedResult.setRelevantdate (mergeLists(mergedResult.getRelevantdate(), enrich.getRelevantdate()));
|
||||||
|
|
||||||
|
mergedResult.setDescription( longestLists(mergedResult.getDescription(), enrich.getDescription()));
|
||||||
|
|
||||||
|
if (enrich.getPublisher() != null && compareTrust(mergedResult, enrich) < 0)
|
||||||
|
mergedResult.setPublisher(enrich.getPublisher());
|
||||||
|
|
||||||
|
if (enrich.getEmbargoenddate() != null && compareTrust(mergedResult, enrich) < 0)
|
||||||
|
mergedResult.setEmbargoenddate(enrich.getEmbargoenddate());
|
||||||
|
|
||||||
|
mergedResult.setSource(mergeLists(mergedResult.getSource(), enrich.getSource()));
|
||||||
|
|
||||||
|
mergedResult.setFulltext(mergeLists(mergedResult.getFulltext(), enrich.getFulltext()));
|
||||||
|
|
||||||
|
mergedResult.setFormat(mergeLists(mergedResult.getFormat(), enrich.getFormat()));
|
||||||
|
|
||||||
|
mergedResult.setContributor(mergeLists(mergedResult.getContributor(), enrich.getContributor()));
|
||||||
|
|
||||||
|
if (enrich.getResourcetype() != null)
|
||||||
|
mergedResult.setResourcetype(enrich.getResourcetype());
|
||||||
|
|
||||||
|
mergedResult.setCoverage(mergeLists(mergedResult.getCoverage(), enrich.getCoverage()));
|
||||||
|
|
||||||
|
mergedResult.setContext(mergeLists(mergedResult.getContext(), enrich.getContext()));
|
||||||
|
|
||||||
|
mergedResult.setExternalReference(mergeLists(mergedResult.getExternalReference(), enrich.getExternalReference()));
|
||||||
|
|
||||||
|
return mergedResult;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static OtherResearchProduct mergeOAFOtherResearchProduct(final OtherResearchProduct original,final OtherResearchProduct enrich) {
|
||||||
|
final OtherResearchProduct mergedORP = (OtherResearchProduct) mergeOAFResult(original, enrich);
|
||||||
|
|
||||||
|
|
||||||
|
mergedORP.setContactperson(mergeLists(mergedORP.getContactperson(), enrich.getContactperson()));
|
||||||
|
mergedORP.setContactgroup(mergeLists(mergedORP.getContactgroup(), enrich.getContactgroup()));
|
||||||
|
mergedORP.setTool(mergeLists(mergedORP.getTool(), enrich.getTool()));
|
||||||
|
mergeOAFDataInfo(mergedORP, enrich);
|
||||||
|
|
||||||
|
return mergedORP;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Software mergeOAFSoftware(final Software original, final Software enrich) {
|
||||||
|
final Software mergedSoftware = (Software) mergeOAFResult(original, enrich);
|
||||||
|
|
||||||
|
mergedSoftware.setDocumentationUrl(mergeLists(mergedSoftware.getDocumentationUrl(), enrich.getDocumentationUrl()));
|
||||||
|
|
||||||
|
mergedSoftware.setLicense(mergeLists(mergedSoftware.getLicense(), enrich.getLicense()));
|
||||||
|
|
||||||
|
mergedSoftware.setCodeRepositoryUrl(enrich.getCodeRepositoryUrl() != null && compareTrust(mergedSoftware,enrich) < 0
|
||||||
|
? enrich.getCodeRepositoryUrl()
|
||||||
|
: mergedSoftware.getCodeRepositoryUrl());
|
||||||
|
|
||||||
|
mergedSoftware.setProgrammingLanguage(enrich.getProgrammingLanguage() != null && compareTrust(mergedSoftware, enrich) < 0
|
||||||
|
? enrich.getProgrammingLanguage()
|
||||||
|
: mergedSoftware.getProgrammingLanguage());
|
||||||
|
|
||||||
|
mergeOAFDataInfo(mergedSoftware, enrich);
|
||||||
|
return mergedSoftware;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Dataset mergeOAFDataset(Dataset original, Dataset enrich) {
|
||||||
|
|
||||||
|
final Dataset mergedDataset = (Dataset) mergeOAFResult(original, enrich);
|
||||||
|
|
||||||
|
mergedDataset.setStoragedate(enrich.getStoragedate() != null && compareTrust(mergedDataset, enrich) < 0 ? enrich.getStoragedate() : mergedDataset.getStoragedate());
|
||||||
|
|
||||||
|
mergedDataset.setDevice(enrich.getDevice() != null && compareTrust(mergedDataset, enrich) < 0 ? enrich.getDevice() : mergedDataset.getDevice());
|
||||||
|
|
||||||
|
mergedDataset.setSize(enrich.getSize() != null && compareTrust(mergedDataset, enrich) < 0 ? enrich.getSize() : mergedDataset.getSize());
|
||||||
|
|
||||||
|
mergedDataset.setVersion(enrich.getVersion() != null && compareTrust(mergedDataset, enrich) < 0 ? enrich.getVersion() : mergedDataset.getVersion());
|
||||||
|
|
||||||
|
mergedDataset.setLastmetadataupdate(
|
||||||
|
enrich.getLastmetadataupdate() != null && compareTrust(mergedDataset,enrich) < 0
|
||||||
|
? enrich.getLastmetadataupdate()
|
||||||
|
: mergedDataset.getLastmetadataupdate());
|
||||||
|
|
||||||
|
mergedDataset.setMetadataversionnumber(
|
||||||
|
enrich.getMetadataversionnumber() != null && compareTrust(mergedDataset, enrich) < 0
|
||||||
|
? enrich.getMetadataversionnumber()
|
||||||
|
: mergedDataset.getMetadataversionnumber());
|
||||||
|
|
||||||
|
mergedDataset.setGeolocation(mergeLists(mergedDataset.getGeolocation(), enrich.getGeolocation()));
|
||||||
|
|
||||||
|
mergeOAFDataInfo(mergedDataset, enrich);
|
||||||
|
|
||||||
|
return mergedDataset;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Publication mergeOAFPublication(Publication original, Publication enrich) {
|
||||||
|
|
||||||
|
final Publication mergedPublication = (Publication) mergeOAFResult(original, enrich);
|
||||||
|
|
||||||
|
if (enrich.getJournal() != null && compareTrust(mergedPublication, enrich) < 0)
|
||||||
|
mergedPublication.setJournal(enrich.getJournal());
|
||||||
|
mergeOAFDataInfo(mergedPublication, enrich);
|
||||||
|
return mergedPublication;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static Relation mergeOAFRelation(Relation from, Relation to) {
|
||||||
|
|
||||||
|
checkArgument(Objects.equals(from.getSource(), to.getSource()), "source ids must be equal");
|
||||||
|
checkArgument(Objects.equals(from.getTarget(), to.getTarget()), "target ids must be equal");
|
||||||
|
checkArgument(Objects.equals(from.getRelType(), to.getRelType()), "relType(s) must be equal");
|
||||||
|
checkArgument(
|
||||||
|
Objects.equals(from.getSubRelType(), to.getSubRelType()), "subRelType(s) must be equal");
|
||||||
|
checkArgument(Objects.equals(from.getRelClass(), to.getRelClass()), "relClass(es) must be equal");
|
||||||
|
|
||||||
|
from.setValidated(from.getValidated() || to.getValidated());
|
||||||
|
try {
|
||||||
|
from.setValidationDate(ModelSupport.oldest(from.getValidationDate(), to.getValidationDate()));
|
||||||
|
} catch (ParseException e) {
|
||||||
|
throw new IllegalArgumentException(String
|
||||||
|
.format(
|
||||||
|
"invalid validation date format in relation [s:%s, t:%s]: %s", from.getSource(), from.getTarget(),
|
||||||
|
from.getValidationDate()));
|
||||||
|
}
|
||||||
|
|
||||||
|
return from;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static Oaf mergeOAFOrganization(Organization original, Organization enrich) {
|
||||||
|
|
||||||
|
final Organization mergedOrganization = (Organization) mergeEntity(original, enrich);
|
||||||
|
|
||||||
|
int ct = compareTrust(mergedOrganization, enrich);
|
||||||
|
mergedOrganization.setLegalshortname(enrich.getLegalshortname() != null && ct < 0
|
||||||
|
? enrich.getLegalshortname()
|
||||||
|
: mergedOrganization.getLegalname());
|
||||||
|
|
||||||
|
|
||||||
|
mergedOrganization.setLegalname(enrich.getLegalname() != null && ct < 0 ?
|
||||||
|
enrich.getLegalname()
|
||||||
|
: mergedOrganization.getLegalname());
|
||||||
|
|
||||||
|
mergedOrganization.setAlternativeNames(mergeLists(enrich.getAlternativeNames(), mergedOrganization.getAlternativeNames()));
|
||||||
|
|
||||||
|
|
||||||
|
mergedOrganization.setWebsiteurl(enrich.getWebsiteurl() != null && ct < 0
|
||||||
|
? enrich.getWebsiteurl()
|
||||||
|
: mergedOrganization.getWebsiteurl());
|
||||||
|
|
||||||
|
mergedOrganization.setLogourl(enrich.getLogourl() != null && ct < 0
|
||||||
|
? enrich.getLogourl()
|
||||||
|
: mergedOrganization.getLogourl());
|
||||||
|
|
||||||
|
mergedOrganization.setEclegalbody(enrich.getEclegalbody() != null && ct < 0
|
||||||
|
? enrich.getEclegalbody()
|
||||||
|
: mergedOrganization.getEclegalbody());
|
||||||
|
|
||||||
|
mergedOrganization.setEclegalperson(enrich.getEclegalperson() != null && ct < 0
|
||||||
|
? enrich.getEclegalperson()
|
||||||
|
: mergedOrganization.getEclegalperson());
|
||||||
|
|
||||||
|
mergedOrganization.setEcnonprofit (enrich.getEcnonprofit() != null && ct< 0
|
||||||
|
? enrich.getEcnonprofit()
|
||||||
|
: mergedOrganization.getEcnonprofit());
|
||||||
|
|
||||||
|
mergedOrganization.setEcresearchorganization (enrich.getEcresearchorganization() != null && ct < 0
|
||||||
|
? enrich.getEcresearchorganization()
|
||||||
|
: mergedOrganization.getEcresearchorganization());
|
||||||
|
|
||||||
|
mergedOrganization.setEchighereducation (enrich.getEchighereducation() != null && ct < 0
|
||||||
|
? enrich.getEchighereducation()
|
||||||
|
: mergedOrganization.getEchighereducation());
|
||||||
|
|
||||||
|
mergedOrganization.setEcinternationalorganizationeurinterests (enrich.getEcinternationalorganizationeurinterests() != null && ct< 0
|
||||||
|
? enrich.getEcinternationalorganizationeurinterests()
|
||||||
|
: mergedOrganization.getEcinternationalorganizationeurinterests());
|
||||||
|
|
||||||
|
mergedOrganization.setEcinternationalorganization (enrich.getEcinternationalorganization() != null && ct < 0
|
||||||
|
? enrich.getEcinternationalorganization()
|
||||||
|
: mergedOrganization.getEcinternationalorganization());
|
||||||
|
|
||||||
|
mergedOrganization.setEcenterprise (enrich.getEcenterprise() != null && ct < 0
|
||||||
|
? enrich.getEcenterprise()
|
||||||
|
: mergedOrganization.getEcenterprise());
|
||||||
|
|
||||||
|
mergedOrganization.setEcsmevalidated (enrich.getEcsmevalidated() != null && ct < 0
|
||||||
|
? enrich.getEcsmevalidated()
|
||||||
|
: mergedOrganization.getEcsmevalidated());
|
||||||
|
mergedOrganization.setEcnutscode( enrich.getEcnutscode() != null && ct < 0
|
||||||
|
? enrich.getEcnutscode()
|
||||||
|
: mergedOrganization.getEcnutscode());
|
||||||
|
|
||||||
|
mergedOrganization.setCountry (enrich.getCountry() != null && ct < 0 ?
|
||||||
|
enrich.getCountry()
|
||||||
|
:mergedOrganization.getCountry());
|
||||||
|
|
||||||
|
mergeOAFDataInfo(mergedOrganization, enrich);
|
||||||
|
|
||||||
|
return mergedOrganization;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Oaf mergeOAFProject(Project original, Project enrich) {
|
||||||
|
|
||||||
|
final Project mergedProject = (Project) mergeEntity(original, enrich);
|
||||||
|
|
||||||
|
int ct = compareTrust(mergedProject, enrich);
|
||||||
|
|
||||||
|
|
||||||
|
mergedProject.setWebsiteurl (enrich.getWebsiteurl() != null && ct < 0
|
||||||
|
? enrich.getWebsiteurl()
|
||||||
|
: mergedProject.getWebsiteurl());
|
||||||
|
|
||||||
|
mergedProject.setCode(enrich.getCode() != null && ct < 0 ?
|
||||||
|
enrich.getCode() :
|
||||||
|
mergedProject.getCode());
|
||||||
|
|
||||||
|
mergedProject.setAcronym(enrich.getAcronym() != null && ct < 0
|
||||||
|
? enrich.getAcronym()
|
||||||
|
: mergedProject.getAcronym());
|
||||||
|
|
||||||
|
mergedProject.setTitle (enrich.getTitle() != null && ct < 0
|
||||||
|
? enrich.getTitle()
|
||||||
|
: mergedProject.getTitle());
|
||||||
|
mergedProject.setStartdate (enrich.getStartdate() != null && ct < 0
|
||||||
|
? enrich.getStartdate()
|
||||||
|
: mergedProject.getStartdate());
|
||||||
|
mergedProject.setEnddate (enrich.getEnddate() != null && ct < 0
|
||||||
|
? enrich.getEnddate()
|
||||||
|
: mergedProject.getEnddate());
|
||||||
|
mergedProject.setCallidentifier ( enrich.getCallidentifier() != null && ct < 0
|
||||||
|
? enrich.getCallidentifier()
|
||||||
|
: mergedProject.getCallidentifier());
|
||||||
|
mergedProject.setKeywords ( enrich.getKeywords() != null && ct < 0
|
||||||
|
? enrich.getKeywords()
|
||||||
|
: mergedProject.getKeywords());
|
||||||
|
|
||||||
|
mergedProject.setDuration ( enrich.getDuration() != null && ct < 0
|
||||||
|
? enrich.getDuration()
|
||||||
|
: mergedProject.getDuration());
|
||||||
|
mergedProject.setEcsc39 ( enrich.getEcsc39() != null && ct < 0
|
||||||
|
? enrich.getEcsc39() :
|
||||||
|
mergedProject.getEcsc39());
|
||||||
|
mergedProject.setOamandatepublications ( enrich.getOamandatepublications() != null && ct < 0
|
||||||
|
? enrich.getOamandatepublications()
|
||||||
|
: mergedProject.getOamandatepublications());
|
||||||
|
mergedProject.setEcarticle29_3 (enrich.getEcarticle29_3() != null && ct < 0
|
||||||
|
? enrich.getEcarticle29_3()
|
||||||
|
: mergedProject.getEcarticle29_3());
|
||||||
|
|
||||||
|
mergedProject.setSubjects (mergeLists(mergedProject.getSubjects(), enrich.getSubjects()));
|
||||||
|
mergedProject.setFundingtree (mergeLists(mergedProject.getFundingtree(), enrich.getFundingtree()));
|
||||||
|
mergedProject.setContracttype (enrich.getContracttype() != null && ct < 0
|
||||||
|
? enrich.getContracttype()
|
||||||
|
: mergedProject.getContracttype());
|
||||||
|
mergedProject.setOptional1 ( enrich.getOptional1() != null && ct < 0
|
||||||
|
? enrich.getOptional1()
|
||||||
|
: mergedProject.getOptional1());
|
||||||
|
mergedProject.setOptional2 (enrich.getOptional2() != null && ct < 0
|
||||||
|
? enrich.getOptional2()
|
||||||
|
: mergedProject.getOptional2());
|
||||||
|
|
||||||
|
mergedProject.setJsonextrainfo ( enrich.getJsonextrainfo() != null && ct < 0
|
||||||
|
? enrich.getJsonextrainfo()
|
||||||
|
: mergedProject.getJsonextrainfo());
|
||||||
|
|
||||||
|
mergedProject.setContactfullname ( enrich.getContactfullname() != null && ct < 0
|
||||||
|
? enrich.getContactfullname()
|
||||||
|
: mergedProject.getContactfullname());
|
||||||
|
|
||||||
|
mergedProject.setContactfax ( enrich.getContactfax() != null && ct < 0
|
||||||
|
? enrich.getContactfax()
|
||||||
|
: mergedProject.getContactfax());
|
||||||
|
|
||||||
|
mergedProject.setContactphone (enrich.getContactphone() != null && ct < 0
|
||||||
|
? enrich.getContactphone()
|
||||||
|
: mergedProject.getContactphone());
|
||||||
|
|
||||||
|
mergedProject.setContactemail ( enrich.getContactemail() != null && ct < 0
|
||||||
|
? enrich.getContactemail()
|
||||||
|
: mergedProject.getContactemail());
|
||||||
|
|
||||||
|
mergedProject.setSummary ( enrich.getSummary() != null && ct < 0
|
||||||
|
? enrich.getSummary()
|
||||||
|
: mergedProject.getSummary());
|
||||||
|
|
||||||
|
mergedProject.setCurrency( enrich.getCurrency() != null && ct < 0
|
||||||
|
? enrich.getCurrency()
|
||||||
|
: mergedProject.getCurrency());
|
||||||
|
|
||||||
|
if (enrich.getH2020topiccode() != null && StringUtils.isEmpty(mergedProject.getH2020topiccode())){
|
||||||
|
mergedProject.setH2020topiccode(enrich.getH2020topiccode());
|
||||||
|
mergedProject.setH2020topicdescription(enrich.getH2020topicdescription());
|
||||||
|
}
|
||||||
|
|
||||||
|
mergedProject.setH2020classification (mergeLists(mergedProject.getH2020classification(), enrich.getH2020classification()));
|
||||||
|
|
||||||
|
mergeOAFDataInfo(mergedProject, enrich);
|
||||||
|
|
||||||
|
return mergedProject;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,38 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
|
public class OrganizationPidComparator implements Comparator<StructuredProperty> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(StructuredProperty left, StructuredProperty right) {
|
||||||
|
|
||||||
|
PidType lClass = PidType.tryValueOf(left.getQualifier().getClassid());
|
||||||
|
PidType rClass = PidType.tryValueOf(right.getQualifier().getClassid());
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.openorgs))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.openorgs))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.GRID))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.GRID))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.mag_id))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.mag_id))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.urn))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.urn))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,8 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
|
||||||
|
/**
 * Map associating a key with a set of strings. {@code PidBlacklistProvider} deserializes it from
 * JSON and looks it up by pid type.
 */
public class PidBlacklist extends HashMap<String, HashSet<String>> {

}
|
|
@ -0,0 +1,39 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
public class PidBlacklistProvider {
|
||||||
|
|
||||||
|
private static final PidBlacklist blacklist;
|
||||||
|
|
||||||
|
static {
|
||||||
|
try {
|
||||||
|
String json = IOUtils.toString(IdentifierFactory.class.getResourceAsStream("pid_blacklist.json"));
|
||||||
|
blacklist = new ObjectMapper().readValue(json, PidBlacklist.class);
|
||||||
|
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static PidBlacklist getBlacklist() {
|
||||||
|
return blacklist;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Set<String> getBlacklist(String pidType) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(getBlacklist().get(pidType))
|
||||||
|
.orElse(new HashSet<>());
|
||||||
|
}
|
||||||
|
|
||||||
|
private PidBlacklistProvider() {}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,48 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
|
public class PidComparator<T extends OafEntity> implements Comparator<StructuredProperty> {
|
||||||
|
|
||||||
|
private final T entity;
|
||||||
|
|
||||||
|
public PidComparator(T entity) {
|
||||||
|
this.entity = entity;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(StructuredProperty left, StructuredProperty right) {
|
||||||
|
|
||||||
|
if (left == null && right == null)
|
||||||
|
return 0;
|
||||||
|
if (left == null)
|
||||||
|
return 1;
|
||||||
|
if (right == null)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
if (ModelSupport.isSubClass(entity, Result.class)) {
|
||||||
|
return compareResultPids(left, right);
|
||||||
|
}
|
||||||
|
if (ModelSupport.isSubClass(entity, Organization.class)) {
|
||||||
|
return compareOrganizationtPids(left, right);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Else (but unlikely), lexicographical ordering will do.
|
||||||
|
return left.getQualifier().getClassid().compareTo(right.getQualifier().getClassid());
|
||||||
|
}
|
||||||
|
|
||||||
|
private int compareResultPids(StructuredProperty left, StructuredProperty right) {
|
||||||
|
return new ResultPidComparator().compare(left, right);
|
||||||
|
}
|
||||||
|
|
||||||
|
private int compareOrganizationtPids(StructuredProperty left, StructuredProperty right) {
|
||||||
|
return new OrganizationPidComparator().compare(left, right);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,79 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.EnumUtils;
|
||||||
|
|
||||||
|
public enum PidType {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The DOI syntax shall be made up of a DOI prefix and a DOI suffix separated by a forward slash.
|
||||||
|
*
|
||||||
|
* There is no defined limit on the length of the DOI name, or of the DOI prefix or DOI suffix.
|
||||||
|
*
|
||||||
|
* The DOI name is case-insensitive and can incorporate any printable characters from the legal graphic characters
|
||||||
|
* of Unicode. Further constraints on character use (e.g. use of language-specific alphanumeric characters) can be
|
||||||
|
* defined for an application by the ISO 26324 Registration Authority.
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* DOI prefix: The DOI prefix shall be composed of a directory indicator followed by a registrant code.
|
||||||
|
* These two components shall be separated by a full stop (period). The directory indicator shall be "10" and
|
||||||
|
* distinguishes the entire set of character strings (prefix and suffix) as digital object identifiers within the
|
||||||
|
* resolution system.
|
||||||
|
*
|
||||||
|
* Registrant code: The second element of the DOI prefix shall be the registrant code. The registrant code is a
|
||||||
|
* unique string assigned to a registrant.
|
||||||
|
*
|
||||||
|
* DOI suffix: The DOI suffix shall consist of a character string of any length chosen by the registrant.
|
||||||
|
* Each suffix shall be unique to the prefix element that precedes it. The unique suffix can be a sequential number,
|
||||||
|
* or it might incorporate an identifier generated from or based on another system used by the registrant
|
||||||
|
* (e.g. ISAN, ISBN, ISRC, ISSN, ISTC, ISNI; in such cases, a preferred construction for such a suffix can be
|
||||||
|
* specified, as in Example 1).
|
||||||
|
*
|
||||||
|
* Source: https://www.doi.org/doi_handbook/2_Numbering.html#2.2
|
||||||
|
*/
|
||||||
|
doi,
|
||||||
|
|
||||||
|
/**
|
||||||
|
* PubMed Unique Identifier (PMID)
|
||||||
|
*
|
||||||
|
* This field is a 1-to-8 digit accession number with no leading zeros. It is present on all records and is the
|
||||||
|
* accession number for managing and disseminating records. PMIDs are not reused after records are deleted.
|
||||||
|
*
|
||||||
|
* Beginning in February 2012 PMIDs include extensions following a decimal point to account for article versions
|
||||||
|
* (e.g., 21804956.2). All citations are considered version 1 until replaced. The extended PMID is not displayed
|
||||||
|
* on the MEDLINE format.
|
||||||
|
*
|
||||||
|
* View the citation in abstract format in PubMed to access additional versions when available (see the article in
|
||||||
|
* the Jan-Feb 2012 NLM Technical Bulletin).
|
||||||
|
*
|
||||||
|
* Source: https://www.nlm.nih.gov/bsd/mms/medlineelements.html#pmid
|
||||||
|
*/
|
||||||
|
pmid,
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This field contains the unique identifier for the cited article in PubMed Central. The identifier begins with the
|
||||||
|
* prefix PMC.
|
||||||
|
*
|
||||||
|
* Source: https://www.nlm.nih.gov/bsd/mms/medlineelements.html#pmc
|
||||||
|
*/
|
||||||
|
pmc, handle, arXiv, nct, pdb,
|
||||||
|
|
||||||
|
// Organization
|
||||||
|
openorgs, corda, corda_h2020, GRID, mag_id, urn,
|
||||||
|
|
||||||
|
// Used by dedup
|
||||||
|
undefined, original;
|
||||||
|
|
||||||
|
public static boolean isValid(String type) {
|
||||||
|
return EnumUtils.isValidEnum(PidType.class, type);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static PidType tryValueOf(String s) {
|
||||||
|
try {
|
||||||
|
return PidType.valueOf(s);
|
||||||
|
} catch (Exception e) {
|
||||||
|
return PidType.original;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,33 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
|
public class PidValueComparator implements Comparator<StructuredProperty> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(StructuredProperty left, StructuredProperty right) {
|
||||||
|
|
||||||
|
if (left == null && right == null)
|
||||||
|
return 0;
|
||||||
|
if (left == null)
|
||||||
|
return 1;
|
||||||
|
if (right == null)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
StructuredProperty l = CleaningFunctions.normalizePidValue(left);
|
||||||
|
StructuredProperty r = CleaningFunctions.normalizePidValue(right);
|
||||||
|
|
||||||
|
return Optional
|
||||||
|
.ofNullable(l.getValue())
|
||||||
|
.map(
|
||||||
|
lv -> Optional
|
||||||
|
.ofNullable(r.getValue())
|
||||||
|
.map(rv -> lv.compareTo(rv))
|
||||||
|
.orElse(-1))
|
||||||
|
.orElse(1);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,53 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
|
public class ResultPidComparator implements Comparator<StructuredProperty> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(StructuredProperty left, StructuredProperty right) {
|
||||||
|
|
||||||
|
PidType lClass = PidType.tryValueOf(left.getQualifier().getClassid());
|
||||||
|
PidType rClass = PidType.tryValueOf(right.getQualifier().getClassid());
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.doi))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.doi))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.pmid))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.pmid))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.pmc))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.pmc))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.handle))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.handle))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.arXiv))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.arXiv))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.nct))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.nct))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.pdb))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.pdb))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,77 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.CROSSREF_ID;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
|
||||||
|
public class ResultTypeComparator implements Comparator<Result> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(Result left, Result right) {
|
||||||
|
|
||||||
|
if (left == null && right == null)
|
||||||
|
return 0;
|
||||||
|
if (left == null)
|
||||||
|
return 1;
|
||||||
|
if (right == null)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
HashSet<String> lCf = getCollectedFromIds(left);
|
||||||
|
HashSet<String> rCf = getCollectedFromIds(right);
|
||||||
|
|
||||||
|
if (lCf.contains(CROSSREF_ID) && !rCf.contains(CROSSREF_ID)) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (!lCf.contains(CROSSREF_ID) && rCf.contains(CROSSREF_ID)) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
String lClass = left.getResulttype().getClassid();
|
||||||
|
String rClass = right.getResulttype().getClassid();
|
||||||
|
|
||||||
|
if (lClass.equals(rClass))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (lClass.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(ModelConstants.SOFTWARE_RESULTTYPE_CLASSID))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(ModelConstants.SOFTWARE_RESULTTYPE_CLASSID))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(ModelConstants.ORP_RESULTTYPE_CLASSID))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(ModelConstants.ORP_RESULTTYPE_CLASSID))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
// Else (but unlikely), lexicographical ordering will do.
|
||||||
|
return lClass.compareTo(rClass);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected HashSet<String> getCollectedFromIds(Result left) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(left.getCollectedfrom())
|
||||||
|
.map(
|
||||||
|
cf -> cf
|
||||||
|
.stream()
|
||||||
|
.map(KeyValue::getKey)
|
||||||
|
.collect(Collectors.toCollection(HashSet::new)))
|
||||||
|
.orElse(new HashSet<>());
|
||||||
|
}
|
||||||
|
}
|
|
@ -7,9 +7,14 @@ import java.security.MessageDigest;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
import org.apache.commons.codec.binary.Hex;
|
import org.apache.commons.codec.binary.Hex;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.commons.lang3.tuple.ImmutablePair;
|
||||||
|
import org.apache.commons.lang3.tuple.Pair;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
@ -32,6 +37,9 @@ import net.minidev.json.JSONArray;
|
||||||
import scala.collection.JavaConverters;
|
import scala.collection.JavaConverters;
|
||||||
import scala.collection.Seq;
|
import scala.collection.Seq;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The type Dhp utils.
|
||||||
|
*/
|
||||||
public class DHPUtils {
|
public class DHPUtils {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(DHPUtils.class);
|
private static final Logger log = LoggerFactory.getLogger(DHPUtils.class);
|
||||||
|
@ -39,10 +47,22 @@ public class DHPUtils {
|
||||||
private DHPUtils() {
|
private DHPUtils() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts a Java list of strings into a Scala {@code Seq}.
|
||||||
|
*
|
||||||
|
* @param list the list
|
||||||
|
* @return the seq
|
||||||
|
*/
|
||||||
public static Seq<String> toSeq(List<String> list) {
|
public static Seq<String> toSeq(List<String> list) {
|
||||||
return JavaConverters.asScalaIteratorConverter(list.iterator()).asScala().toSeq();
|
return JavaConverters.asScalaIteratorConverter(list.iterator()).asScala().toSeq();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Md 5 string.
|
||||||
|
*
|
||||||
|
* @param s the s
|
||||||
|
* @return the string
|
||||||
|
*/
|
||||||
public static String md5(final String s) {
|
public static String md5(final String s) {
|
||||||
try {
|
try {
|
||||||
final MessageDigest md = MessageDigest.getInstance("MD5");
|
final MessageDigest md = MessageDigest.getInstance("MD5");
|
||||||
|
@ -54,9 +74,66 @@ public class DHPUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
private static Pair<Qualifier, Qualifier> searchTypeInVocabularies(final String aType, final VocabularyGroup vocabularies) {
|
||||||
|
|
||||||
|
if (StringUtils.isNotBlank(aType)) {
|
||||||
|
final Qualifier typeQualifier = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, aType);
|
||||||
|
if (typeQualifier != null)
|
||||||
|
return new ImmutablePair<>(typeQualifier, vocabularies.getSynonymAsQualifier(
|
||||||
|
ModelConstants.DNET_RESULT_TYPOLOGIES,
|
||||||
|
typeQualifier.getClassid()
|
||||||
|
));
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Retrieves the OAF type pair from the vocabularies.
 *
 * The candidate terms are tried in this order, and the first one that yields a result wins:
 * 1 - subResourceType
 * 2 - otherResourceType
 * 3 - resourceTypeGeneral
 *
 * Blank candidates are skipped.
 *
 * @param resourceTypeGeneral the resource type general
 * @param subResourceType the sub resource type
 * @param otherResourceType the other resource type
 * @param vocabularies the vocabularies
 * @return the pair produced by the first matching candidate, or null when none matches
 */
public static Pair<Qualifier, Qualifier> retrieveOAFTypeFromVocabulary(final String resourceTypeGeneral , final String subResourceType, final String otherResourceType, final VocabularyGroup vocabularies ) {

	final String[] candidates = { subResourceType, otherResourceType, resourceTypeGeneral };

	for (final String candidate : candidates) {
		if (StringUtils.isBlank(candidate)) {
			continue;
		}
		final Pair<Qualifier, Qualifier> match = searchTypeInVocabularies(candidate, vocabularies);
		if (match != null) {
			return match;
		}
	}

	return null;
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves from the metadata store manager application the list of paths associated with mdstores characterized
|
* Retrieves from the metadata store manager application the list of paths associated with mdstores characterized
|
||||||
* by the given format, layout, interpretation
|
* by the given format, layout, interpretation
|
||||||
|
*
|
||||||
* @param mdstoreManagerUrl the URL of the mdstore manager service
|
* @param mdstoreManagerUrl the URL of the mdstore manager service
|
||||||
* @param format the mdstore format
|
* @param format the mdstore format
|
||||||
* @param layout the mdstore layout
|
* @param layout the mdstore layout
|
||||||
|
@ -93,10 +170,24 @@ public class DHPUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate identifier string.
|
||||||
|
*
|
||||||
|
* @param originalId the original id
|
||||||
|
* @param nsPrefix the ns prefix
|
||||||
|
* @return the string
|
||||||
|
*/
|
||||||
public static String generateIdentifier(final String originalId, final String nsPrefix) {
|
public static String generateIdentifier(final String originalId, final String nsPrefix) {
|
||||||
return String.format("%s::%s", nsPrefix, DHPUtils.md5(originalId));
|
return String.format("%s::%s", nsPrefix, DHPUtils.md5(originalId));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate unresolved identifier string.
|
||||||
|
*
|
||||||
|
* @param pid the pid
|
||||||
|
* @param pidType the pid type
|
||||||
|
* @return the string
|
||||||
|
*/
|
||||||
public static String generateUnresolvedIdentifier(final String pid, final String pidType) {
|
public static String generateUnresolvedIdentifier(final String pid, final String pidType) {
|
||||||
|
|
||||||
final String cleanedPid = CleaningFunctions.normalizePidValue(pidType, pid);
|
final String cleanedPid = CleaningFunctions.normalizePidValue(pidType, pid);
|
||||||
|
@ -104,6 +195,13 @@ public class DHPUtils {
|
||||||
return String.format("unresolved::%s::%s", cleanedPid, pidType.toLowerCase().trim());
|
return String.format("unresolved::%s::%s", cleanedPid, pidType.toLowerCase().trim());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets j path string.
|
||||||
|
*
|
||||||
|
* @param jsonPath the json path
|
||||||
|
* @param json the json
|
||||||
|
* @return the j path string
|
||||||
|
*/
|
||||||
public static String getJPathString(final String jsonPath, final String json) {
|
public static String getJPathString(final String jsonPath, final String json) {
|
||||||
try {
|
try {
|
||||||
Object o = JsonPath.read(json, jsonPath);
|
Object o = JsonPath.read(json, jsonPath);
|
||||||
|
@ -117,8 +215,19 @@ public class DHPUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The constant MAPPER.
|
||||||
|
*/
|
||||||
public static final ObjectMapper MAPPER = new ObjectMapper();
|
public static final ObjectMapper MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write hdfs file.
|
||||||
|
*
|
||||||
|
* @param conf the conf
|
||||||
|
* @param content the content
|
||||||
|
* @param path the path
|
||||||
|
* @throws IOException the io exception
|
||||||
|
*/
|
||||||
public static void writeHdfsFile(final Configuration conf, final String content, final String path)
|
public static void writeHdfsFile(final Configuration conf, final String content, final String path)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
|
||||||
|
@ -130,6 +239,14 @@ public class DHPUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read hdfs file string.
|
||||||
|
*
|
||||||
|
* @param conf the conf
|
||||||
|
* @param path the path
|
||||||
|
* @return the string
|
||||||
|
* @throws IOException the io exception
|
||||||
|
*/
|
||||||
public static String readHdfsFile(Configuration conf, String path) throws IOException {
|
public static String readHdfsFile(Configuration conf, String path) throws IOException {
|
||||||
log.info("reading file {}", path);
|
log.info("reading file {}", path);
|
||||||
|
|
||||||
|
@ -142,10 +259,27 @@ public class DHPUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read hdfs file as t.
|
||||||
|
*
|
||||||
|
* @param <T> the type parameter
|
||||||
|
* @param conf the conf
|
||||||
|
* @param path the path
|
||||||
|
* @param clazz the clazz
|
||||||
|
* @return the t
|
||||||
|
* @throws IOException the io exception
|
||||||
|
*/
|
||||||
public static <T> T readHdfsFileAs(Configuration conf, String path, Class<T> clazz) throws IOException {
|
public static <T> T readHdfsFileAs(Configuration conf, String path, Class<T> clazz) throws IOException {
|
||||||
return MAPPER.readValue(readHdfsFile(conf, path), clazz);
|
return MAPPER.readValue(readHdfsFile(conf, path), clazz);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Save dataset.
|
||||||
|
*
|
||||||
|
* @param <T> the type parameter
|
||||||
|
* @param mdstore the mdstore
|
||||||
|
* @param targetPath the target path
|
||||||
|
*/
|
||||||
public static <T> void saveDataset(final Dataset<T> mdstore, final String targetPath) {
|
public static <T> void saveDataset(final Dataset<T> mdstore, final String targetPath) {
|
||||||
log.info("saving dataset in: {}", targetPath);
|
log.info("saving dataset in: {}", targetPath);
|
||||||
mdstore
|
mdstore
|
||||||
|
@ -155,6 +289,12 @@ public class DHPUtils {
|
||||||
.save(targetPath);
|
.save(targetPath);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets hadoop configuration.
|
||||||
|
*
|
||||||
|
* @param nameNode the name node
|
||||||
|
* @return the hadoop configuration
|
||||||
|
*/
|
||||||
public static Configuration getHadoopConfiguration(String nameNode) {
|
public static Configuration getHadoopConfiguration(String nameNode) {
|
||||||
// ====== Init HDFS File System Object
|
// ====== Init HDFS File System Object
|
||||||
Configuration conf = new Configuration();
|
Configuration conf = new Configuration();
|
||||||
|
@ -168,6 +308,12 @@ public class DHPUtils {
|
||||||
return conf;
|
return conf;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Populate oozie env.
|
||||||
|
*
|
||||||
|
* @param report the report
|
||||||
|
* @throws IOException the io exception
|
||||||
|
*/
|
||||||
public static void populateOOZIEEnv(final Map<String, String> report) throws IOException {
|
public static void populateOOZIEEnv(final Map<String, String> report) throws IOException {
|
||||||
File file = new File(System.getProperty("oozie.action.output.properties"));
|
File file = new File(System.getProperty("oozie.action.output.properties"));
|
||||||
Properties props = new Properties();
|
Properties props = new Properties();
|
||||||
|
@ -178,6 +324,13 @@ public class DHPUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Populate oozie env.
|
||||||
|
*
|
||||||
|
* @param paramName the param name
|
||||||
|
* @param value the value
|
||||||
|
* @throws IOException the io exception
|
||||||
|
*/
|
||||||
public static void populateOOZIEEnv(final String paramName, String value) throws IOException {
|
public static void populateOOZIEEnv(final String paramName, String value) throws IOException {
|
||||||
Map<String, String> report = Maps.newHashMap();
|
Map<String, String> report = Maps.newHashMap();
|
||||||
report.put(paramName, value);
|
report.put(paramName, value);
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,4 +1,4 @@
|
||||||
package eu.dnetlib.dhp.datacite
|
package eu.dnetlib.dhp.client
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils
|
import org.apache.commons.io.IOUtils
|
||||||
import org.apache.http.client.config.RequestConfig
|
import org.apache.http.client.config.RequestConfig
|
|
@ -0,0 +1,21 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Assertions;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
class BlackListProviderTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void blackListTest() {
|
||||||
|
|
||||||
|
Assertions.assertNotNull(PidBlacklistProvider.getBlacklist());
|
||||||
|
Assertions.assertNotNull(PidBlacklistProvider.getBlacklist().get("doi"));
|
||||||
|
Assertions.assertTrue(PidBlacklistProvider.getBlacklist().get("doi").size() > 0);
|
||||||
|
final Set<String> xxx = PidBlacklistProvider.getBlacklist("xxx");
|
||||||
|
Assertions.assertNotNull(xxx);
|
||||||
|
Assertions.assertEquals(0, xxx.size());
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,80 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.DeserializationFeature;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||||
|
|
||||||
|
class IdentifierFactoryTest {
|
||||||
|
|
||||||
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
|
||||||
|
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testCreateIdentifierForPublication() throws IOException {
|
||||||
|
|
||||||
|
verifyIdentifier(
|
||||||
|
"publication_doi1.json", "50|doi_________::79dbc7a2a56dc1532659f9038843256e", true);
|
||||||
|
|
||||||
|
verifyIdentifier(
|
||||||
|
"publication_doi2.json", "50|doi_________::79dbc7a2a56dc1532659f9038843256e", true);
|
||||||
|
|
||||||
|
verifyIdentifier(
|
||||||
|
"publication_doi3.json", "50|pmc_________::94e4cb08c93f8733b48e2445d04002ac", true);
|
||||||
|
|
||||||
|
verifyIdentifier(
|
||||||
|
"publication_doi4.json", "50|od______2852::38861c44e6052a8d49f59a4c39ba5e66", true);
|
||||||
|
|
||||||
|
verifyIdentifier(
|
||||||
|
"publication_doi5.json", "50|doi_________::3bef95c0ca26dd55451fc8839ea69d27", true);
|
||||||
|
|
||||||
|
verifyIdentifier(
|
||||||
|
"publication_pmc1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", true);
|
||||||
|
|
||||||
|
verifyIdentifier(
|
||||||
|
"publication_pmc2.json", "50|pmc_________::94e4cb08c93f8733b48e2445d04002ac", true);
|
||||||
|
|
||||||
|
verifyIdentifier(
|
||||||
|
"publication_openapc.json", "50|doi_________::79dbc7a2a56dc1532659f9038843256e", true);
|
||||||
|
|
||||||
|
final String defaultID = "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f";
|
||||||
|
verifyIdentifier("publication_3.json", defaultID, true);
|
||||||
|
verifyIdentifier("publication_4.json", defaultID, true);
|
||||||
|
verifyIdentifier("publication_5.json", defaultID, true);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testCreateIdentifierForPublicationNoHash() throws IOException {
|
||||||
|
|
||||||
|
verifyIdentifier("publication_doi1.json", "50|doi_________::10.1016/j.cmet.2010.03.013", false);
|
||||||
|
verifyIdentifier("publication_doi2.json", "50|doi_________::10.1016/j.cmet.2010.03.013", false);
|
||||||
|
verifyIdentifier("publication_pmc1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", false);
|
||||||
|
verifyIdentifier(
|
||||||
|
"publication_urn1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", false);
|
||||||
|
|
||||||
|
final String defaultID = "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f";
|
||||||
|
verifyIdentifier("publication_3.json", defaultID, false);
|
||||||
|
verifyIdentifier("publication_4.json", defaultID, false);
|
||||||
|
verifyIdentifier("publication_5.json", defaultID, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void verifyIdentifier(String filename, String expectedID, boolean md5) throws IOException {
|
||||||
|
final String json = IOUtils.toString(getClass().getResourceAsStream(filename));
|
||||||
|
final Publication pub = OBJECT_MAPPER.readValue(json, Publication.class);
|
||||||
|
|
||||||
|
String id = IdentifierFactory.createIdentifier(pub, md5);
|
||||||
|
|
||||||
|
assertNotNull(id);
|
||||||
|
assertEquals(expectedID, id);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,617 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.time.format.DateTimeParseException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The type Merge test.
|
||||||
|
*/
|
||||||
|
class MergeTest {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The Oaf.
|
||||||
|
*/
|
||||||
|
OafEntity oaf;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets up.
|
||||||
|
*/
|
||||||
|
@BeforeEach
|
||||||
|
public void setUp() {
|
||||||
|
oaf = new Publication();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merge lists test.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
void mergeListsTest() {
|
||||||
|
|
||||||
|
// string list merge test
|
||||||
|
List<String> a = Arrays.asList("a", "b", "c", "e");
|
||||||
|
List<String> b = Arrays.asList("a", "b", "c", "d");
|
||||||
|
List<String> c = null;
|
||||||
|
|
||||||
|
System.out.println("merge result 1 = " + OafUtils.mergeLists(a, b));
|
||||||
|
|
||||||
|
System.out.println("merge result 2 = " + OafUtils.mergeLists(a, c));
|
||||||
|
|
||||||
|
System.out.println("merge result 3 = " + OafUtils.mergeLists(c, c));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merge publication collected from test.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
void mergePublicationCollectedFromTest() {
|
||||||
|
|
||||||
|
Publication a = publication();
|
||||||
|
Publication b = publication();
|
||||||
|
|
||||||
|
a.setCollectedfrom(Arrays.asList(setKV("a", "open"), setKV("b", "closed")));
|
||||||
|
b.setCollectedfrom(Arrays.asList(setKV("A", "open"), setKV("b", "Open")));
|
||||||
|
|
||||||
|
a = OafUtils.mergeOAF(a, b);
|
||||||
|
|
||||||
|
|
||||||
|
assertNotNull(a.getCollectedfrom());
|
||||||
|
assertEquals(3, a.getCollectedfrom().size());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Load resource result list.
|
||||||
|
*
|
||||||
|
* @param <T> the type parameter
|
||||||
|
* @param path the path
|
||||||
|
* @param clazz the clazz
|
||||||
|
* @return the list
|
||||||
|
* @throws Exception the exception
|
||||||
|
*/
|
||||||
|
private <T extends Result> List<T> loadResourceResult(final String path, final Class<T> clazz ) throws Exception {
|
||||||
|
final ObjectMapper mapper = new ObjectMapper();
|
||||||
|
final InputStream str = Objects.requireNonNull(getClass().getResourceAsStream(path));
|
||||||
|
// LOAD test publications
|
||||||
|
return IOUtils.readLines(str).stream().map(s -> {
|
||||||
|
try {
|
||||||
|
return mapper.readValue(s, clazz);
|
||||||
|
} catch (IOException e) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}).filter(Objects::nonNull).collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Apply to any test list result the same pid of the enrichment instance
|
||||||
|
*
|
||||||
|
* @param source the source
|
||||||
|
* @param enrichment the enrichment
|
||||||
|
* @param overrideAlternateIdentifier the override alternate identifier
|
||||||
|
*/
|
||||||
|
private <T extends Result> void updatePidIntoPublicationInstance(final List<T> source, final List<T>enrichment, final boolean overrideAlternateIdentifier) {
|
||||||
|
for(int i = 0 ; i< source.size(); i++) {
|
||||||
|
final Result currentPub = source.get(i);
|
||||||
|
final Result currentEnrichment = enrichment.get(i);
|
||||||
|
final Instance currentInstance = Objects.requireNonNull(currentPub.getInstance()).get(0);
|
||||||
|
if (overrideAlternateIdentifier)
|
||||||
|
currentInstance.setAlternateIdentifier(Objects.requireNonNull(currentEnrichment.getInstance()).get(0).getPid());
|
||||||
|
else
|
||||||
|
currentInstance.setPid(Objects.requireNonNull(currentEnrichment.getInstance()).get(0).getPid());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private <T extends Result> void applyAndVerifyEnrichment(final List<T> l1, final List<T> l2) {
|
||||||
|
// Apply Merge and verify that enrichments works
|
||||||
|
for(int i = 0 ; i< l1.size(); i++) {
|
||||||
|
final Result currentPub = l2.get(i);
|
||||||
|
final Result currentEnrichment = l1.get(i);
|
||||||
|
final Result result = OafUtils.mergeOAF(currentPub, currentEnrichment);
|
||||||
|
assertEquals(1, result.getInstance().size());
|
||||||
|
final Instance currentInstance = Objects.requireNonNull(result.getInstance()).get(0);
|
||||||
|
assertNotNull(currentInstance.getMeasures());
|
||||||
|
assertNotNull(result.getTitle());
|
||||||
|
assertFalse(OafUtils.isAnEnrichment(result));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test the merge of the APC at the level of the result and the instance.
|
||||||
|
*
|
||||||
|
* @throws Exception the exception
|
||||||
|
*/
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testAPCMerge() throws Exception {
|
||||||
|
List<Publication> publications = loadResourceResult("/eu/dnetlib/dhp/schema/oaf/utils/publication_apc.json", Publication.class);
|
||||||
|
System.out.println(publications.size());
|
||||||
|
publications.forEach(p -> assertEquals(1, p.getInstance().size()));
|
||||||
|
publications.forEach(p -> assertTrue(p.getProcessingchargeamount() != null ));
|
||||||
|
publications.forEach(p -> assertTrue(p.getProcessingchargecurrency() != null ));
|
||||||
|
publications.forEach(p -> assertTrue(StringUtils.isNotBlank(p.getProcessingchargeamount().getValue() )));
|
||||||
|
publications.forEach(p -> assertTrue(StringUtils.isNotBlank(p.getProcessingchargecurrency().getValue() )));
|
||||||
|
|
||||||
|
publications.forEach(p -> p.getInstance().stream()
|
||||||
|
.forEach(i -> assertTrue(i.getProcessingchargeamount() != null)));
|
||||||
|
publications.forEach(p -> p.getInstance().stream()
|
||||||
|
.forEach(i -> assertTrue(i.getProcessingchargecurrency() != null)));
|
||||||
|
|
||||||
|
publications.forEach(p -> p.getInstance().stream()
|
||||||
|
.forEach(i -> assertTrue(StringUtils.isNotBlank(i.getProcessingchargeamount().getValue()))));
|
||||||
|
publications.forEach(p -> p.getInstance().stream()
|
||||||
|
.forEach(i -> assertTrue(StringUtils.isNotBlank(i.getProcessingchargecurrency().getValue()))));
|
||||||
|
|
||||||
|
Result p1 = publications.get(0);
|
||||||
|
Result p2 = publications.get(1);
|
||||||
|
|
||||||
|
p1 = OafUtils.mergeOAF(p1, p2);
|
||||||
|
|
||||||
|
assertEquals("1721.47", p1.getProcessingchargeamount().getValue());
|
||||||
|
assertEquals("EUR", p1.getProcessingchargecurrency().getValue());
|
||||||
|
|
||||||
|
assertEquals(2 , p1.getInstance().size());
|
||||||
|
|
||||||
|
p1.getInstance().stream().forEach(i -> assertTrue(i.getProcessingchargeamount() != null));
|
||||||
|
p1.getInstance().stream().forEach(i -> assertTrue(i.getProcessingchargecurrency() != null));
|
||||||
|
|
||||||
|
p1.getInstance().stream().anyMatch(i -> i.getProcessingchargeamount().getValue().equals("2000.47"));
|
||||||
|
p1.getInstance().stream().anyMatch(i -> i.getProcessingchargeamount().getValue().equals("1721.47"));
|
||||||
|
|
||||||
|
p1.getInstance().stream().anyMatch(i -> i.getProcessingchargecurrency().getValue().equals("EUR"));
|
||||||
|
p1.getInstance().stream().anyMatch(i -> i.getProcessingchargecurrency().getValue().equals("USD"));
|
||||||
|
System.out.println(new ObjectMapper().writeValueAsString(p1));
|
||||||
|
}
|
||||||
|
@Test
|
||||||
|
void testAPCMerge2() throws Exception {
|
||||||
|
List<Publication> publications = loadResourceResult("/eu/dnetlib/dhp/schema/oaf/utils/publication_apc2.json", Publication.class);
|
||||||
|
System.out.println(publications.size());
|
||||||
|
publications.forEach(p -> assertEquals(1, p.getInstance().size()));
|
||||||
|
assertTrue(publications.get(0).getProcessingchargeamount() != null );
|
||||||
|
assertTrue(publications.get(0).getProcessingchargecurrency() != null );
|
||||||
|
assertTrue(publications.get(1).getProcessingchargeamount() == null );
|
||||||
|
|
||||||
|
Result p1 = publications.get(1);
|
||||||
|
Result p2 = publications.get(0);
|
||||||
|
|
||||||
|
//merge visible record with OpenAPC
|
||||||
|
p1 = (Result) OafUtils.mergeOAF(p1, p2);
|
||||||
|
|
||||||
|
assertFalse(p1.getDataInfo().getInvisible());
|
||||||
|
assertEquals("1721.47", p1.getProcessingchargeamount().getValue());
|
||||||
|
assertEquals("EUR", p1.getProcessingchargecurrency().getValue());
|
||||||
|
|
||||||
|
assertEquals(2 , p1.getInstance().size());
|
||||||
|
p1.getInstance().stream().anyMatch(i -> i.getProcessingchargeamount() != null);
|
||||||
|
|
||||||
|
p1.getInstance().stream().anyMatch(i -> i.getProcessingchargecurrency() != null);
|
||||||
|
|
||||||
|
assertEquals("1721.47", p1.getInstance().stream().filter(i -> i.getProcessingchargeamount() != null)
|
||||||
|
.collect(Collectors.toList()).get(0).getProcessingchargeamount().getValue());
|
||||||
|
|
||||||
|
assertEquals("EUR", p1.getInstance().stream().filter(i -> i.getProcessingchargeamount() != null)
|
||||||
|
.collect(Collectors.toList()).get(0).getProcessingchargecurrency().getValue());
|
||||||
|
assertFalse(p1.getDataInfo().getInvisible());
|
||||||
|
System.out.println(new ObjectMapper().writeValueAsString(p1));
|
||||||
|
|
||||||
|
//merge OpenAPC with visible record
|
||||||
|
|
||||||
|
p2 = (Result) OafUtils.mergeOAF(p2, p1);
|
||||||
|
|
||||||
|
assertFalse(p2.getDataInfo().getInvisible());
|
||||||
|
assertEquals("1721.47", p2.getProcessingchargeamount().getValue());
|
||||||
|
assertEquals("EUR", p2.getProcessingchargecurrency().getValue());
|
||||||
|
|
||||||
|
assertEquals(2 , p2.getInstance().size());
|
||||||
|
p2.getInstance().stream().anyMatch(i -> i.getProcessingchargeamount() != null);
|
||||||
|
|
||||||
|
p2.getInstance().stream().anyMatch(i -> i.getProcessingchargecurrency() != null);
|
||||||
|
|
||||||
|
assertEquals("1721.47", p2.getInstance().stream().filter(i -> i.getProcessingchargeamount() != null)
|
||||||
|
.collect(Collectors.toList()).get(0).getProcessingchargeamount().getValue());
|
||||||
|
|
||||||
|
assertEquals("EUR", p2.getInstance().stream().filter(i -> i.getProcessingchargeamount() != null)
|
||||||
|
.collect(Collectors.toList()).get(0).getProcessingchargecurrency().getValue());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test enrichment function.
|
||||||
|
*
|
||||||
|
* @throws Exception the exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
void testEnrichment() throws Exception {
|
||||||
|
|
||||||
|
|
||||||
|
// 1 TEST UPDATING PID INSTANCE AND MERGE CURRENT PUBLICATION WITH ENRICHMENT
|
||||||
|
// LOAD test publications
|
||||||
|
List<Publication> publications = loadResourceResult("/eu/dnetlib/dhp/schema/oaf/utils/publications.json", Publication.class);
|
||||||
|
// Assert that each publication has only one instance and inside that all the measure field is empty
|
||||||
|
publications.forEach(p -> {
|
||||||
|
assertEquals(1, p.getInstance().size());
|
||||||
|
final Instance currentInstance = Objects.requireNonNull(p.getInstance()).get(0);
|
||||||
|
assertNull(currentInstance.getMeasures());
|
||||||
|
});
|
||||||
|
|
||||||
|
// LOAD test enrichments
|
||||||
|
List<Publication> enrichment = loadResourceResult("/eu/dnetlib/dhp/schema/oaf/utils/enrichment.json", Publication.class);
|
||||||
|
|
||||||
|
updatePidIntoPublicationInstance(publications, enrichment, false);
|
||||||
|
applyAndVerifyEnrichment(publications, enrichment);
|
||||||
|
|
||||||
|
|
||||||
|
// 2 TEST UPDATING ALTERNATE ID INSTANCE AND MERGE CURRENT PUBLICATION WITH ENRICHMENT
|
||||||
|
publications = loadResourceResult("/eu/dnetlib/dhp/schema/oaf/utils/publications.json", Publication.class);
|
||||||
|
enrichment = loadResourceResult("/eu/dnetlib/dhp/schema/oaf/utils/enrichment.json", Publication.class);
|
||||||
|
updatePidIntoPublicationInstance(publications, enrichment, true);
|
||||||
|
applyAndVerifyEnrichment(publications, enrichment);
|
||||||
|
|
||||||
|
|
||||||
|
// 3 TEST UPDATING PID INSTANCE AND MERGE ENRICHMENT WITH CURRENT PUBLICATION
|
||||||
|
publications = loadResourceResult("/eu/dnetlib/dhp/schema/oaf/utils/publications.json", Publication.class);
|
||||||
|
enrichment = loadResourceResult("/eu/dnetlib/dhp/schema/oaf/utils/enrichment.json", Publication.class);
|
||||||
|
updatePidIntoPublicationInstance(publications, enrichment, false);
|
||||||
|
applyAndVerifyEnrichment( enrichment, publications);
|
||||||
|
|
||||||
|
// 4 TEST UPDATING ALTERNATE ID INSTANCE AND MERGE ENRICHMENT WITH CURRENT PUBLICATION
|
||||||
|
publications = loadResourceResult("/eu/dnetlib/dhp/schema/oaf/utils/publications.json", Publication.class);
|
||||||
|
enrichment = loadResourceResult("/eu/dnetlib/dhp/schema/oaf/utils/enrichment.json", Publication.class);
|
||||||
|
updatePidIntoPublicationInstance(publications, enrichment, true);
|
||||||
|
applyAndVerifyEnrichment( enrichment, publications);
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merge publication date of acceptance test both present.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
void mergePublicationDateOfAcceptanceTest_bothPresent() {
|
||||||
|
|
||||||
|
Publication a = publication();
|
||||||
|
Publication b = publication();
|
||||||
|
|
||||||
|
a.setDateofacceptance(field("2021-06-18"));
|
||||||
|
b.setDateofacceptance(field("2021-06-19"));
|
||||||
|
|
||||||
|
a = OafUtils.mergeOAF(a,b);
|
||||||
|
|
||||||
|
assertNotNull(a.getDateofacceptance());
|
||||||
|
assertEquals("2021-06-18", a.getDateofacceptance().getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merge publication date of acceptance test both present 1.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
void mergePublicationDateOfAcceptanceTest_bothPresent_1() {
|
||||||
|
|
||||||
|
Publication a = publication("0.8");
|
||||||
|
Publication b = publication("0.9");
|
||||||
|
|
||||||
|
a.setDateofacceptance(field("2021-06-18"));
|
||||||
|
b.setDateofacceptance(field("2021-06-19"));
|
||||||
|
|
||||||
|
a = OafUtils.mergeOAF(a,b);
|
||||||
|
|
||||||
|
assertNotNull(a.getDateofacceptance());
|
||||||
|
assertEquals("2021-06-19", a.getDateofacceptance().getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merge publication date of acceptance test both present 2.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
void mergePublicationDateOfAcceptanceTest_bothPresent_2() {
|
||||||
|
|
||||||
|
Publication a = publication("0.9");
|
||||||
|
Publication b = publication("0.8");
|
||||||
|
|
||||||
|
a.setDateofacceptance(field("2021-06-18"));
|
||||||
|
b.setDateofacceptance(field("2021-06-19"));
|
||||||
|
|
||||||
|
a = OafUtils.mergeOAF(a,b);
|
||||||
|
|
||||||
|
assertNotNull(a.getDateofacceptance());
|
||||||
|
assertEquals("2021-06-18", a.getDateofacceptance().getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merge publication date of acceptance test left missing.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
void mergePublicationDateOfAcceptanceTest_leftMissing() {
|
||||||
|
|
||||||
|
Publication a = publication();
|
||||||
|
Publication b = publication();
|
||||||
|
|
||||||
|
b.setDateofacceptance(field("2021-06-19"));
|
||||||
|
|
||||||
|
a = OafUtils.mergeOAF(a,b);
|
||||||
|
|
||||||
|
assertNotNull(a.getDateofacceptance());
|
||||||
|
assertEquals("2021-06-19", a.getDateofacceptance().getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merge publication date of acceptance test left missing 1.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
void mergePublicationDateOfAcceptanceTest_leftMissing_1() {
|
||||||
|
|
||||||
|
Publication a = publication("0.9");
|
||||||
|
Publication b = publication("0.8");
|
||||||
|
|
||||||
|
b.setDateofacceptance(field("2021-06-19"));
|
||||||
|
|
||||||
|
a = OafUtils.mergeOAF(a,b);
|
||||||
|
|
||||||
|
assertNotNull(a.getDateofacceptance());
|
||||||
|
assertEquals("2021-06-19", a.getDateofacceptance().getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merge publication date of acceptance test left missing 2.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
void mergePublicationDateOfAcceptanceTest_leftMissing_2() {
|
||||||
|
|
||||||
|
Publication a = publication("0.8");
|
||||||
|
Publication b = publication("0.9");
|
||||||
|
|
||||||
|
b.setDateofacceptance(field("2021-06-19"));
|
||||||
|
|
||||||
|
a = OafUtils.mergeOAF(a,b);
|
||||||
|
|
||||||
|
assertNotNull(a.getDateofacceptance());
|
||||||
|
assertEquals("2021-06-19", a.getDateofacceptance().getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merge publication date of acceptance test right missing.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
void mergePublicationDateOfAcceptanceTest_rightMissing() {
|
||||||
|
|
||||||
|
Publication a = publication();
|
||||||
|
Publication b = publication();
|
||||||
|
|
||||||
|
a.setDateofacceptance(field("2021-06-19"));
|
||||||
|
|
||||||
|
a = OafUtils.mergeOAF(a,b);
|
||||||
|
|
||||||
|
assertNotNull(a.getDateofacceptance());
|
||||||
|
assertEquals("2021-06-19", a.getDateofacceptance().getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merge publication date of acceptance test right missing 1.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
void mergePublicationDateOfAcceptanceTest_rightMissing_1() {
|
||||||
|
|
||||||
|
Publication a = publication("0.8");
|
||||||
|
Publication b = publication("0.9");
|
||||||
|
|
||||||
|
a.setDateofacceptance(field("2021-06-19"));
|
||||||
|
|
||||||
|
a = OafUtils.mergeOAF(a,b);
|
||||||
|
|
||||||
|
assertNotNull(a.getDateofacceptance());
|
||||||
|
assertEquals("2021-06-19", a.getDateofacceptance().getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merge publication date of acceptance test right missing 2.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
void mergePublicationDateOfAcceptanceTest_rightMissing_2() {
|
||||||
|
|
||||||
|
Publication a = publication("0.9");
|
||||||
|
Publication b = publication("0.8");
|
||||||
|
|
||||||
|
a.setDateofacceptance(field("2021-06-19"));
|
||||||
|
|
||||||
|
a = OafUtils.mergeOAF(a,b);
|
||||||
|
|
||||||
|
assertNotNull(a.getDateofacceptance());
|
||||||
|
assertEquals("2021-06-19", a.getDateofacceptance().getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merge publication subject test.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
void mergePublicationSubjectTest() {
|
||||||
|
|
||||||
|
Publication a = publication();
|
||||||
|
Publication b = publication();
|
||||||
|
|
||||||
|
a.setSubject(Arrays.asList(setSP("a", "open", "classe"), setSP("b", "open", "classe")));
|
||||||
|
b.setSubject(Arrays.asList(setSP("A", "open", "classe"), setSP("c", "open", "classe")));
|
||||||
|
|
||||||
|
a = OafUtils.mergeOAF(a,b);
|
||||||
|
|
||||||
|
assertNotNull(a.getSubject());
|
||||||
|
assertEquals(3, a.getSubject().size());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merge relation test.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
void mergeRelationTest() {
|
||||||
|
|
||||||
|
Relation a = createRel(null, null);
|
||||||
|
Relation b = createRel(null, null);
|
||||||
|
a = OafUtils.mergeOAF(a,b);
|
||||||
|
assertEquals(a, b);
|
||||||
|
|
||||||
|
a = createRel(true, null);
|
||||||
|
b = createRel(null, null);
|
||||||
|
a = OafUtils.mergeOAF(a,b);
|
||||||
|
assertEquals(true, a.getValidated());
|
||||||
|
|
||||||
|
a = createRel(true, null);
|
||||||
|
b = createRel(false, null);
|
||||||
|
a = OafUtils.mergeOAF(a,b);
|
||||||
|
assertEquals(true, a.getValidated());
|
||||||
|
|
||||||
|
a = createRel(true, null);
|
||||||
|
b = createRel(true, "2016-04-05T12:41:19.202Z");
|
||||||
|
a = OafUtils.mergeOAF(a,b);
|
||||||
|
assertEquals("2016-04-05T12:41:19.202Z", a.getValidationDate());
|
||||||
|
|
||||||
|
a = createRel(true, "2016-05-07T12:41:19.202Z");
|
||||||
|
b = createRel(true, "2016-04-05T12:41:19.202Z");
|
||||||
|
a = OafUtils.mergeOAF(a,b);
|
||||||
|
assertEquals("2016-04-05T12:41:19.202Z", a.getValidationDate());
|
||||||
|
|
||||||
|
a = createRel(true, "2020-09-10 11:08:52");
|
||||||
|
b = createRel(true, "2021-09-10 11:08:52");
|
||||||
|
a = OafUtils.mergeOAF(a,b);
|
||||||
|
assertEquals("2020-09-10 11:08:52", a.getValidationDate());
|
||||||
|
|
||||||
|
a = createRel(true, "2021-03-16T10:32:42Z");
|
||||||
|
b = createRel(true, "2020-03-16T10:32:42Z");
|
||||||
|
a = OafUtils.mergeOAF(a,b);
|
||||||
|
assertEquals("2020-03-16T10:32:42Z", a.getValidationDate());
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merge relation test parse exception.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
void mergeRelationTestParseException() {
|
||||||
|
assertThrows(DateTimeParseException.class, () -> {
|
||||||
|
Relation a = createRel(true, "Once upon a time ...");
|
||||||
|
Relation b = createRel(true, "... in a far away land");
|
||||||
|
a = OafUtils.mergeOAF(a,b);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create rel relation.
|
||||||
|
*
|
||||||
|
* @param validated the validated
|
||||||
|
* @param validationDate the validation date
|
||||||
|
* @return the relation
|
||||||
|
*/
|
||||||
|
private Relation createRel(Boolean validated, String validationDate) {
|
||||||
|
Relation rel = new Relation();
|
||||||
|
rel.setSource("1");
|
||||||
|
rel.setTarget("2");
|
||||||
|
rel.setRelType("reltype");
|
||||||
|
rel.setSubRelType("subreltype");
|
||||||
|
rel.setRelClass("relclass");
|
||||||
|
rel.setValidated(validated);
|
||||||
|
rel.setValidationDate(validationDate);
|
||||||
|
return rel;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets kv.
|
||||||
|
*
|
||||||
|
* @param key the key
|
||||||
|
* @param value the value
|
||||||
|
* @return the kv
|
||||||
|
*/
|
||||||
|
private KeyValue setKV(final String key, final String value) {
|
||||||
|
|
||||||
|
KeyValue k = new KeyValue();
|
||||||
|
|
||||||
|
k.setKey(key);
|
||||||
|
k.setValue(value);
|
||||||
|
|
||||||
|
return k;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets sp.
|
||||||
|
*
|
||||||
|
* @param value the value
|
||||||
|
* @param schema the schema
|
||||||
|
* @param classname the classname
|
||||||
|
* @return the sp
|
||||||
|
*/
|
||||||
|
private StructuredProperty setSP(
|
||||||
|
final String value, final String schema, final String classname) {
|
||||||
|
StructuredProperty s = new StructuredProperty();
|
||||||
|
s.setValue(value);
|
||||||
|
Qualifier q = new Qualifier();
|
||||||
|
q.setClassname(classname);
|
||||||
|
q.setClassid(classname);
|
||||||
|
q.setSchemename(schema);
|
||||||
|
q.setSchemeid(schema);
|
||||||
|
s.setQualifier(q);
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Field field.
|
||||||
|
*
|
||||||
|
* @param <T> the type parameter
|
||||||
|
* @param value the value
|
||||||
|
* @return the field
|
||||||
|
*/
|
||||||
|
private <T> Field<T> field(T value) {
|
||||||
|
Field<T> f = new Field();
|
||||||
|
f.setValue(value);
|
||||||
|
return f;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Publication publication.
|
||||||
|
*
|
||||||
|
* @return the publication
|
||||||
|
*/
|
||||||
|
private Publication publication() {
|
||||||
|
Publication p = new Publication();
|
||||||
|
p.setDataInfo(df("0.9"));
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Publication publication.
|
||||||
|
*
|
||||||
|
* @param trust the trust
|
||||||
|
* @return the publication
|
||||||
|
*/
|
||||||
|
private Publication publication(String trust) {
|
||||||
|
Publication p = new Publication();
|
||||||
|
p.setDataInfo(df(trust));
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Df data info.
|
||||||
|
*
|
||||||
|
* @param trust the trust
|
||||||
|
* @return the data info
|
||||||
|
*/
|
||||||
|
private DataInfo df(String trust) {
|
||||||
|
DataInfo d = new DataInfo();
|
||||||
|
d.setTrust(trust);
|
||||||
|
return d;
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,140 +1 @@
|
||||||
{
|
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resuttype" : { "classid" : "dataset" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", "value" : "Repository B"} ]}
|
||||||
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g",
|
|
||||||
"resuttype": {"classid": "dataset"},
|
|
||||||
"pid": [
|
|
||||||
{
|
|
||||||
"qualifier": {"classid": "doi"},
|
|
||||||
"value": "10.1016/j.cmet.2011.03.013"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"qualifier": {"classid": "urn"},
|
|
||||||
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"qualifier": {"classid": "scp-number"},
|
|
||||||
"value": "79953761260"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"qualifier": {"classid": "pmc"},
|
|
||||||
"value": "21459329"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"collectedfrom": [
|
|
||||||
{
|
|
||||||
"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3",
|
|
||||||
"value": "Repository B"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"instance": [
|
|
||||||
{
|
|
||||||
"refereed": {
|
|
||||||
"classid": "0000",
|
|
||||||
"classname": "UNKNOWN",
|
|
||||||
"schemeid": "dnet:review_levels",
|
|
||||||
"schemename": "dnet:review_levels"
|
|
||||||
},
|
|
||||||
"hostedby": {
|
|
||||||
"key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
|
|
||||||
"value": "Zenodo"
|
|
||||||
},
|
|
||||||
"accessright": {
|
|
||||||
"classid": "OPEN",
|
|
||||||
"classname": "Open Access",
|
|
||||||
"schemeid": "dnet:access_modes",
|
|
||||||
"schemename": "dnet:access_modes"
|
|
||||||
},
|
|
||||||
"processingchargecurrency": {
|
|
||||||
"dataInfo": {
|
|
||||||
"provenanceaction": {
|
|
||||||
"classid": "sysimport:crosswalk:datasetarchive",
|
|
||||||
"classname": "Harvested",
|
|
||||||
"schemeid": "dnet:provenanceActions",
|
|
||||||
"schemename": "dnet:provenanceActions"
|
|
||||||
},
|
|
||||||
"deletedbyinference": false,
|
|
||||||
"inferred": false,
|
|
||||||
"inferenceprovenance": "",
|
|
||||||
"invisible": true,
|
|
||||||
"trust": "0.9"
|
|
||||||
},
|
|
||||||
"value": "EUR"
|
|
||||||
},
|
|
||||||
"pid": [
|
|
||||||
{
|
|
||||||
"dataInfo": {
|
|
||||||
"provenanceaction": {
|
|
||||||
"classid": "sysimport:crosswalk:datasetarchive",
|
|
||||||
"classname": "Harvested",
|
|
||||||
"schemeid": "dnet:provenanceActions",
|
|
||||||
"schemename": "dnet:provenanceActions"
|
|
||||||
},
|
|
||||||
"deletedbyinference": false,
|
|
||||||
"inferred": false,
|
|
||||||
"inferenceprovenance": "",
|
|
||||||
"invisible": true,
|
|
||||||
"trust": "0.9"
|
|
||||||
},
|
|
||||||
"qualifier": {
|
|
||||||
"classid": "doi",
|
|
||||||
"classname": "Digital Object Identifier",
|
|
||||||
"schemeid": "dnet:pid_types",
|
|
||||||
"schemename": "dnet:pid_types"
|
|
||||||
},
|
|
||||||
"value": "10.1371/journal.pone.0085605"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"distributionlocation": "",
|
|
||||||
"url": ["https://doi.org/10.1371/journal.pone.0085605"],
|
|
||||||
"alternateIdentifier": [
|
|
||||||
{
|
|
||||||
"dataInfo": {
|
|
||||||
"provenanceaction": {
|
|
||||||
"classid": "sysimport:crosswalk:datasetarchive",
|
|
||||||
"classname": "Harvested",
|
|
||||||
"schemeid": "dnet:provenanceActions",
|
|
||||||
"schemename": "dnet:provenanceActions"
|
|
||||||
},
|
|
||||||
"deletedbyinference": false,
|
|
||||||
"inferred": false,
|
|
||||||
"inferenceprovenance": "",
|
|
||||||
"invisible": true,
|
|
||||||
"trust": "0.9"
|
|
||||||
},
|
|
||||||
"qualifier": {
|
|
||||||
"classid": "pmid",
|
|
||||||
"classname": "PubMed ID",
|
|
||||||
"schemeid": "dnet:pid_types",
|
|
||||||
"schemename": "dnet:pid_types"
|
|
||||||
},
|
|
||||||
"value": "24454899.0"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"collectedfrom": {
|
|
||||||
"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3",
|
|
||||||
"value": "Repository B"
|
|
||||||
},
|
|
||||||
"processingchargeamount": {
|
|
||||||
"dataInfo": {
|
|
||||||
"provenanceaction": {
|
|
||||||
"classid": "sysimport:crosswalk:datasetarchive",
|
|
||||||
"classname": "Harvested",
|
|
||||||
"schemeid": "dnet:provenanceActions",
|
|
||||||
"schemename": "dnet:provenanceActions"
|
|
||||||
},
|
|
||||||
"deletedbyinference": false,
|
|
||||||
"inferred": false,
|
|
||||||
"inferenceprovenance": "",
|
|
||||||
"invisible": true,
|
|
||||||
"trust": "0.9"
|
|
||||||
},
|
|
||||||
"value": "1022.02"
|
|
||||||
},
|
|
||||||
"instancetype": {
|
|
||||||
"classid": "0004",
|
|
||||||
"classname": "Conference object",
|
|
||||||
"schemeid": "dnet:publication_resource",
|
|
||||||
"schemename": "dnet:publication_resource"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
|
@ -0,0 +1,12 @@
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0000/ra.v2i3.114::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"4.65008652949e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0000/ra.v2i3.114"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0001/(aj).v3i6.458::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"4.01810569717e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0001/(aj).v3i6.458"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0001/1587::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.39172290649e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0001/1587"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0001/462::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"6.33235333753e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.36"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.00285265116e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0001/462"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0001/731::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"4.01810569717e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0001/731"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0001/ijllis.v9i4.2066.g2482::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"8.48190886761e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0001/ijllis.v9i4.2066.g2482"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0118/alfahim.v3i1.140::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"9.88840807598e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0118/alfahim.v3i1.140"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0166/fk2.stagefigshare.6442896.v3::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"7.28336930301e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0166/fk2.stagefigshare.6442896.v3"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0301/jttb.v2i1.64::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"7.28336930301e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0301/jttb.v2i1.64"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0809/seruni.v1i1.567::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"2.62959564033e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0809/seruni.v1i1.567"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0809/seruni.v2i1.765::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"9.40178571921e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0559872"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"3.67659957614e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0809/seruni.v2i1.765"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0901/jkip.v7i3.485::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"6.26204125721e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0901/jkip.v7i3.485"}]}]}
|
|
@ -0,0 +1 @@
|
||||||
|
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[{"qualifier":{"classid":"scp-number"},"value":"79953761260"}]}
|
|
@ -0,0 +1 @@
|
||||||
|
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[]}
|
|
@ -0,0 +1 @@
|
||||||
|
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f"}
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,33 @@
|
||||||
|
{
|
||||||
|
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
|
||||||
|
"instance": [
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2",
|
||||||
|
"value": "Crossref"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "doi"},
|
||||||
|
"value": "10.1016/j.cmet.2010.03.013"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "urn"},
|
||||||
|
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "scp-number"},
|
||||||
|
"value": "79953761260"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "pmc"},
|
||||||
|
"value": "21459329"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -0,0 +1,37 @@
|
||||||
|
{
|
||||||
|
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
|
||||||
|
"instance": [
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2",
|
||||||
|
"value": "Crossref"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "doi"},
|
||||||
|
"value": "10.1016/j.cmet.2010.03.013"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c",
|
||||||
|
"value": "Europe PubMed Central"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "urn"},
|
||||||
|
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "scp-number"},
|
||||||
|
"value": "79953761260"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "pmc"},
|
||||||
|
"value": "21459329"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -0,0 +1,37 @@
|
||||||
|
{
|
||||||
|
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
|
||||||
|
"instance": [
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
|
||||||
|
"value": "Zenodo"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "doi"},
|
||||||
|
"value": "10.1016/j.cmet.2010.03.013"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c",
|
||||||
|
"value": "Europe PubMed Central"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "urn"},
|
||||||
|
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "scp-number"},
|
||||||
|
"value": "79953761260"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "pmc"},
|
||||||
|
"value": "21459329"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -0,0 +1,37 @@
|
||||||
|
{
|
||||||
|
"id": "50|od______2852::38861c44e6052a8d49f59a4c39ba5e66",
|
||||||
|
"instance": [
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
|
||||||
|
"value": "Zenodo"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "doi"},
|
||||||
|
"value": "10.1016/j.cmet.2010.03.013"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "handle"},
|
||||||
|
"value": "11012/83840"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|opendoar____::2852",
|
||||||
|
"value": "Digital library of Brno University of Technology"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "pmc"},
|
||||||
|
"value": "21459329"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "handle"},
|
||||||
|
"value": "11012/83840"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -0,0 +1,37 @@
|
||||||
|
{
|
||||||
|
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
|
||||||
|
"instance": [
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
|
||||||
|
"value": "Zenodo"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "doi"},
|
||||||
|
"value": "10.5281/zenodo.5121485"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c",
|
||||||
|
"value": "Europe PubMed Central"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "urn"},
|
||||||
|
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "scp-number"},
|
||||||
|
"value": "79953761260"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "pmc"},
|
||||||
|
"value": "21459329"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -0,0 +1,31 @@
|
||||||
|
{
|
||||||
|
"id": "50|openapc_____::000023f9cb6e3a247c764daec4273cbc",
|
||||||
|
"resuttype": {
|
||||||
|
"classid": "publication"
|
||||||
|
},
|
||||||
|
"instance": [
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|apc_________::e2b1600b229fc30663c8a1f662debddf",
|
||||||
|
"value": "OpenAPC Global Initiative"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "doi"},
|
||||||
|
"value": "10.1016/j.cmet.2010.03.013"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "pmc"},
|
||||||
|
"value": "21459329"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "pmid"},
|
||||||
|
"value": "25811027"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"url":["https://doi.org/10.1155/2015/439379"]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,17 @@
|
||||||
|
{
|
||||||
|
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "urn"},
|
||||||
|
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "scp-number"},
|
||||||
|
"value": "79953761260"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "pmc"},
|
||||||
|
"value": "21459329"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -0,0 +1,21 @@
|
||||||
|
{
|
||||||
|
"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
|
||||||
|
"instance": [
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c",
|
||||||
|
"value": "Europe PubMed Central"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "doi"},
|
||||||
|
"value": "10.1016/j.cmet.2010.03.013"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier":{"classid":"pmc"},
|
||||||
|
"value":"21459329"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -0,0 +1,23 @@
|
||||||
|
{
|
||||||
|
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "urn"
|
||||||
|
},
|
||||||
|
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "scp-number"
|
||||||
|
},
|
||||||
|
"value": "79953761260"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "pmcid"
|
||||||
|
},
|
||||||
|
"value": "21459329"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
File diff suppressed because one or more lines are too long
|
@ -1,5 +1,6 @@
|
||||||
package eu.dnetlib.dhp.datacite
|
package eu.dnetlib.dhp.datacite
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.client.AbstractRestClient
|
||||||
import org.json4s.jackson.JsonMethods.{compact, parse, render}
|
import org.json4s.jackson.JsonMethods.{compact, parse, render}
|
||||||
import org.json4s.{DefaultFormats, JValue}
|
import org.json4s.{DefaultFormats, JValue}
|
||||||
|
|
||||||
|
|
|
@ -511,7 +511,7 @@ object DoiBoostMappingUtil {
|
||||||
if (!ret.startsWith(DOI_PREFIX))
|
if (!ret.startsWith(DOI_PREFIX))
|
||||||
return null
|
return null
|
||||||
|
|
||||||
return ret
|
ret
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,50 @@
|
||||||
|
package eu.dnetlib.doiboost
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo}
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils
|
||||||
|
import org.apache.commons.lang.StringUtils
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils._
|
||||||
|
import scala.collection.JavaConverters._
|
||||||
|
|
||||||
|
/** Helpers used by the DoiBoost mappers to build common OAF model objects. */
object DoiBoostTransformationUtils {

  /** Trust value written into every DataInfo built by generateDataInfo. */
  val TRUST = "0.9"

  /** Builds a new DataInfo with deletedbyinference, inferred and invisible set to false,
    * trust set to TRUST, and the provenance action set to a qualifier built from
    * ModelConstants.SYSIMPORT_ACTIONSET and ModelConstants.DNET_PROVENANCE_ACTIONS.
    *
    * @return a freshly allocated DataInfo on every call
    */
  def generateDataInfo(): DataInfo = {
    val di = new DataInfo
    di.setDeletedbyinference(false)
    di.setInferred(false)
    di.setInvisible(false)
    di.setTrust(TRUST)
    di.setProvenanceaction(
      OafMapperUtils.qualifier(
        ModelConstants.SYSIMPORT_ACTIONSET,
        ModelConstants.SYSIMPORT_ACTIONSET,
        ModelConstants.DNET_PROVENANCE_ACTIONS,
        ModelConstants.DNET_PROVENANCE_ACTIONS
      )
    )
    di
  }

  /** Builds an OAF Author from the name parts and the optional ORCID.
    *
    * @param given  given name, may be null (it is stored as-is in the author name)
    * @param family family name, may be null (it is stored as-is in the author surname)
    * @param orcid  ORCID of the author; when blank or null no pid is set on the author
    * @param index  zero-based position of the author; the rank is stored as index + 1
    * @return the populated Author
    */
  def generateOAFAuthor(given: String, family: String, orcid: String, index: Int): Author = {
    val a = new Author
    a.setName(given)
    a.setSurname(family)
    // Join only the parts that are actually present: interpolating a null part
    // would otherwise put the literal text "null" into the full name.
    a.setFullname(Seq(given, family).filter(part => StringUtils.isNotBlank(part)).mkString(" "))
    a.setRank(index + 1)
    if (StringUtils.isNotBlank(orcid))
      a.setPid(
        List(
          structuredProperty(
            orcid,
            qualifier(
              ModelConstants.ORCID_PENDING,
              ModelConstants.ORCID_PENDING,
              ModelConstants.DNET_PID_TYPES,
              ModelConstants.DNET_PID_TYPES
            ),
            generateDataInfo()
          )
        ).asJava
      )

    a
  }

}
|
|
@ -1,18 +1,21 @@
|
||||||
package eu.dnetlib.doiboost.crossref
|
package eu.dnetlib.doiboost.crossref
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants
|
import eu.dnetlib.dhp.schema.common.ModelConstants
|
||||||
import eu.dnetlib.dhp.schema.oaf._
|
import eu.dnetlib.dhp.schema.oaf._
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils}
|
import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils}
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils
|
import eu.dnetlib.dhp.utils.DHPUtils
|
||||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
|
||||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil._
|
|
||||||
import org.apache.commons.lang.StringUtils
|
import org.apache.commons.lang.StringUtils
|
||||||
|
import org.apache.commons.lang3.tuple
|
||||||
import org.json4s
|
import org.json4s
|
||||||
import org.json4s.DefaultFormats
|
import org.json4s.DefaultFormats
|
||||||
import org.json4s.JsonAST._
|
import org.json4s.JsonAST._
|
||||||
import org.json4s.jackson.JsonMethods._
|
import org.json4s.jackson.JsonMethods._
|
||||||
import org.slf4j.{Logger, LoggerFactory}
|
import org.slf4j.{Logger, LoggerFactory}
|
||||||
|
|
||||||
|
import eu.dnetlib.doiboost.DoiBoostTransformationUtils._
|
||||||
|
|
||||||
import java.util
|
import java.util
|
||||||
import scala.collection.JavaConverters._
|
import scala.collection.JavaConverters._
|
||||||
import scala.collection.mutable
|
import scala.collection.mutable
|
||||||
|
@ -32,63 +35,10 @@ case class mappingAuthor(
|
||||||
|
|
||||||
case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {}
|
case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {}
|
||||||
|
|
||||||
case object Crossref2Oaf {
|
object Crossref2Oaf {
|
||||||
val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass)
|
val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass)
|
||||||
|
|
||||||
val mappingCrossrefType = Map(
|
def mappingResult(result: Result, json: JValue, resourceType:Qualifier, instanceType: Qualifier): Result = {
|
||||||
"book-section" -> "publication",
|
|
||||||
"book" -> "publication",
|
|
||||||
"book-chapter" -> "publication",
|
|
||||||
"book-part" -> "publication",
|
|
||||||
"book-series" -> "publication",
|
|
||||||
"book-set" -> "publication",
|
|
||||||
"book-track" -> "publication",
|
|
||||||
"edited-book" -> "publication",
|
|
||||||
"reference-book" -> "publication",
|
|
||||||
"monograph" -> "publication",
|
|
||||||
"journal-article" -> "publication",
|
|
||||||
"dissertation" -> "publication",
|
|
||||||
"other" -> "publication",
|
|
||||||
"peer-review" -> "publication",
|
|
||||||
"proceedings" -> "publication",
|
|
||||||
"proceedings-article" -> "publication",
|
|
||||||
"reference-entry" -> "publication",
|
|
||||||
"report" -> "publication",
|
|
||||||
"report-series" -> "publication",
|
|
||||||
"standard" -> "publication",
|
|
||||||
"standard-series" -> "publication",
|
|
||||||
"posted-content" -> "publication",
|
|
||||||
"dataset" -> "dataset"
|
|
||||||
)
|
|
||||||
|
|
||||||
val mappingCrossrefSubType = Map(
|
|
||||||
"book-section" -> "0013 Part of book or chapter of book",
|
|
||||||
"book" -> "0002 Book",
|
|
||||||
"book-chapter" -> "0013 Part of book or chapter of book",
|
|
||||||
"book-part" -> "0013 Part of book or chapter of book",
|
|
||||||
"book-series" -> "0002 Book",
|
|
||||||
"book-set" -> "0002 Book",
|
|
||||||
"book-track" -> "0002 Book",
|
|
||||||
"edited-book" -> "0002 Book",
|
|
||||||
"reference-book" -> "0002 Book",
|
|
||||||
"monograph" -> "0002 Book",
|
|
||||||
"journal-article" -> "0001 Article",
|
|
||||||
"dissertation" -> "0044 Thesis",
|
|
||||||
"other" -> "0038 Other literature type",
|
|
||||||
"peer-review" -> "0015 Review",
|
|
||||||
"proceedings" -> "0004 Conference object",
|
|
||||||
"proceedings-article" -> "0004 Conference object",
|
|
||||||
"reference-entry" -> "0013 Part of book or chapter of book",
|
|
||||||
"report" -> "0017 Report",
|
|
||||||
"report-series" -> "0017 Report",
|
|
||||||
"standard" -> "0038 Other literature type",
|
|
||||||
"standard-series" -> "0038 Other literature type",
|
|
||||||
"dataset" -> "0021 Dataset",
|
|
||||||
"preprint" -> "0016 Preprint",
|
|
||||||
"report" -> "0017 Report"
|
|
||||||
)
|
|
||||||
|
|
||||||
def mappingResult(result: Result, json: JValue, cobjCategory: String): Result = {
|
|
||||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||||
|
|
||||||
//MAPPING Crossref DOI into PID
|
//MAPPING Crossref DOI into PID
|
||||||
|
@ -214,7 +164,7 @@ case object Crossref2Oaf {
|
||||||
)
|
)
|
||||||
|
|
||||||
result.setAuthor(sorted_list.zipWithIndex.map { case (a, index) =>
|
result.setAuthor(sorted_list.zipWithIndex.map { case (a, index) =>
|
||||||
generateAuhtor(a.given.orNull, a.family, a.ORCID.orNull, index)
|
generateOAFAuthor(a.given.orNull, a.family, a.ORCID.orNull, index)
|
||||||
}.asJava)
|
}.asJava)
|
||||||
|
|
||||||
// Mapping instance
|
// Mapping instance
|
||||||
|
@ -256,22 +206,8 @@ case object Crossref2Oaf {
|
||||||
instance.setAccessright(
|
instance.setAccessright(
|
||||||
decideAccessRight(instance.getLicense, result.getDateofacceptance.getValue)
|
decideAccessRight(instance.getLicense, result.getDateofacceptance.getValue)
|
||||||
)
|
)
|
||||||
instance.setInstancetype(
|
instance.setInstancetype(instanceType)
|
||||||
OafMapperUtils.qualifier(
|
result.setResourcetype(resourceType)
|
||||||
cobjCategory.substring(0, 4),
|
|
||||||
cobjCategory.substring(5),
|
|
||||||
ModelConstants.DNET_PUBLICATION_RESOURCE,
|
|
||||||
ModelConstants.DNET_PUBLICATION_RESOURCE
|
|
||||||
)
|
|
||||||
)
|
|
||||||
result.setResourcetype(
|
|
||||||
OafMapperUtils.qualifier(
|
|
||||||
cobjCategory.substring(0, 4),
|
|
||||||
cobjCategory.substring(5),
|
|
||||||
ModelConstants.DNET_PUBLICATION_RESOURCE,
|
|
||||||
ModelConstants.DNET_PUBLICATION_RESOURCE
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
instance.setCollectedfrom(createCrossrefCollectedFrom())
|
instance.setCollectedfrom(createCrossrefCollectedFrom())
|
||||||
if (StringUtils.isNotBlank(issuedDate)) {
|
if (StringUtils.isNotBlank(issuedDate)) {
|
||||||
|
@ -280,10 +216,7 @@ case object Crossref2Oaf {
|
||||||
instance.setDateofacceptance(asField(createdDate.getValue))
|
instance.setDateofacceptance(asField(createdDate.getValue))
|
||||||
}
|
}
|
||||||
val s: List[String] = List("https://doi.org/" + doi)
|
val s: List[String] = List("https://doi.org/" + doi)
|
||||||
// val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && p.toLowerCase().contains(doi.toLowerCase())).distinct
|
|
||||||
// if (links.nonEmpty) {
|
|
||||||
// instance.setUrl(links.asJava)
|
|
||||||
// }
|
|
||||||
if (s.nonEmpty) {
|
if (s.nonEmpty) {
|
||||||
instance.setUrl(s.asJava)
|
instance.setUrl(s.asJava)
|
||||||
}
|
}
|
||||||
|
@ -309,28 +242,9 @@ case object Crossref2Oaf {
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
def generateAuhtor(given: String, family: String, orcid: String, index: Int): Author = {
|
|
||||||
val a = new Author
|
|
||||||
a.setName(given)
|
|
||||||
a.setSurname(family)
|
|
||||||
a.setFullname(s"$given $family")
|
|
||||||
a.setRank(index + 1)
|
|
||||||
if (StringUtils.isNotBlank(orcid))
|
|
||||||
a.setPid(
|
|
||||||
List(
|
|
||||||
createSP(
|
|
||||||
orcid,
|
|
||||||
ModelConstants.ORCID_PENDING,
|
|
||||||
ModelConstants.DNET_PID_TYPES,
|
|
||||||
generateDataInfo()
|
|
||||||
)
|
|
||||||
).asJava
|
|
||||||
)
|
|
||||||
|
|
||||||
a
|
|
||||||
}
|
|
||||||
|
|
||||||
def convert(input: String): List[Oaf] = {
|
def convert(input: String, vocabularies: VocabularyGroup): List[Oaf] = {
|
||||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||||
lazy val json: json4s.JValue = parse(input)
|
lazy val json: json4s.JValue = parse(input)
|
||||||
|
|
||||||
|
@ -341,14 +255,14 @@ case object Crossref2Oaf {
|
||||||
if (objectType == null)
|
if (objectType == null)
|
||||||
return resultList
|
return resultList
|
||||||
|
|
||||||
val result = generateItemFromType(objectType, objectSubType)
|
val result = generateItemFromType(objectType, objectSubType, vocabularies)
|
||||||
if (result == null)
|
if (result == null)
|
||||||
return List()
|
return List()
|
||||||
val cOBJCategory = mappingCrossrefSubType.getOrElse(
|
|
||||||
objectType,
|
val (resourceType, instanceType) =getTypeQualifier(objectType, objectSubType, vocabularies)
|
||||||
mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type")
|
|
||||||
)
|
|
||||||
mappingResult(result, json, cOBJCategory)
|
mappingResult(result, json, resourceType, instanceType)
|
||||||
if (result == null || result.getId == null)
|
if (result == null || result.getId == null)
|
||||||
return List()
|
return List()
|
||||||
|
|
||||||
|
@ -366,7 +280,7 @@ case object Crossref2Oaf {
|
||||||
}
|
}
|
||||||
|
|
||||||
result match {
|
result match {
|
||||||
case publication: Publication => convertPublication(publication, json, cOBJCategory)
|
case publication: Publication => convertPublication(publication, json)
|
||||||
case dataset: Dataset => convertDataset(dataset)
|
case dataset: Dataset => convertDataset(dataset)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -521,12 +435,14 @@ case object Crossref2Oaf {
|
||||||
// TODO check if there are other info to map into the Dataset
|
// TODO check if there are other info to map into the Dataset
|
||||||
}
|
}
|
||||||
|
|
||||||
def convertPublication(publication: Publication, json: JValue, cobjCategory: String): Unit = {
|
def convertPublication(publication: Publication, json: JValue): Unit = {
|
||||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||||
val containerTitles = for { JString(ct) <- json \ "container-title" } yield ct
|
val containerTitles = for { JString(ct) <- json \ "container-title" } yield ct
|
||||||
|
|
||||||
|
val className = publication.getInstance().asScala.map(i => i.getInstancetype.getClassname).head
|
||||||
|
|
||||||
//Mapping book
|
//Mapping book
|
||||||
if (cobjCategory.toLowerCase.contains("book")) {
|
if ("book".equalsIgnoreCase(className)) {
|
||||||
val ISBN = for { JString(isbn) <- json \ "ISBN" } yield isbn
|
val ISBN = for { JString(isbn) <- json \ "ISBN" } yield isbn
|
||||||
if (ISBN.nonEmpty && containerTitles.nonEmpty) {
|
if (ISBN.nonEmpty && containerTitles.nonEmpty) {
|
||||||
val source = s"${containerTitles.head} ISBN: ${ISBN.head}"
|
val source = s"${containerTitles.head} ISBN: ${ISBN.head}"
|
||||||
|
@ -607,12 +523,27 @@ case object Crossref2Oaf {
|
||||||
null
|
null
|
||||||
}
|
}
|
||||||
|
|
||||||
def generateItemFromType(objectType: String, objectSubType: String): Result = {
|
|
||||||
if (mappingCrossrefType.contains(objectType)) {
|
|
||||||
if (mappingCrossrefType(objectType).equalsIgnoreCase("publication"))
|
def getTypeQualifier(objectType: String, objectSubType:String, vocabularies: VocabularyGroup):(Qualifier,Qualifier) = {
|
||||||
return new Publication()
|
val result: tuple.Pair[Qualifier, Qualifier] = DHPUtils.retrieveOAFTypeFromVocabulary(objectType, objectSubType,null, vocabularies)
|
||||||
if (mappingCrossrefType(objectType).equalsIgnoreCase("dataset"))
|
|
||||||
return new Dataset()
|
if (result!= null)
|
||||||
|
(result.getValue, result.getKey)
|
||||||
|
else
|
||||||
|
null
|
||||||
|
}
|
||||||
|
|
||||||
|
def generateItemFromType(objectType: String, objectSubType:String, vocabularies: VocabularyGroup): Result = {
|
||||||
|
|
||||||
|
val result =getTypeQualifier(objectType, objectSubType, vocabularies)
|
||||||
|
if (result != null)
|
||||||
|
{
|
||||||
|
if ("publication".equalsIgnoreCase(result._1.getClassname)) {
|
||||||
|
return new Publication
|
||||||
|
}
|
||||||
|
if ("dataset".equalsIgnoreCase(result._1.getClassname))
|
||||||
|
return new Dataset
|
||||||
}
|
}
|
||||||
null
|
null
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,37 @@
|
||||||
|
package eu.dnetlib.doiboost.crossref
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.client.AbstractRestClient
|
||||||
|
import org.json4s.{DefaultFormats, JValue}
|
||||||
|
import org.json4s.jackson.JsonMethods.{compact, parse, render}
|
||||||
|
|
||||||
|
/** REST client that pages through the Crossref `/funders` endpoint with a cursor.
  *
  * The members buffer, complete, current_index, scroll_value and doHTTPGETRequest are not
  * declared in this class; presumably they are inherited from AbstractRestClient (verify there).
  *
  * @param cursor cursor sent with the next request; defaults to "*"
  */
class CrossrefFunderRetriever(var cursor: String = "*") extends AbstractRestClient {

  /** Parses one response page: stores every element found under "items" in the buffer as a
    * compact JSON string, reads the next cursor from message/next-cursor and resets the
    * read index.
    *
    * @param input the raw JSON body of the response
    */
  override def extractInfo(input: String): Unit = {
    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
    lazy val json: org.json4s.JValue = parse(input)
    buffer = (json \\ "items").extract[List[JValue]].map(s => compact(render(s)))
    cursor = (json \ "message" \ "next-cursor").extractOrElse[String](null)
    // extractOrElse yields null when the response has no next-cursor: treat that like an
    // empty cursor instead of dereferencing it (cursor.isEmpty would throw on null).
    if (cursor == null || cursor.isEmpty)
      complete = true
    current_index = 0
  }

  /** Builds the URL of the next page to request, using the current cursor.
    *
    * @return the funders endpoint URL with rows=1000 and the current cursor
    */
  def get_url(): String = {
    // NOTE(review): the cursor is appended without URL-encoding — confirm whether
    // doHTTPGETRequest encodes it or whether it must be encoded here.
    println(s"cursor is $cursor")
    s"https://api.crossref.org/funders?rows=1000&cursor=$cursor"
  }

  /** Fetches and parses the next page unless the retriever is already marked complete.
    * When scroll_value is defined it is used as the request URL, otherwise get_url() is used.
    */
  override def getBufferData(): Unit = {
    if (!complete) {
      val response =
        if (scroll_value.isDefined) doHTTPGETRequest(scroll_value.get)
        else doHTTPGETRequest(get_url())
      extractInfo(response)
    }
  }
}
|
|
@ -1,9 +1,9 @@
|
||||||
package eu.dnetlib.doiboost.crossref
|
package eu.dnetlib.doiboost.crossref
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
import eu.dnetlib.dhp.application.{AbstractScalaApplication, ArgumentApplicationParser, SparkScalaApplication}
|
||||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
||||||
import org.apache.spark.rdd.RDD
|
import org.apache.spark.rdd.RDD
|
||||||
import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
|
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
|
||||||
import org.apache.spark.{SparkConf, SparkContext}
|
import org.apache.spark.{SparkConf, SparkContext}
|
||||||
import org.json4s
|
import org.json4s
|
||||||
import org.json4s.DefaultFormats
|
import org.json4s.DefaultFormats
|
||||||
|
@ -12,59 +12,103 @@ import org.slf4j.{Logger, LoggerFactory}
|
||||||
|
|
||||||
import scala.io.Source
|
import scala.io.Source
|
||||||
|
|
||||||
object GenerateCrossrefDataset {
|
|
||||||
|
|
||||||
val log: Logger = LoggerFactory.getLogger(GenerateCrossrefDataset.getClass)
|
class SparkGenerateCrossrefDataset (propertyPath: String, args: Array[String], log: Logger)
|
||||||
|
extends AbstractScalaApplication(propertyPath, args, log: Logger) {
|
||||||
|
|
||||||
implicit val mrEncoder: Encoder[CrossrefDT] = Encoders.kryo[CrossrefDT]
|
|
||||||
|
|
||||||
def crossrefElement(meta: String): CrossrefDT = {
|
/**
|
||||||
|
* This method converts the Crossref JSON record into a CrossrefDT instance
|
||||||
|
*
|
||||||
|
* @param metadata the json metadata
|
||||||
|
* @return the CrossrefDT
|
||||||
|
*/
|
||||||
|
def crossrefElement(metadata: String): CrossrefDT = {
|
||||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||||
lazy val json: json4s.JValue = parse(meta)
|
lazy val json: json4s.JValue = parse(metadata)
|
||||||
val doi: String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String])
|
val doi: String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String])
|
||||||
val timestamp: Long = (json \ "indexed" \ "timestamp").extract[Long]
|
val timestamp: Long = (json \ "indexed" \ "timestamp").extract[Long]
|
||||||
CrossrefDT(doi, meta, timestamp)
|
CrossrefDT(doi, metadata, timestamp)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def main(args: Array[String]): Unit = {
|
|
||||||
val conf = new SparkConf
|
|
||||||
val parser = new ArgumentApplicationParser(
|
|
||||||
Source
|
|
||||||
.fromInputStream(
|
|
||||||
getClass.getResourceAsStream(
|
|
||||||
"/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json"
|
|
||||||
)
|
|
||||||
)
|
|
||||||
.mkString
|
|
||||||
)
|
|
||||||
parser.parseArgument(args)
|
|
||||||
val master = parser.get("master")
|
|
||||||
val sourcePath = parser.get("sourcePath")
|
|
||||||
val targetPath = parser.get("targetPath")
|
|
||||||
|
|
||||||
val spark: SparkSession = SparkSession
|
|
||||||
.builder()
|
|
||||||
.config(conf)
|
|
||||||
.appName(UnpackCrtossrefEntries.getClass.getSimpleName)
|
|
||||||
.master(master)
|
|
||||||
.getOrCreate()
|
|
||||||
val sc: SparkContext = spark.sparkContext
|
|
||||||
|
|
||||||
|
def convertDataset(spark:SparkSession, sourcePath:String, targetPath:String):Unit = {
|
||||||
import spark.implicits._
|
import spark.implicits._
|
||||||
|
spark.read.text(sourcePath).as[String].map(entry => crossrefElement(entry))
|
||||||
val tmp: RDD[String] = sc.textFile(sourcePath, 6000)
|
|
||||||
|
|
||||||
spark
|
|
||||||
.createDataset(tmp)
|
|
||||||
.map(entry => crossrefElement(entry))
|
|
||||||
.write
|
.write
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.save(targetPath)
|
.save(targetPath)
|
||||||
// .map(meta => crossrefElement(meta))
|
|
||||||
// .toDS.as[CrossrefDT]
|
|
||||||
// .write.mode(SaveMode.Overwrite).save(targetPath)
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/** Every Spark application runs this method,
|
||||||
|
* where the whole logic of the spark node is defined
|
||||||
|
*/
|
||||||
|
override def run(): Unit = {
|
||||||
|
|
||||||
|
|
||||||
|
val sourcePath = parser.get("sourcePath")
|
||||||
|
log.info(s"sourcePath is $sourcePath")
|
||||||
|
|
||||||
|
val targetPath = parser.get("targetPath")
|
||||||
|
log.info(s"targetPath is $targetPath")
|
||||||
|
|
||||||
|
|
||||||
|
convertDataset(spark, sourcePath, targetPath)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//object GenerateCrossrefDataset {
|
||||||
|
//
|
||||||
|
// val log: Logger = LoggerFactory.getLogger(GenerateCrossrefDataset.getClass)
|
||||||
|
//
|
||||||
|
// implicit val mrEncoder: Encoder[CrossrefDT] = Encoders.kryo[CrossrefDT]
|
||||||
|
//
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// def main(args: Array[String]): Unit = {
|
||||||
|
// val conf = new SparkConf
|
||||||
|
// val parser = new ArgumentApplicationParser(
|
||||||
|
// Source
|
||||||
|
// .fromInputStream(
|
||||||
|
// getClass.getResourceAsStream(
|
||||||
|
// "/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json"
|
||||||
|
// )
|
||||||
|
// )
|
||||||
|
// .mkString
|
||||||
|
// )
|
||||||
|
// parser.parseArgument(args)
|
||||||
|
// val master = parser.get("master")
|
||||||
|
// val sourcePath = parser.get("sourcePath")
|
||||||
|
// val targetPath = parser.get("targetPath")
|
||||||
|
//
|
||||||
|
// val spark: SparkSession = SparkSession
|
||||||
|
// .builder()
|
||||||
|
// .config(conf)
|
||||||
|
// .appName(UnpackCrtossrefEntries.getClass.getSimpleName)
|
||||||
|
// .master(master)
|
||||||
|
// .getOrCreate()
|
||||||
|
// val sc: SparkContext = spark.sparkContext
|
||||||
|
//
|
||||||
|
// import spark.implicits._
|
||||||
|
//
|
||||||
|
// val tmp: RDD[String] = sc.textFile(sourcePath, 6000)
|
||||||
|
//
|
||||||
|
// spark
|
||||||
|
// .createDataset(tmp)
|
||||||
|
// .map(entry => crossrefElement(entry))
|
||||||
|
// .write
|
||||||
|
// .mode(SaveMode.Overwrite)
|
||||||
|
// .save(targetPath)
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
//}
|
||||||
|
|
|
@ -1,8 +1,10 @@
|
||||||
package eu.dnetlib.doiboost.crossref
|
package eu.dnetlib.doiboost.crossref
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||||
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
||||||
import eu.dnetlib.dhp.schema.oaf
|
import eu.dnetlib.dhp.schema.oaf
|
||||||
import eu.dnetlib.dhp.schema.oaf.{Oaf, Publication, Relation, Dataset => OafDataset}
|
import eu.dnetlib.dhp.schema.oaf.{Oaf, Publication, Relation, Dataset => OafDataset}
|
||||||
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory
|
||||||
import org.apache.commons.io.IOUtils
|
import org.apache.commons.io.IOUtils
|
||||||
import org.apache.spark.SparkConf
|
import org.apache.spark.SparkConf
|
||||||
import org.apache.spark.sql._
|
import org.apache.spark.sql._
|
||||||
|
@ -39,12 +41,19 @@ object SparkMapDumpIntoOAF {
|
||||||
implicit val mapEncoderRelatons: Encoder[Relation] = Encoders.kryo[Relation]
|
implicit val mapEncoderRelatons: Encoder[Relation] = Encoders.kryo[Relation]
|
||||||
implicit val mapEncoderDatasets: Encoder[oaf.Dataset] = Encoders.kryo[OafDataset]
|
implicit val mapEncoderDatasets: Encoder[oaf.Dataset] = Encoders.kryo[OafDataset]
|
||||||
|
|
||||||
|
val isLookupUrl: String = parser.get("isLookupUrl")
|
||||||
|
|
||||||
|
|
||||||
|
val isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl)
|
||||||
|
val vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService)
|
||||||
|
require(vocabularies != null)
|
||||||
|
|
||||||
val targetPath = parser.get("targetPath")
|
val targetPath = parser.get("targetPath")
|
||||||
|
|
||||||
spark.read
|
spark.read
|
||||||
.load(parser.get("sourcePath"))
|
.load(parser.get("sourcePath"))
|
||||||
.as[CrossrefDT]
|
.as[CrossrefDT]
|
||||||
.flatMap(k => Crossref2Oaf.convert(k.json))
|
.flatMap(k => Crossref2Oaf.convert(k.json,vocabularies))
|
||||||
.filter(o => o != null)
|
.filter(o => o != null)
|
||||||
.write
|
.write
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
|
|
|
@ -0,0 +1,42 @@
|
||||||
|
package eu.dnetlib.doiboost;
|
||||||
|
import static org.mockito.Mockito.lenient;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.common.test.TestUtils;
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.mockito.Mock;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
|
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
|
||||||
|
public abstract class AbstractVocabularyTest {
|
||||||
|
|
||||||
|
@Mock
|
||||||
|
protected ISLookUpService isLookUpService;
|
||||||
|
|
||||||
|
protected VocabularyGroup vocabularies;
|
||||||
|
|
||||||
|
public void setUpVocabulary() throws ISLookUpException, IOException {
|
||||||
|
lenient().when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARIES_XQUERY)).thenReturn(vocs());
|
||||||
|
|
||||||
|
lenient()
|
||||||
|
.when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARY_SYNONYMS_XQUERY))
|
||||||
|
.thenReturn(synonyms());
|
||||||
|
vocabularies = VocabularyGroup.loadVocsFromIS(isLookUpService);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<String> vocs() throws IOException {
|
||||||
|
return TestUtils.getVocabulariesMock();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<String> synonyms() throws IOException {
|
||||||
|
return TestUtils.getSynonymsMock();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
|
@ -19,7 +19,7 @@
|
||||||
"start": {
|
"start": {
|
||||||
"date-parts": [
|
"date-parts": [
|
||||||
[
|
[
|
||||||
2021,
|
2025,
|
||||||
2,
|
2,
|
||||||
22
|
22
|
||||||
]
|
]
|
||||||
|
@ -35,7 +35,7 @@
|
||||||
"start": {
|
"start": {
|
||||||
"date-parts": [
|
"date-parts": [
|
||||||
[
|
[
|
||||||
2021,
|
2025,
|
||||||
2,
|
2,
|
||||||
22
|
22
|
||||||
]
|
]
|
||||||
|
@ -94,7 +94,6 @@
|
||||||
"family": "Stein",
|
"family": "Stein",
|
||||||
"sequence": "first",
|
"sequence": "first",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -104,7 +103,6 @@
|
||||||
"family": "Velzen",
|
"family": "Velzen",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -114,7 +112,6 @@
|
||||||
"family": "Kowalski",
|
"family": "Kowalski",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -122,7 +119,6 @@
|
||||||
"family": "Franckowiak",
|
"family": "Franckowiak",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -132,7 +128,6 @@
|
||||||
"family": "Gezari",
|
"family": "Gezari",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -142,7 +137,6 @@
|
||||||
"family": "Miller-Jones",
|
"family": "Miller-Jones",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -150,7 +144,6 @@
|
||||||
"family": "Frederick",
|
"family": "Frederick",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -160,7 +153,6 @@
|
||||||
"family": "Sfaradi",
|
"family": "Sfaradi",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -168,7 +160,6 @@
|
||||||
"family": "Bietenholz",
|
"family": "Bietenholz",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -178,7 +169,6 @@
|
||||||
"family": "Horesh",
|
"family": "Horesh",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -186,7 +176,6 @@
|
||||||
"family": "Fender",
|
"family": "Fender",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -196,7 +185,6 @@
|
||||||
"family": "Garrappa",
|
"family": "Garrappa",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -206,7 +194,6 @@
|
||||||
"family": "Ahumada",
|
"family": "Ahumada",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -214,7 +201,6 @@
|
||||||
"family": "Andreoni",
|
"family": "Andreoni",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -222,7 +208,6 @@
|
||||||
"family": "Belicki",
|
"family": "Belicki",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -232,7 +217,6 @@
|
||||||
"family": "Bellm",
|
"family": "Bellm",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -240,7 +224,6 @@
|
||||||
"family": "Böttcher",
|
"family": "Böttcher",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -248,7 +231,6 @@
|
||||||
"family": "Brinnel",
|
"family": "Brinnel",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -256,7 +238,6 @@
|
||||||
"family": "Burruss",
|
"family": "Burruss",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -266,7 +247,6 @@
|
||||||
"family": "Cenko",
|
"family": "Cenko",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -276,7 +256,6 @@
|
||||||
"family": "Coughlin",
|
"family": "Coughlin",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -286,7 +265,6 @@
|
||||||
"family": "Cunningham",
|
"family": "Cunningham",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -294,7 +272,6 @@
|
||||||
"family": "Drake",
|
"family": "Drake",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -302,7 +279,6 @@
|
||||||
"family": "Farrar",
|
"family": "Farrar",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -310,7 +286,6 @@
|
||||||
"family": "Feeney",
|
"family": "Feeney",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -318,7 +293,6 @@
|
||||||
"family": "Foley",
|
"family": "Foley",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -328,7 +302,6 @@
|
||||||
"family": "Gal-Yam",
|
"family": "Gal-Yam",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -336,7 +309,6 @@
|
||||||
"family": "Golkhou",
|
"family": "Golkhou",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -346,7 +318,6 @@
|
||||||
"family": "Goobar",
|
"family": "Goobar",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -356,7 +327,6 @@
|
||||||
"family": "Graham",
|
"family": "Graham",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -364,7 +334,6 @@
|
||||||
"family": "Hammerstein",
|
"family": "Hammerstein",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -374,7 +343,6 @@
|
||||||
"family": "Helou",
|
"family": "Helou",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -384,7 +352,6 @@
|
||||||
"family": "Hung",
|
"family": "Hung",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -392,7 +359,6 @@
|
||||||
"family": "Kasliwal",
|
"family": "Kasliwal",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -402,7 +368,6 @@
|
||||||
"family": "Kilpatrick",
|
"family": "Kilpatrick",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -412,7 +377,6 @@
|
||||||
"family": "Kong",
|
"family": "Kong",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -422,7 +386,6 @@
|
||||||
"family": "Kupfer",
|
"family": "Kupfer",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -432,7 +395,6 @@
|
||||||
"family": "Laher",
|
"family": "Laher",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -442,7 +404,6 @@
|
||||||
"family": "Mahabal",
|
"family": "Mahabal",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -452,7 +413,6 @@
|
||||||
"family": "Masci",
|
"family": "Masci",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -462,7 +422,6 @@
|
||||||
"family": "Necker",
|
"family": "Necker",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -472,7 +431,6 @@
|
||||||
"family": "Nordin",
|
"family": "Nordin",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -480,7 +438,6 @@
|
||||||
"family": "Perley",
|
"family": "Perley",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -490,7 +447,6 @@
|
||||||
"family": "Rigault",
|
"family": "Rigault",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -500,7 +456,6 @@
|
||||||
"family": "Reusch",
|
"family": "Reusch",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -508,7 +463,6 @@
|
||||||
"family": "Rodriguez",
|
"family": "Rodriguez",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -518,7 +472,6 @@
|
||||||
"family": "Rojas-Bravo",
|
"family": "Rojas-Bravo",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -528,7 +481,6 @@
|
||||||
"family": "Rusholme",
|
"family": "Rusholme",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -538,7 +490,6 @@
|
||||||
"family": "Shupe",
|
"family": "Shupe",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -548,7 +499,6 @@
|
||||||
"family": "Singer",
|
"family": "Singer",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -558,7 +508,6 @@
|
||||||
"family": "Sollerman",
|
"family": "Sollerman",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -566,7 +515,6 @@
|
||||||
"family": "Soumagnac",
|
"family": "Soumagnac",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -574,7 +522,6 @@
|
||||||
"family": "Stern",
|
"family": "Stern",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -582,7 +529,6 @@
|
||||||
"family": "Taggart",
|
"family": "Taggart",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -590,7 +536,6 @@
|
||||||
"family": "van Santen",
|
"family": "van Santen",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -598,7 +543,6 @@
|
||||||
"family": "Ward",
|
"family": "Ward",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -606,7 +550,6 @@
|
||||||
"family": "Woudt",
|
"family": "Woudt",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -616,7 +559,6 @@
|
||||||
"family": "Yao",
|
"family": "Yao",
|
||||||
"sequence": "additional",
|
"sequence": "additional",
|
||||||
"affiliation": [
|
"affiliation": [
|
||||||
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
@ -1412,7 +1354,6 @@
|
||||||
"Nature Astronomy"
|
"Nature Astronomy"
|
||||||
],
|
],
|
||||||
"original-title": [
|
"original-title": [
|
||||||
|
|
||||||
],
|
],
|
||||||
"language": "en",
|
"language": "en",
|
||||||
"link": [
|
"link": [
|
||||||
|
@ -1438,30 +1379,28 @@
|
||||||
"deposited": {
|
"deposited": {
|
||||||
"date-parts": [
|
"date-parts": [
|
||||||
[
|
[
|
||||||
2021,
|
2051,
|
||||||
5,
|
5,
|
||||||
17
|
17
|
||||||
]
|
]
|
||||||
],
|
],
|
||||||
"date-time": "2021-05-17T15:08:12Z",
|
"date-time": "2051-05-17T15:08:12Z",
|
||||||
"timestamp": 1621264092000
|
"timestamp": 1621264092000
|
||||||
},
|
},
|
||||||
"score": 1.0,
|
"score": 1.0,
|
||||||
"subtitle": [
|
"subtitle": [
|
||||||
|
|
||||||
],
|
],
|
||||||
"short-title": [
|
"short-title": [
|
||||||
|
|
||||||
],
|
],
|
||||||
"issued": {
|
"issued": {
|
||||||
"date-parts": [
|
"date-parts": [
|
||||||
[
|
[
|
||||||
2021,
|
2051,
|
||||||
2,
|
2,
|
||||||
22
|
22
|
||||||
]
|
]
|
||||||
],
|
],
|
||||||
"date-time": "2021-05-17T15:08:12Z"
|
"date-time": "2051-05-17T15:08:12Z"
|
||||||
},
|
},
|
||||||
"references-count": 83,
|
"references-count": 83,
|
||||||
"journal-issue": {
|
"journal-issue": {
|
||||||
|
@ -1481,7 +1420,6 @@
|
||||||
"URL": "http://dx.doi.org/10.1038/s41550-020-01295-8",
|
"URL": "http://dx.doi.org/10.1038/s41550-020-01295-8",
|
||||||
"relation": {
|
"relation": {
|
||||||
"cites": [
|
"cites": [
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"ISSN": [
|
"ISSN": [
|
||||||
|
|
|
@ -0,0 +1,62 @@
|
||||||
|
package eu.dnetlib.dhp.doiboost.crossref
|
||||||
|
|
||||||
|
import eu.dnetlib.doiboost.crossref.CrossrefFunderRetriever
|
||||||
|
import org.json4s.DefaultFormats
|
||||||
|
import org.json4s.JsonAST.JString
|
||||||
|
import org.json4s.jackson.JsonMethods.parse
|
||||||
|
import org.junit.jupiter.api.Test
|
||||||
|
|
||||||
|
import java.io.PrintWriter
|
||||||
|
/** Dumps the funders returned by [[CrossrefFunderRetriever]] to a tab-separated file. */
class CrossrefFunderTest {

  /** Parses a single funder record.
    *
    * @param input the funder record as a JSON string
    * @return a triple (name, uri, descendants), where descendants are the string
    *         values found under the "descendants" field
    */
  def parse_funder(input: String): (String, String, List[String]) = {
    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
    lazy val json: org.json4s.JValue = parse(input)

    // Only string entries of "descendants" are kept; other node types are skipped by the pattern.
    val descendants: List[String] = for { JString(desc) <- json \ "descendants" } yield desc

    ((json \ "name").extract[String], (json \ "uri").extract[String], descendants)
  }

  /** Writes one line per retrieved funder (name, uri, "--"-joined descendants) to /tmp/funder_names. */
  @Test
  def testFunderRelationshipsMapping(): Unit = {
    val cf = new CrossrefFunderRetriever()
    val w = new PrintWriter("/tmp/funder_names")

    // Close the writer even when retrieval or parsing fails, so the file handle is not leaked.
    try {
      cf.map(s => parse_funder(s))
        .foreach(s => w.write(s"${s._1} \t${s._2} \t${s._3.mkString("--")}\t\n"))
    } finally {
      w.close()
    }
  }

}
|
|
@ -2,21 +2,32 @@ package eu.dnetlib.dhp.doiboost.crossref
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf._
|
import eu.dnetlib.dhp.schema.oaf._
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils
|
import eu.dnetlib.dhp.utils.DHPUtils
|
||||||
|
import eu.dnetlib.doiboost.AbstractVocabularyTest
|
||||||
import eu.dnetlib.doiboost.crossref.Crossref2Oaf
|
import eu.dnetlib.doiboost.crossref.Crossref2Oaf
|
||||||
import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig}
|
import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig}
|
||||||
import org.junit.jupiter.api.Assertions._
|
import org.junit.jupiter.api.Assertions._
|
||||||
import org.junit.jupiter.api.Test
|
import org.junit.jupiter.api.{BeforeAll, BeforeEach, Test}
|
||||||
|
import org.junit.jupiter.api.extension.ExtendWith
|
||||||
|
import org.mockito.junit.jupiter.MockitoExtension
|
||||||
import org.slf4j.{Logger, LoggerFactory}
|
import org.slf4j.{Logger, LoggerFactory}
|
||||||
|
|
||||||
import scala.collection.JavaConverters._
|
import scala.collection.JavaConverters._
|
||||||
import scala.io.Source
|
import scala.io.Source
|
||||||
import scala.util.matching.Regex
|
import scala.util.matching.Regex
|
||||||
|
|
||||||
class CrossrefMappingTest {
|
|
||||||
|
@ExtendWith(Array(classOf[MockitoExtension]))
|
||||||
|
class CrossrefMappingTest extends AbstractVocabularyTest {
|
||||||
|
|
||||||
val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass)
|
val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass)
|
||||||
val mapper = new ObjectMapper()
|
val mapper = new ObjectMapper()
|
||||||
|
|
||||||
|
@BeforeEach
|
||||||
|
def setUp(): Unit = {
|
||||||
|
|
||||||
|
setUpVocabulary()
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
def testFunderRelationshipsMapping(): Unit = {
|
def testFunderRelationshipsMapping(): Unit = {
|
||||||
val template = Source
|
val template = Source
|
||||||
|
@ -33,13 +44,13 @@ class CrossrefMappingTest {
|
||||||
|
|
||||||
for (line <- funder_doi.lines) {
|
for (line <- funder_doi.lines) {
|
||||||
val json = template.replace("%s", line)
|
val json = template.replace("%s", line)
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
checkRelation(resultList)
|
checkRelation(resultList)
|
||||||
}
|
}
|
||||||
for (line <- funder_name.lines) {
|
for (line <- funder_name.lines) {
|
||||||
val json = template.replace("%s", line)
|
val json = template.replace("%s", line)
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
checkRelation(resultList)
|
checkRelation(resultList)
|
||||||
}
|
}
|
||||||
|
@ -79,7 +90,7 @@ class CrossrefMappingTest {
|
||||||
Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/issue_date.json")).mkString
|
Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/issue_date.json")).mkString
|
||||||
assertNotNull(json)
|
assertNotNull(json)
|
||||||
assertFalse(json.isEmpty)
|
assertFalse(json.isEmpty)
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json,vocabularies)
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
val items = resultList.filter(p => p.isInstanceOf[Result])
|
val items = resultList.filter(p => p.isInstanceOf[Result])
|
||||||
|
@ -98,7 +109,7 @@ class CrossrefMappingTest {
|
||||||
assertNotNull(json)
|
assertNotNull(json)
|
||||||
assertFalse(json.isEmpty)
|
assertFalse(json.isEmpty)
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json,vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
|
@ -120,7 +131,7 @@ class CrossrefMappingTest {
|
||||||
assertNotNull(json)
|
assertNotNull(json)
|
||||||
assertFalse(json.isEmpty);
|
assertFalse(json.isEmpty);
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json,vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
|
@ -141,7 +152,7 @@ class CrossrefMappingTest {
|
||||||
assertNotNull(json)
|
assertNotNull(json)
|
||||||
assertFalse(json.isEmpty);
|
assertFalse(json.isEmpty);
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json,vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
|
@ -177,7 +188,7 @@ class CrossrefMappingTest {
|
||||||
|
|
||||||
assertFalse(json.isEmpty)
|
assertFalse(json.isEmpty)
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json,vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
val rels: List[Relation] =
|
val rels: List[Relation] =
|
||||||
|
@ -197,7 +208,7 @@ class CrossrefMappingTest {
|
||||||
|
|
||||||
assertFalse(json.isEmpty);
|
assertFalse(json.isEmpty);
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json,vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
|
@ -273,7 +284,7 @@ class CrossrefMappingTest {
|
||||||
|
|
||||||
assertFalse(json.isEmpty);
|
assertFalse(json.isEmpty);
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json,vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
|
@ -357,7 +368,7 @@ class CrossrefMappingTest {
|
||||||
|
|
||||||
assertFalse(json.isEmpty);
|
assertFalse(json.isEmpty);
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json,vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
|
@ -405,7 +416,7 @@ class CrossrefMappingTest {
|
||||||
|
|
||||||
assertFalse(json.isEmpty);
|
assertFalse(json.isEmpty);
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json,vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
|
@ -485,7 +496,7 @@ class CrossrefMappingTest {
|
||||||
|
|
||||||
assertFalse(json.isEmpty);
|
assertFalse(json.isEmpty);
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json,vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
|
@ -508,7 +519,7 @@ class CrossrefMappingTest {
|
||||||
val line: String =
|
val line: String =
|
||||||
"\"funder\": [{\"name\": \"Wellcome Trust Masters Fellowship\",\"award\": [\"090633\"]}],"
|
"\"funder\": [{\"name\": \"Wellcome Trust Masters Fellowship\",\"award\": [\"090633\"]}],"
|
||||||
val json = template.replace("%s", line)
|
val json = template.replace("%s", line)
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json,vocabularies)
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
val items = resultList.filter(p => p.isInstanceOf[Publication])
|
val items = resultList.filter(p => p.isInstanceOf[Publication])
|
||||||
val result: Result = items.head.asInstanceOf[Publication]
|
val result: Result = items.head.asInstanceOf[Publication]
|
||||||
|
@ -527,7 +538,7 @@ class CrossrefMappingTest {
|
||||||
.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/article.json"))
|
.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/article.json"))
|
||||||
.mkString
|
.mkString
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(template)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(template, vocabularies)
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
val items = resultList.filter(p => p.isInstanceOf[Publication])
|
val items = resultList.filter(p => p.isInstanceOf[Publication])
|
||||||
val result: Result = items.head.asInstanceOf[Publication]
|
val result: Result = items.head.asInstanceOf[Publication]
|
||||||
|
@ -551,7 +562,7 @@ class CrossrefMappingTest {
|
||||||
assertNotNull(json)
|
assertNotNull(json)
|
||||||
assertFalse(json.isEmpty);
|
assertFalse(json.isEmpty);
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json,vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
|
@ -581,7 +592,7 @@ class CrossrefMappingTest {
|
||||||
assertNotNull(json)
|
assertNotNull(json)
|
||||||
assertFalse(json.isEmpty);
|
assertFalse(json.isEmpty);
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json,vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
|
@ -616,7 +627,7 @@ class CrossrefMappingTest {
|
||||||
assertNotNull(json)
|
assertNotNull(json)
|
||||||
assertFalse(json.isEmpty);
|
assertFalse(json.isEmpty);
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json,vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
|
@ -651,7 +662,7 @@ class CrossrefMappingTest {
|
||||||
assertNotNull(json)
|
assertNotNull(json)
|
||||||
assertFalse(json.isEmpty);
|
assertFalse(json.isEmpty);
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json,vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
|
@ -686,7 +697,7 @@ class CrossrefMappingTest {
|
||||||
assertNotNull(json)
|
assertNotNull(json)
|
||||||
assertFalse(json.isEmpty);
|
assertFalse(json.isEmpty);
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json,vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
|
@ -719,7 +730,7 @@ class CrossrefMappingTest {
|
||||||
assertNotNull(json)
|
assertNotNull(json)
|
||||||
assertFalse(json.isEmpty);
|
assertFalse(json.isEmpty);
|
||||||
|
|
||||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
val resultList: List[Oaf] = Crossref2Oaf.convert(json,vocabularies)
|
||||||
|
|
||||||
assertTrue(resultList.nonEmpty)
|
assertTrue(resultList.nonEmpty)
|
||||||
|
|
||||||
|
|
2
pom.xml
2
pom.xml
|
@ -801,7 +801,7 @@
|
||||||
<mockito-core.version>3.3.3</mockito-core.version>
|
<mockito-core.version>3.3.3</mockito-core.version>
|
||||||
<mongodb.driver.version>3.4.2</mongodb.driver.version>
|
<mongodb.driver.version>3.4.2</mongodb.driver.version>
|
||||||
<vtd.version>[2.12,3.0)</vtd.version>
|
<vtd.version>[2.12,3.0)</vtd.version>
|
||||||
<dhp-schemas.version>[2.12.0]</dhp-schemas.version>
|
<dhp-schemas.version>2.12.1-DOIBOOST_REFACTOR-SNAPSHOT</dhp-schemas.version>
|
||||||
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
|
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
|
||||||
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
|
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
|
||||||
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>
|
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>
|
||||||
|
|
Loading…
Reference in New Issue