forked from D-Net/dnet-hadoop
refactoring the Oaf records merge utilities into dhp-common
This commit is contained in:
parent
7ae7e8aa06
commit
aaa73f89d1
|
@ -1,6 +1,24 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.merge;
|
package eu.dnetlib.dhp.oa.merge;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
import static org.apache.spark.sql.functions.col;
|
||||||
|
import static org.apache.spark.sql.functions.when;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.concurrent.ExecutionException;
|
||||||
|
import java.util.concurrent.ForkJoinPool;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.api.java.function.ReduceFunction;
|
||||||
|
import org.apache.spark.sql.*;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
|
@ -12,25 +30,8 @@ import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
import org.apache.spark.SparkConf;
|
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
|
||||||
import org.apache.spark.api.java.function.ReduceFunction;
|
|
||||||
import org.apache.spark.sql.*;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Optional;
|
|
||||||
import java.util.concurrent.ExecutionException;
|
|
||||||
import java.util.concurrent.ForkJoinPool;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
|
||||||
import static org.apache.spark.sql.functions.col;
|
|
||||||
import static org.apache.spark.sql.functions.when;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Groups the graph content by entity identifier to ensure ID uniqueness
|
* Groups the graph content by entity identifier to ensure ID uniqueness
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -0,0 +1,73 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
public class CleaningFunctions {
|
||||||
|
|
||||||
|
public static final String DOI_PREFIX_REGEX = "(^10\\.|\\/10\\.)";
|
||||||
|
public static final String DOI_PREFIX = "10.";
|
||||||
|
|
||||||
|
public static final Set<String> PID_BLACKLIST = new HashSet<>();
|
||||||
|
|
||||||
|
static {
|
||||||
|
PID_BLACKLIST.add("none");
|
||||||
|
PID_BLACKLIST.add("na");
|
||||||
|
}
|
||||||
|
|
||||||
|
public CleaningFunctions() {}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Utility method that filter PID values on a per-type basis.
|
||||||
|
* @param s the PID whose value will be checked.
|
||||||
|
* @return false if the pid matches the filter criteria, true otherwise.
|
||||||
|
*/
|
||||||
|
public static boolean pidFilter(StructuredProperty s) {
|
||||||
|
final String pidValue = s.getValue();
|
||||||
|
if (Objects.isNull(s.getQualifier()) ||
|
||||||
|
StringUtils.isBlank(pidValue) ||
|
||||||
|
StringUtils.isBlank(pidValue.replaceAll("(?:\\n|\\r|\\t|\\s)", ""))) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (CleaningFunctions.PID_BLACKLIST.contains(pidValue)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return !PidBlacklistProvider.getBlacklist(s.getQualifier().getClassid()).contains(pidValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Utility method that normalises PID values on a per-type basis.
|
||||||
|
* @param pid the PID whose value will be normalised.
|
||||||
|
* @return the PID containing the normalised value.
|
||||||
|
*/
|
||||||
|
public static StructuredProperty normalizePidValue(StructuredProperty pid) {
|
||||||
|
pid.setValue(
|
||||||
|
normalizePidValue(
|
||||||
|
pid.getQualifier().getClassid(),
|
||||||
|
pid.getValue()));
|
||||||
|
|
||||||
|
return pid;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String normalizePidValue(String pidType, String pidValue) {
|
||||||
|
String value = Optional
|
||||||
|
.ofNullable(pidValue)
|
||||||
|
.map(String::trim)
|
||||||
|
.orElseThrow(() -> new IllegalArgumentException("PID value cannot be empty"));
|
||||||
|
|
||||||
|
switch (pidType) {
|
||||||
|
|
||||||
|
// TODO add cleaning for more PID types as needed
|
||||||
|
case "doi":
|
||||||
|
return value.toLowerCase().replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX);
|
||||||
|
}
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,290 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import com.google.common.collect.HashBiMap;
|
||||||
|
import com.google.common.collect.Maps;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import org.apache.commons.codec.binary.Hex;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.security.MessageDigest;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.function.Function;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
|
import static com.google.common.base.Preconditions.checkArgument;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory class for OpenAIRE identifiers in the Graph
|
||||||
|
*/
|
||||||
|
public class IdentifierFactory implements Serializable {
|
||||||
|
|
||||||
|
public static final String ID_SEPARATOR = "::";
|
||||||
|
public static final String ID_PREFIX_SEPARATOR = "|";
|
||||||
|
|
||||||
|
public static final int ID_PREFIX_LEN = 12;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Declares the associations PID_TYPE -> [DATASOURCE ID, NAME] considered authoritative for that PID_TYPE.
|
||||||
|
* The id of the record (source_::id) will be rewritten as pidType_::id)
|
||||||
|
*/
|
||||||
|
public static final Map<PidType, HashBiMap<String, String>> PID_AUTHORITY = Maps.newHashMap();
|
||||||
|
|
||||||
|
static {
|
||||||
|
PID_AUTHORITY.put(PidType.doi, HashBiMap.create());
|
||||||
|
PID_AUTHORITY.get(PidType.doi).put(CROSSREF_ID, "Crossref");
|
||||||
|
PID_AUTHORITY.get(PidType.doi).put(DATACITE_ID, "Datacite");
|
||||||
|
PID_AUTHORITY.get(PidType.doi).put(ZENODO_OD_ID, "ZENODO");
|
||||||
|
PID_AUTHORITY.get(PidType.doi).put(ZENODO_R3_ID, "Zenodo");
|
||||||
|
|
||||||
|
PID_AUTHORITY.put(PidType.pmc, HashBiMap.create());
|
||||||
|
PID_AUTHORITY.get(PidType.pmc).put(EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central");
|
||||||
|
PID_AUTHORITY.get(PidType.pmc).put(PUBMED_CENTRAL_ID, "PubMed Central");
|
||||||
|
|
||||||
|
PID_AUTHORITY.put(PidType.pmid, HashBiMap.create());
|
||||||
|
PID_AUTHORITY.get(PidType.pmid).put(EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central");
|
||||||
|
PID_AUTHORITY.get(PidType.pmid).put(PUBMED_CENTRAL_ID, "PubMed Central");
|
||||||
|
|
||||||
|
PID_AUTHORITY.put(PidType.arXiv, HashBiMap.create());
|
||||||
|
PID_AUTHORITY.get(PidType.arXiv).put(ARXIV_ID, "arXiv.org e-Print Archive");
|
||||||
|
|
||||||
|
PID_AUTHORITY.put(PidType.w3id, HashBiMap.create());
|
||||||
|
PID_AUTHORITY.get(PidType.w3id).put(ROHUB_ID, "ROHub");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Declares the associations PID_TYPE -> [DATASOURCE ID, PID SUBSTRING] considered as delegated authority for that
|
||||||
|
* PID_TYPE. Example, Zenodo is delegated to forge DOIs that contain the 'zenodo' word.
|
||||||
|
*
|
||||||
|
* If a record with the same id (same pid) comes from 2 data sources, the one coming from a delegated source wins. E.g. Zenodo records win over those from Datacite.
|
||||||
|
* See also https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/187 and the class dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
|
||||||
|
*/
|
||||||
|
public static final Map<PidType, Map<String, String>> DELEGATED_PID_AUTHORITY = Maps.newHashMap();
|
||||||
|
|
||||||
|
static {
|
||||||
|
DELEGATED_PID_AUTHORITY.put(PidType.doi, new HashMap<>());
|
||||||
|
DELEGATED_PID_AUTHORITY.get(PidType.doi).put(ZENODO_OD_ID, "zenodo");
|
||||||
|
DELEGATED_PID_AUTHORITY.get(PidType.doi).put(ZENODO_R3_ID, "zenodo");
|
||||||
|
DELEGATED_PID_AUTHORITY.put(PidType.w3id, new HashMap<>());
|
||||||
|
DELEGATED_PID_AUTHORITY.get(PidType.w3id).put(ROHUB_ID, "ro-id");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Declares the associations PID_TYPE -> [DATASOURCE ID, NAME] whose records are considered enrichment for the graph.
|
||||||
|
* Their OpenAIRE ID is built from the declared PID type. Are merged with their corresponding record, identified by
|
||||||
|
* the same OpenAIRE id.
|
||||||
|
*/
|
||||||
|
public static final Map<PidType, HashBiMap<String, String>> ENRICHMENT_PROVIDER = Maps.newHashMap();
|
||||||
|
|
||||||
|
static {
|
||||||
|
ENRICHMENT_PROVIDER.put(PidType.doi, HashBiMap.create());
|
||||||
|
ENRICHMENT_PROVIDER.get(PidType.doi).put(OPEN_APC_ID, OPEN_APC_NAME);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Set<String> delegatedAuthorityDatasourceIds() {
|
||||||
|
return DELEGATED_PID_AUTHORITY.values()
|
||||||
|
.stream()
|
||||||
|
.flatMap(m -> m.keySet().stream())
|
||||||
|
.collect(Collectors.toCollection(HashSet::new));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static List<StructuredProperty> getPids(List<StructuredProperty> pid, KeyValue collectedFrom) {
|
||||||
|
return pidFromInstance(pid, collectedFrom, true).distinct().collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
|
public static <T extends Result> String createDOIBoostIdentifier(T entity) {
|
||||||
|
if (entity == null)
|
||||||
|
return null;
|
||||||
|
|
||||||
|
StructuredProperty pid = null;
|
||||||
|
if (entity.getPid() != null) {
|
||||||
|
pid = entity
|
||||||
|
.getPid()
|
||||||
|
.stream()
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.filter(s -> s.getQualifier() != null && "doi".equalsIgnoreCase(s.getQualifier().getClassid()))
|
||||||
|
.filter(CleaningFunctions::pidFilter)
|
||||||
|
.findAny()
|
||||||
|
.orElse(null);
|
||||||
|
} else {
|
||||||
|
if (entity.getInstance() != null) {
|
||||||
|
pid = entity
|
||||||
|
.getInstance()
|
||||||
|
.stream()
|
||||||
|
.filter(i -> i.getPid() != null)
|
||||||
|
.flatMap(i -> i.getPid().stream())
|
||||||
|
.filter(CleaningFunctions::pidFilter)
|
||||||
|
.findAny()
|
||||||
|
.orElse(null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (pid != null)
|
||||||
|
return idFromPid(entity, pid, true);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates an identifier from the most relevant PID (if available) provided by a known PID authority in the given
|
||||||
|
* entity T. Returns entity.id when none of the PIDs meet the selection criteria is available.
|
||||||
|
*
|
||||||
|
* @param entity the entity providing PIDs and a default ID.
|
||||||
|
* @param <T> the specific entity type. Currently Organization and Result subclasses are supported.
|
||||||
|
* @param md5 indicates whether should hash the PID value or not.
|
||||||
|
* @return an identifier from the most relevant PID, entity.id otherwise
|
||||||
|
*/
|
||||||
|
public static <T extends OafEntity> String createIdentifier(T entity, boolean md5) {
|
||||||
|
|
||||||
|
checkArgument(StringUtils.isNoneBlank(entity.getId()), "missing entity identifier");
|
||||||
|
|
||||||
|
final Map<String, Set<StructuredProperty>> pids = extractPids(entity);
|
||||||
|
|
||||||
|
return pids
|
||||||
|
.values()
|
||||||
|
.stream()
|
||||||
|
.flatMap(Set::stream)
|
||||||
|
.min(new PidComparator<>(entity))
|
||||||
|
.map(
|
||||||
|
min -> Optional
|
||||||
|
.ofNullable(pids.get(min.getQualifier().getClassid()))
|
||||||
|
.map(
|
||||||
|
p -> p
|
||||||
|
.stream()
|
||||||
|
.sorted(new PidValueComparator())
|
||||||
|
.findFirst()
|
||||||
|
.map(s -> idFromPid(entity, s, md5))
|
||||||
|
.orElseGet(entity::getId))
|
||||||
|
.orElseGet(entity::getId))
|
||||||
|
.orElseGet(entity::getId);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <T extends OafEntity> Map<String, Set<StructuredProperty>> extractPids(T entity) {
|
||||||
|
if (entity instanceof Result) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(((Result) entity).getInstance())
|
||||||
|
.map(IdentifierFactory::mapPids)
|
||||||
|
.orElse(new HashMap<>());
|
||||||
|
} else {
|
||||||
|
return entity
|
||||||
|
.getPid()
|
||||||
|
.stream()
|
||||||
|
.map(CleaningFunctions::normalizePidValue)
|
||||||
|
.filter(CleaningFunctions::pidFilter)
|
||||||
|
.collect(
|
||||||
|
Collectors
|
||||||
|
.groupingBy(
|
||||||
|
p -> p.getQualifier().getClassid(),
|
||||||
|
Collectors.mapping(p -> p, Collectors.toCollection(HashSet::new))));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Map<String, Set<StructuredProperty>> mapPids(List<Instance> instance) {
|
||||||
|
return instance
|
||||||
|
.stream()
|
||||||
|
.map(i -> pidFromInstance(i.getPid(), i.getCollectedfrom(), false))
|
||||||
|
.flatMap(Function.identity())
|
||||||
|
.collect(
|
||||||
|
Collectors
|
||||||
|
.groupingBy(
|
||||||
|
p -> p.getQualifier().getClassid(),
|
||||||
|
Collectors.mapping(p -> p, Collectors.toCollection(HashSet::new))));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Stream<StructuredProperty> pidFromInstance(List<StructuredProperty> pid, KeyValue collectedFrom,
|
||||||
|
boolean mapHandles) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(pid)
|
||||||
|
.map(
|
||||||
|
pp -> pp
|
||||||
|
.stream()
|
||||||
|
// filter away PIDs provided by a DS that is not considered an authority for the
|
||||||
|
// given PID Type
|
||||||
|
.filter(p -> shouldFilterPidByCriteria(collectedFrom, p, mapHandles))
|
||||||
|
.map(CleaningFunctions::normalizePidValue)
|
||||||
|
.filter(p -> isNotFromDelegatedAuthority(collectedFrom, p))
|
||||||
|
.filter(CleaningFunctions::pidFilter))
|
||||||
|
.orElse(Stream.empty());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static boolean shouldFilterPidByCriteria(KeyValue collectedFrom, StructuredProperty p, boolean mapHandles) {
|
||||||
|
final PidType pType = PidType.tryValueOf(p.getQualifier().getClassid());
|
||||||
|
|
||||||
|
if (Objects.isNull(collectedFrom)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean isEnrich = Optional
|
||||||
|
.ofNullable(ENRICHMENT_PROVIDER.get(pType))
|
||||||
|
.map(enrich -> enrich.containsKey(collectedFrom.getKey())
|
||||||
|
|| enrich.containsValue(collectedFrom.getValue()))
|
||||||
|
.orElse(false);
|
||||||
|
|
||||||
|
boolean isAuthority = Optional
|
||||||
|
.ofNullable(PID_AUTHORITY.get(pType))
|
||||||
|
.map(authorities -> authorities.containsKey(collectedFrom.getKey())
|
||||||
|
|| authorities.containsValue(collectedFrom.getValue()))
|
||||||
|
.orElse(false);
|
||||||
|
|
||||||
|
return (mapHandles && pType.equals(PidType.handle)) || isEnrich || isAuthority;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isNotFromDelegatedAuthority(KeyValue collectedFrom, StructuredProperty p) {
|
||||||
|
final PidType pType = PidType.tryValueOf(p.getQualifier().getClassid());
|
||||||
|
|
||||||
|
final Map<String, String> da = DELEGATED_PID_AUTHORITY.get(pType);
|
||||||
|
if (Objects.isNull(da)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (!da.containsKey(collectedFrom.getKey())) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return StringUtils.contains(p.getValue(), da.get(collectedFrom.getKey()));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @see {@link IdentifierFactory#createIdentifier(OafEntity, boolean)}
|
||||||
|
*/
|
||||||
|
public static <T extends OafEntity> String createIdentifier(T entity) {
|
||||||
|
|
||||||
|
return createIdentifier(entity, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <T extends OafEntity> String idFromPid(T entity, StructuredProperty s, boolean md5) {
|
||||||
|
return idFromPid(ModelSupport.getIdPrefix(entity.getClass()), s.getQualifier().getClassid(), s.getValue(), md5);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String idFromPid(String numericPrefix, String pidType, String pidValue, boolean md5) {
|
||||||
|
return new StringBuilder()
|
||||||
|
.append(numericPrefix)
|
||||||
|
.append(ID_PREFIX_SEPARATOR)
|
||||||
|
.append(createPrefix(pidType))
|
||||||
|
.append(ID_SEPARATOR)
|
||||||
|
.append(md5 ? md5(pidValue) : pidValue)
|
||||||
|
.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
// create the prefix (length = 12)
|
||||||
|
private static String createPrefix(String pidType) {
|
||||||
|
StringBuilder prefix = new StringBuilder(StringUtils.left(pidType, ID_PREFIX_LEN));
|
||||||
|
while (prefix.length() < ID_PREFIX_LEN) {
|
||||||
|
prefix.append("_");
|
||||||
|
}
|
||||||
|
return prefix.substring(0, ID_PREFIX_LEN);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String md5(final String s) {
|
||||||
|
try {
|
||||||
|
final MessageDigest md = MessageDigest.getInstance("MD5");
|
||||||
|
md.update(s.getBytes(StandardCharsets.UTF_8));
|
||||||
|
return new String(Hex.encodeHex(md.digest()));
|
||||||
|
} catch (final Exception e) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -1,22 +1,21 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.schema.oaf.utils;
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
//
|
|
||||||
// Source code recreated from a .class file by IntelliJ IDEA
|
|
||||||
// (powered by FernFlower decompiler)
|
|
||||||
//
|
|
||||||
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.common.EntityType;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
|
||||||
|
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
//
|
||||||
|
// Source code recreated from a .class file by IntelliJ IDEA
|
||||||
|
// (powered by FernFlower decompiler)
|
||||||
|
//
|
||||||
|
import eu.dnetlib.dhp.schema.common.EntityType;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
|
||||||
public class MergeComparator implements Comparator<Oaf> {
|
public class MergeComparator implements Comparator<Oaf> {
|
||||||
public MergeComparator() {
|
public MergeComparator() {
|
||||||
}
|
}
|
||||||
|
@ -41,14 +40,14 @@ public class MergeComparator implements Comparator<Oaf> {
|
||||||
// collectedfrom
|
// collectedfrom
|
||||||
HashSet<String> lCf = getCollectedFromIds(left);
|
HashSet<String> lCf = getCollectedFromIds(left);
|
||||||
HashSet<String> rCf = getCollectedFromIds(right);
|
HashSet<String> rCf = getCollectedFromIds(right);
|
||||||
if (lCf.contains("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2") && !rCf.contains("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")) {
|
if (lCf.contains("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")
|
||||||
|
&& !rCf.contains("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")) {
|
||||||
return -1;
|
return -1;
|
||||||
} else if (!lCf.contains("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2") && rCf.contains("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")) {
|
} else if (!lCf.contains("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")
|
||||||
|
&& rCf.contains("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
SubEntityType lClass = SubEntityType.fromClass(left.getClass());
|
SubEntityType lClass = SubEntityType.fromClass(left.getClass());
|
||||||
SubEntityType rClass = SubEntityType.fromClass(right.getClass());
|
SubEntityType rClass = SubEntityType.fromClass(right.getClass());
|
||||||
return lClass.ordinal() - rClass.ordinal();
|
return lClass.ordinal() - rClass.ordinal();
|
||||||
|
|
|
@ -1,21 +1,26 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.schema.oaf.utils;
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import static org.apache.commons.lang3.ObjectUtils.firstNonNull;
|
||||||
|
import static com.google.common.base.Preconditions.checkArgument;
|
||||||
|
|
||||||
|
import java.text.ParseException;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.function.BinaryOperator;
|
||||||
|
import java.util.function.Function;
|
||||||
|
import java.util.function.Supplier;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
|
import com.google.common.base.Joiner;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.commons.lang3.tuple.ImmutablePair;
|
||||||
|
import org.apache.commons.lang3.tuple.Pair;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.common.AccessRightComparator;
|
import eu.dnetlib.dhp.schema.common.AccessRightComparator;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
import org.apache.commons.lang3.tuple.ImmutablePair;
|
|
||||||
import org.apache.commons.lang3.tuple.Pair;
|
|
||||||
|
|
||||||
import java.text.ParseException;
|
|
||||||
import java.util.*;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
import java.util.stream.Stream;
|
|
||||||
|
|
||||||
import static com.google.common.base.Objects.firstNonNull;
|
|
||||||
import static com.google.common.base.Preconditions.checkArgument;
|
|
||||||
|
|
||||||
public class MergeUtils {
|
public class MergeUtils {
|
||||||
|
|
||||||
|
@ -168,8 +173,7 @@ public class MergeUtils {
|
||||||
return a || b;
|
return a || b;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static <T, K> List<T> mergeLists(final List<T> left, final List<T> right, int trust, Function<T, K> keyExtractor, BinaryOperator<T> merger) {
|
||||||
private static <T> List<T> unionDistinctLists(final List<T> left, final List<T> right, int trust) {
|
|
||||||
if (left == null) {
|
if (left == null) {
|
||||||
return right;
|
return right;
|
||||||
} else if (right == null) {
|
} else if (right == null) {
|
||||||
|
@ -179,7 +183,26 @@ public class MergeUtils {
|
||||||
List<T> h = trust >= 0 ? left : right;
|
List<T> h = trust >= 0 ? left : right;
|
||||||
List<T> l = trust >= 0 ? right : left;
|
List<T> l = trust >= 0 ? right : left;
|
||||||
|
|
||||||
return Stream.concat(h.stream(), l.stream())
|
return new ArrayList<>(Stream
|
||||||
|
.concat(h.stream(), l.stream())
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.distinct()
|
||||||
|
.collect(Collectors.toMap(keyExtractor, v -> v, merger))
|
||||||
|
.values());
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <T, K> List<T> unionDistinctLists(final List<T> left, final List<T> right, int trust) {
|
||||||
|
if (left == null) {
|
||||||
|
return right;
|
||||||
|
} else if (right == null) {
|
||||||
|
return left;
|
||||||
|
}
|
||||||
|
|
||||||
|
List<T> h = trust >= 0 ? left : right;
|
||||||
|
List<T> l = trust >= 0 ? right : left;
|
||||||
|
|
||||||
|
return Stream
|
||||||
|
.concat(h.stream(), l.stream())
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.distinct()
|
.distinct()
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
|
@ -192,7 +215,8 @@ public class MergeUtils {
|
||||||
return l;
|
return l;
|
||||||
}
|
}
|
||||||
|
|
||||||
return Stream.concat(l.stream(), r.stream())
|
return Stream
|
||||||
|
.concat(l.stream(), r.stream())
|
||||||
.filter(StringUtils::isNotBlank)
|
.filter(StringUtils::isNotBlank)
|
||||||
.distinct()
|
.distinct()
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
|
@ -213,7 +237,8 @@ public class MergeUtils {
|
||||||
return new ArrayList<>(values.values());
|
return new ArrayList<>(values.values());
|
||||||
}
|
}
|
||||||
|
|
||||||
private static List<StructuredProperty> unionTitle(List<StructuredProperty> left, List<StructuredProperty> right, int trust) {
|
private static List<StructuredProperty> unionTitle(List<StructuredProperty> left, List<StructuredProperty> right,
|
||||||
|
int trust) {
|
||||||
if (left == null) {
|
if (left == null) {
|
||||||
return right;
|
return right;
|
||||||
} else if (right == null) {
|
} else if (right == null) {
|
||||||
|
@ -223,7 +248,8 @@ public class MergeUtils {
|
||||||
List<StructuredProperty> h = trust >= 0 ? left : right;
|
List<StructuredProperty> h = trust >= 0 ? left : right;
|
||||||
List<StructuredProperty> l = trust >= 0 ? right : left;
|
List<StructuredProperty> l = trust >= 0 ? right : left;
|
||||||
|
|
||||||
return Stream.concat(h.stream(), l.stream())
|
return Stream
|
||||||
|
.concat(h.stream(), l.stream())
|
||||||
.filter(Objects::isNull)
|
.filter(Objects::isNull)
|
||||||
.distinct()
|
.distinct()
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
|
@ -263,7 +289,9 @@ public class MergeUtils {
|
||||||
// dateofcollection mettere today quando si fa merge
|
// dateofcollection mettere today quando si fa merge
|
||||||
merged.setDateofcollection(chooseString(merged.getDateofcollection(), enrich.getDateofcollection(), trust));
|
merged.setDateofcollection(chooseString(merged.getDateofcollection(), enrich.getDateofcollection(), trust));
|
||||||
// setDateoftransformation mettere vuota in dedup, nota per Claudio
|
// setDateoftransformation mettere vuota in dedup, nota per Claudio
|
||||||
merged.setDateoftransformation(chooseString(merged.getDateoftransformation(), enrich.getDateoftransformation(), trust));
|
merged
|
||||||
|
.setDateoftransformation(
|
||||||
|
chooseString(merged.getDateoftransformation(), enrich.getDateoftransformation(), trust));
|
||||||
// TODO: was missing in OafEntity.merge
|
// TODO: was missing in OafEntity.merge
|
||||||
merged.setExtraInfo(unionDistinctLists(merged.getExtraInfo(), enrich.getExtraInfo(), trust));
|
merged.setExtraInfo(unionDistinctLists(merged.getExtraInfo(), enrich.getExtraInfo(), trust));
|
||||||
// oaiprovenanze da mettere a null quando si genera merge
|
// oaiprovenanze da mettere a null quando si genera merge
|
||||||
|
@ -273,7 +301,6 @@ public class MergeUtils {
|
||||||
return merged;
|
return merged;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static <T extends Relation> T mergeRelation(T original, T enrich) {
|
public static <T extends Relation> T mergeRelation(T original, T enrich) {
|
||||||
int trust = compareTrust(original, enrich);
|
int trust = compareTrust(original, enrich);
|
||||||
T merge = mergeOafFields(original, enrich, trust);
|
T merge = mergeOafFields(original, enrich, trust);
|
||||||
|
@ -309,7 +336,8 @@ public class MergeUtils {
|
||||||
final int trust = compareTrust(original, enrich);
|
final int trust = compareTrust(original, enrich);
|
||||||
T merge = mergeOafEntityFields(original, enrich, trust);
|
T merge = mergeOafEntityFields(original, enrich, trust);
|
||||||
|
|
||||||
if (merge.getProcessingchargeamount() == null || StringUtils.isBlank(merge.getProcessingchargeamount().getValue())) {
|
if (merge.getProcessingchargeamount() == null
|
||||||
|
|| StringUtils.isBlank(merge.getProcessingchargeamount().getValue())) {
|
||||||
merge.setProcessingchargeamount(enrich.getProcessingchargeamount());
|
merge.setProcessingchargeamount(enrich.getProcessingchargeamount());
|
||||||
merge.setProcessingchargecurrency(enrich.getProcessingchargecurrency());
|
merge.setProcessingchargecurrency(enrich.getProcessingchargecurrency());
|
||||||
}
|
}
|
||||||
|
@ -350,6 +378,7 @@ public class MergeUtils {
|
||||||
// prima prendo l'higher trust, su questo prendo il valore migliore nelle istanze TODO
|
// prima prendo l'higher trust, su questo prendo il valore migliore nelle istanze TODO
|
||||||
// trust maggiore ma a parita' di trust il piu' specifico (base del vocabolario)
|
// trust maggiore ma a parita' di trust il piu' specifico (base del vocabolario)
|
||||||
// vedi note
|
// vedi note
|
||||||
|
// cannot use com.google.common.base.Objects.firstNonNull as it throws NPE when both terms are null
|
||||||
merge.setResourcetype(firstNonNull(merge.getResourcetype(), enrich.getResourcetype()));
|
merge.setResourcetype(firstNonNull(merge.getResourcetype(), enrich.getResourcetype()));
|
||||||
|
|
||||||
// ok
|
// ok
|
||||||
|
@ -366,12 +395,18 @@ public class MergeUtils {
|
||||||
merge.setContext(unionDistinctLists(merge.getContext(), enrich.getContext(), trust));
|
merge.setContext(unionDistinctLists(merge.getContext(), enrich.getContext(), trust));
|
||||||
|
|
||||||
// ok
|
// ok
|
||||||
merge.setExternalReference(unionDistinctLists(merge.getExternalReference(), enrich.getExternalReference(), trust));
|
merge
|
||||||
|
.setExternalReference(
|
||||||
|
unionDistinctLists(merge.getExternalReference(), enrich.getExternalReference(), trust));
|
||||||
|
|
||||||
// instance enrichment or union
|
// instance enrichment or union
|
||||||
// review instance equals => add pid to comparision
|
// review instance equals => add pid to comparision
|
||||||
if (!isAnEnrichment(merge) && !isAnEnrichment(enrich))
|
if (!isAnEnrichment(merge) && !isAnEnrichment(enrich))
|
||||||
merge.setInstance(unionDistinctLists(merge.getInstance(), enrich.getInstance(), trust));
|
merge.setInstance(
|
||||||
|
mergeLists(merge.getInstance(), enrich.getInstance(), trust,
|
||||||
|
MergeUtils::instanceKeyExtractor,
|
||||||
|
MergeUtils::instanceMerger
|
||||||
|
));
|
||||||
else {
|
else {
|
||||||
final List<Instance> enrichmentInstances = isAnEnrichment(merge) ? merge.getInstance()
|
final List<Instance> enrichmentInstances = isAnEnrichment(merge) ? merge.getInstance()
|
||||||
: enrich.getInstance();
|
: enrich.getInstance();
|
||||||
|
@ -392,6 +427,39 @@ public class MergeUtils {
|
||||||
return merge;
|
return merge;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static String instanceKeyExtractor(Instance i) {
|
||||||
|
return String.join("::",
|
||||||
|
kvKeyExtractor(i.getHostedby()),
|
||||||
|
qualifierKeyExtractor(i.getAccessright()),
|
||||||
|
qualifierKeyExtractor(i.getInstancetype()),
|
||||||
|
Optional.ofNullable(i.getUrl()).map(u -> String.join("::", u)).orElse(null),
|
||||||
|
Optional.ofNullable(i.getPid()).map(pp -> pp.stream().map(MergeUtils::spKeyExtractor).collect(Collectors.joining("::"))).orElse(null));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String kvKeyExtractor(KeyValue kv) {
|
||||||
|
return Optional.ofNullable(kv).map(KeyValue::getKey).orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String qualifierKeyExtractor(Qualifier q) {
|
||||||
|
return Optional.ofNullable(q).map(Qualifier::getClassid).orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <T> T FieldKeyExtractor(Field<T> f) {
|
||||||
|
return Optional.ofNullable(f).map(Field::getValue).orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String spKeyExtractor(StructuredProperty sp) {
|
||||||
|
return Optional.ofNullable(sp).map(s -> Joiner.on("::").join(s, qualifierKeyExtractor(s.getQualifier()))).orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Instance instanceMerger(Instance i1, Instance i2) {
|
||||||
|
|
||||||
|
// TODO implement me!
|
||||||
|
|
||||||
|
return i1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private static <T extends OtherResearchProduct> T mergeORP(T original, T enrich) {
|
private static <T extends OtherResearchProduct> T mergeORP(T original, T enrich) {
|
||||||
int trust = compareTrust(original, enrich);
|
int trust = compareTrust(original, enrich);
|
||||||
final T merge = mergeResult(original, enrich);
|
final T merge = mergeResult(original, enrich);
|
||||||
|
@ -410,7 +478,9 @@ public class MergeUtils {
|
||||||
merge.setDocumentationUrl(unionDistinctLists(merge.getDocumentationUrl(), enrich.getDocumentationUrl(), trust));
|
merge.setDocumentationUrl(unionDistinctLists(merge.getDocumentationUrl(), enrich.getDocumentationUrl(), trust));
|
||||||
merge.setLicense(unionDistinctLists(merge.getLicense(), enrich.getLicense(), trust));
|
merge.setLicense(unionDistinctLists(merge.getLicense(), enrich.getLicense(), trust));
|
||||||
merge.setCodeRepositoryUrl(chooseReference(merge.getCodeRepositoryUrl(), enrich.getCodeRepositoryUrl(), trust));
|
merge.setCodeRepositoryUrl(chooseReference(merge.getCodeRepositoryUrl(), enrich.getCodeRepositoryUrl(), trust));
|
||||||
merge.setProgrammingLanguage(chooseReference(merge.getProgrammingLanguage(), enrich.getProgrammingLanguage(), trust));
|
merge
|
||||||
|
.setProgrammingLanguage(
|
||||||
|
chooseReference(merge.getProgrammingLanguage(), enrich.getProgrammingLanguage(), trust));
|
||||||
|
|
||||||
return merge;
|
return merge;
|
||||||
}
|
}
|
||||||
|
@ -423,8 +493,12 @@ public class MergeUtils {
|
||||||
merge.setDevice(chooseReference(merge.getDevice(), enrich.getDevice(), trust));
|
merge.setDevice(chooseReference(merge.getDevice(), enrich.getDevice(), trust));
|
||||||
merge.setSize(chooseReference(merge.getSize(), enrich.getSize(), trust));
|
merge.setSize(chooseReference(merge.getSize(), enrich.getSize(), trust));
|
||||||
merge.setVersion(chooseReference(merge.getVersion(), enrich.getVersion(), trust));
|
merge.setVersion(chooseReference(merge.getVersion(), enrich.getVersion(), trust));
|
||||||
merge.setLastmetadataupdate(chooseReference(merge.getLastmetadataupdate(), enrich.getLastmetadataupdate(), trust));
|
merge
|
||||||
merge.setMetadataversionnumber(chooseReference(merge.getMetadataversionnumber(), enrich.getMetadataversionnumber(), trust));
|
.setLastmetadataupdate(
|
||||||
|
chooseReference(merge.getLastmetadataupdate(), enrich.getLastmetadataupdate(), trust));
|
||||||
|
merge
|
||||||
|
.setMetadataversionnumber(
|
||||||
|
chooseReference(merge.getMetadataversionnumber(), enrich.getMetadataversionnumber(), trust));
|
||||||
merge.setGeolocation(unionDistinctLists(merge.getGeolocation(), enrich.getGeolocation(), trust));
|
merge.setGeolocation(unionDistinctLists(merge.getGeolocation(), enrich.getGeolocation(), trust));
|
||||||
|
|
||||||
return merge;
|
return merge;
|
||||||
|
@ -445,16 +519,27 @@ public class MergeUtils {
|
||||||
|
|
||||||
merged.setLegalshortname(chooseReference(merged.getLegalshortname(), enrich.getLegalshortname(), trust));
|
merged.setLegalshortname(chooseReference(merged.getLegalshortname(), enrich.getLegalshortname(), trust));
|
||||||
merged.setLegalname(chooseReference(merged.getLegalname(), enrich.getLegalname(), trust));
|
merged.setLegalname(chooseReference(merged.getLegalname(), enrich.getLegalname(), trust));
|
||||||
merged.setAlternativeNames(unionDistinctLists(enrich.getAlternativeNames(), merged.getAlternativeNames(), trust));
|
merged
|
||||||
|
.setAlternativeNames(unionDistinctLists(enrich.getAlternativeNames(), merged.getAlternativeNames(), trust));
|
||||||
merged.setWebsiteurl(chooseReference(merged.getWebsiteurl(), enrich.getWebsiteurl(), trust));
|
merged.setWebsiteurl(chooseReference(merged.getWebsiteurl(), enrich.getWebsiteurl(), trust));
|
||||||
merged.setLogourl(chooseReference(merged.getLogourl(), enrich.getLogourl(), trust));
|
merged.setLogourl(chooseReference(merged.getLogourl(), enrich.getLogourl(), trust));
|
||||||
merged.setEclegalbody(chooseReference(merged.getEclegalbody(), enrich.getEclegalbody(), trust));
|
merged.setEclegalbody(chooseReference(merged.getEclegalbody(), enrich.getEclegalbody(), trust));
|
||||||
merged.setEclegalperson(chooseReference(merged.getEclegalperson(), enrich.getEclegalperson(), trust));
|
merged.setEclegalperson(chooseReference(merged.getEclegalperson(), enrich.getEclegalperson(), trust));
|
||||||
merged.setEcnonprofit(chooseReference(merged.getEcnonprofit(), enrich.getEcnonprofit(), trust));
|
merged.setEcnonprofit(chooseReference(merged.getEcnonprofit(), enrich.getEcnonprofit(), trust));
|
||||||
merged.setEcresearchorganization(chooseReference(merged.getEcresearchorganization(), enrich.getEcresearchorganization(), trust));
|
merged
|
||||||
merged.setEchighereducation(chooseReference(merged.getEchighereducation(), enrich.getEchighereducation(), trust));
|
.setEcresearchorganization(
|
||||||
merged.setEcinternationalorganizationeurinterests(chooseReference(merged.getEcinternationalorganizationeurinterests(), enrich.getEcinternationalorganizationeurinterests(), trust));
|
chooseReference(merged.getEcresearchorganization(), enrich.getEcresearchorganization(), trust));
|
||||||
merged.setEcinternationalorganization(chooseReference(merged.getEcinternationalorganization(), enrich.getEcinternationalorganization(), trust));
|
merged
|
||||||
|
.setEchighereducation(chooseReference(merged.getEchighereducation(), enrich.getEchighereducation(), trust));
|
||||||
|
merged
|
||||||
|
.setEcinternationalorganizationeurinterests(
|
||||||
|
chooseReference(
|
||||||
|
merged.getEcinternationalorganizationeurinterests(),
|
||||||
|
enrich.getEcinternationalorganizationeurinterests(), trust));
|
||||||
|
merged
|
||||||
|
.setEcinternationalorganization(
|
||||||
|
chooseReference(
|
||||||
|
merged.getEcinternationalorganization(), enrich.getEcinternationalorganization(), trust));
|
||||||
merged.setEcenterprise(chooseReference(merged.getEcenterprise(), enrich.getEcenterprise(), trust));
|
merged.setEcenterprise(chooseReference(merged.getEcenterprise(), enrich.getEcenterprise(), trust));
|
||||||
merged.setEcsmevalidated(chooseReference(merged.getEcsmevalidated(), enrich.getEcsmevalidated(), trust));
|
merged.setEcsmevalidated(chooseReference(merged.getEcsmevalidated(), enrich.getEcsmevalidated(), trust));
|
||||||
merged.setEcnutscode(chooseReference(merged.getEcnutscode(), enrich.getEcnutscode(), trust));
|
merged.setEcnutscode(chooseReference(merged.getEcnutscode(), enrich.getEcnutscode(), trust));
|
||||||
|
@ -477,7 +562,9 @@ public class MergeUtils {
|
||||||
merged.setKeywords(chooseReference(merged.getKeywords(), enrich.getKeywords(), trust));
|
merged.setKeywords(chooseReference(merged.getKeywords(), enrich.getKeywords(), trust));
|
||||||
merged.setDuration(chooseReference(merged.getDuration(), enrich.getDuration(), trust));
|
merged.setDuration(chooseReference(merged.getDuration(), enrich.getDuration(), trust));
|
||||||
merged.setEcsc39(chooseReference(merged.getEcsc39(), enrich.getEcsc39(), trust));
|
merged.setEcsc39(chooseReference(merged.getEcsc39(), enrich.getEcsc39(), trust));
|
||||||
merged.setOamandatepublications(chooseReference(merged.getOamandatepublications(), enrich.getOamandatepublications(), trust));
|
merged
|
||||||
|
.setOamandatepublications(
|
||||||
|
chooseReference(merged.getOamandatepublications(), enrich.getOamandatepublications(), trust));
|
||||||
merged.setEcarticle29_3(chooseReference(merged.getEcarticle29_3(), enrich.getEcarticle29_3(), trust));
|
merged.setEcarticle29_3(chooseReference(merged.getEcarticle29_3(), enrich.getEcarticle29_3(), trust));
|
||||||
merged.setSubjects(unionDistinctLists(merged.getSubjects(), enrich.getSubjects(), trust));
|
merged.setSubjects(unionDistinctLists(merged.getSubjects(), enrich.getSubjects(), trust));
|
||||||
merged.setFundingtree(unionDistinctLists(merged.getFundingtree(), enrich.getFundingtree(), trust));
|
merged.setFundingtree(unionDistinctLists(merged.getFundingtree(), enrich.getFundingtree(), trust));
|
||||||
|
@ -502,12 +589,13 @@ public class MergeUtils {
|
||||||
merged.setH2020topicdescription(enrich.getH2020topicdescription());
|
merged.setH2020topicdescription(enrich.getH2020topicdescription());
|
||||||
}
|
}
|
||||||
|
|
||||||
merged.setH2020classification(unionDistinctLists(merged.getH2020classification(), enrich.getH2020classification(), trust));
|
merged
|
||||||
|
.setH2020classification(
|
||||||
|
unionDistinctLists(merged.getH2020classification(), enrich.getH2020classification(), trust));
|
||||||
|
|
||||||
return merged;
|
return merged;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Longest lists list.
|
* Longest lists list.
|
||||||
*
|
*
|
||||||
|
@ -679,12 +767,18 @@ public class MergeUtils {
|
||||||
merge.setInstanceTypeMapping(firstNonNull(merge.getInstanceTypeMapping(), enrichment.getInstanceTypeMapping()));
|
merge.setInstanceTypeMapping(firstNonNull(merge.getInstanceTypeMapping(), enrichment.getInstanceTypeMapping()));
|
||||||
merge.setHostedby(firstNonNull(merge.getHostedby(), enrichment.getHostedby()));
|
merge.setHostedby(firstNonNull(merge.getHostedby(), enrichment.getHostedby()));
|
||||||
merge.setUrl(unionDistinctLists(merge.getUrl(), enrichment.getUrl(), 0));
|
merge.setUrl(unionDistinctLists(merge.getUrl(), enrichment.getUrl(), 0));
|
||||||
merge.setDistributionlocation(firstNonNull(merge.getDistributionlocation(), enrichment.getDistributionlocation()));
|
merge
|
||||||
|
.setDistributionlocation(
|
||||||
|
firstNonNull(merge.getDistributionlocation(), enrichment.getDistributionlocation()));
|
||||||
merge.setCollectedfrom(firstNonNull(merge.getCollectedfrom(), enrichment.getCollectedfrom()));
|
merge.setCollectedfrom(firstNonNull(merge.getCollectedfrom(), enrichment.getCollectedfrom()));
|
||||||
// pid and alternateId are used for matching
|
// pid and alternateId are used for matching
|
||||||
merge.setDateofacceptance(firstNonNull(merge.getDateofacceptance(), enrichment.getDateofacceptance()));
|
merge.setDateofacceptance(firstNonNull(merge.getDateofacceptance(), enrichment.getDateofacceptance()));
|
||||||
merge.setProcessingchargeamount(firstNonNull(merge.getProcessingchargeamount(), enrichment.getProcessingchargeamount()));
|
merge
|
||||||
merge.setProcessingchargecurrency(firstNonNull(merge.getProcessingchargecurrency(), enrichment.getProcessingchargecurrency()));
|
.setProcessingchargeamount(
|
||||||
|
firstNonNull(merge.getProcessingchargeamount(), enrichment.getProcessingchargeamount()));
|
||||||
|
merge
|
||||||
|
.setProcessingchargecurrency(
|
||||||
|
firstNonNull(merge.getProcessingchargecurrency(), enrichment.getProcessingchargecurrency()));
|
||||||
merge.setRefereed(firstNonNull(merge.getRefereed(), enrichment.getRefereed()));
|
merge.setRefereed(firstNonNull(merge.getRefereed(), enrichment.getRefereed()));
|
||||||
merge.setMeasures(unionDistinctLists(merge.getMeasures(), enrichment.getMeasures(), 0));
|
merge.setMeasures(unionDistinctLists(merge.getMeasures(), enrichment.getMeasures(), 0));
|
||||||
merge.setFulltext(firstNonNull(merge.getFulltext(), enrichment.getFulltext()));
|
merge.setFulltext(firstNonNull(merge.getFulltext(), enrichment.getFulltext()));
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
public class ModelHardLimits {
|
||||||
|
|
||||||
|
private ModelHardLimits() {}
|
||||||
|
|
||||||
|
public static final String LAYOUT = "index";
|
||||||
|
public static final String INTERPRETATION = "openaire";
|
||||||
|
public static final String SEPARATOR = "-";
|
||||||
|
|
||||||
|
public static final int MAX_EXTERNAL_ENTITIES = 50;
|
||||||
|
public static final int MAX_AUTHORS = 200;
|
||||||
|
public static final int MAX_AUTHOR_FULLNAME_LENGTH = 1000;
|
||||||
|
public static final int MAX_TITLE_LENGTH = 5000;
|
||||||
|
public static final int MAX_TITLES = 10;
|
||||||
|
public static final int MAX_ABSTRACTS = 10;
|
||||||
|
public static final int MAX_ABSTRACT_LENGTH = 150000;
|
||||||
|
public static final int MAX_RELATED_ABSTRACT_LENGTH = 500;
|
||||||
|
public static final int MAX_INSTANCES = 10;
|
||||||
|
|
||||||
|
public static String getCollectionName(String format) {
|
||||||
|
return format + SEPARATOR + LAYOUT + SEPARATOR + INTERPRETATION;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,38 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
|
public class OrganizationPidComparator implements Comparator<StructuredProperty> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(StructuredProperty left, StructuredProperty right) {
|
||||||
|
|
||||||
|
PidType lClass = PidType.tryValueOf(left.getQualifier().getClassid());
|
||||||
|
PidType rClass = PidType.tryValueOf(right.getQualifier().getClassid());
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.openorgs))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.openorgs))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.GRID))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.GRID))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.mag_id))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.mag_id))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.urn))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.urn))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,8 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
|
||||||
|
public class PidBlacklist extends HashMap<String, HashSet<String>> {
|
||||||
|
}
|
|
@ -0,0 +1,38 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
public class PidBlacklistProvider {
|
||||||
|
|
||||||
|
private static final PidBlacklist blacklist;
|
||||||
|
|
||||||
|
static {
|
||||||
|
try {
|
||||||
|
String json = IOUtils.toString(IdentifierFactory.class.getResourceAsStream("pid_blacklist.json"));
|
||||||
|
blacklist = new ObjectMapper().readValue(json, PidBlacklist.class);
|
||||||
|
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static PidBlacklist getBlacklist() {
|
||||||
|
return blacklist;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Set<String> getBlacklist(String pidType) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(getBlacklist().get(pidType))
|
||||||
|
.orElse(new HashSet<>());
|
||||||
|
}
|
||||||
|
|
||||||
|
private PidBlacklistProvider() {}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,48 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
|
public class PidComparator<T extends OafEntity> implements Comparator<StructuredProperty> {
|
||||||
|
|
||||||
|
private final T entity;
|
||||||
|
|
||||||
|
public PidComparator(T entity) {
|
||||||
|
this.entity = entity;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(StructuredProperty left, StructuredProperty right) {
|
||||||
|
|
||||||
|
if (left == null && right == null)
|
||||||
|
return 0;
|
||||||
|
if (left == null)
|
||||||
|
return 1;
|
||||||
|
if (right == null)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
if (ModelSupport.isSubClass(entity, Result.class)) {
|
||||||
|
return compareResultPids(left, right);
|
||||||
|
}
|
||||||
|
if (ModelSupport.isSubClass(entity, Organization.class)) {
|
||||||
|
return compareOrganizationtPids(left, right);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Else (but unlikely), lexicographical ordering will do.
|
||||||
|
return left.getQualifier().getClassid().compareTo(right.getQualifier().getClassid());
|
||||||
|
}
|
||||||
|
|
||||||
|
private int compareResultPids(StructuredProperty left, StructuredProperty right) {
|
||||||
|
return new ResultPidComparator().compare(left, right);
|
||||||
|
}
|
||||||
|
|
||||||
|
private int compareOrganizationtPids(StructuredProperty left, StructuredProperty right) {
|
||||||
|
return new OrganizationPidComparator().compare(left, right);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,79 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.EnumUtils;
|
||||||
|
|
||||||
|
public enum PidType {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The DOI syntax shall be made up of a DOI prefix and a DOI suffix separated by a forward slash.
|
||||||
|
*
|
||||||
|
* There is no defined limit on the length of the DOI name, or of the DOI prefix or DOI suffix.
|
||||||
|
*
|
||||||
|
* The DOI name is case-insensitive and can incorporate any printable characters from the legal graphic characters
|
||||||
|
* of Unicode. Further constraints on character use (e.g. use of language-specific alphanumeric characters) can be
|
||||||
|
* defined for an application by the ISO 26324 Registration Authority.
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* DOI prefix: The DOI prefix shall be composed of a directory indicator followed by a registrant code.
|
||||||
|
* These two components shall be separated by a full stop (period). The directory indicator shall be "10" and
|
||||||
|
* distinguishes the entire set of character strings (prefix and suffix) as digital object identifiers within the
|
||||||
|
* resolution system.
|
||||||
|
*
|
||||||
|
* Registrant code: The second element of the DOI prefix shall be the registrant code. The registrant code is a
|
||||||
|
* unique string assigned to a registrant.
|
||||||
|
*
|
||||||
|
* DOI suffix: The DOI suffix shall consist of a character string of any length chosen by the registrant.
|
||||||
|
* Each suffix shall be unique to the prefix element that precedes it. The unique suffix can be a sequential number,
|
||||||
|
* or it might incorporate an identifier generated from or based on another system used by the registrant
|
||||||
|
* (e.g. ISAN, ISBN, ISRC, ISSN, ISTC, ISNI; in such cases, a preferred construction for such a suffix can be
|
||||||
|
* specified, as in Example 1).
|
||||||
|
*
|
||||||
|
* Source: https://www.doi.org/doi_handbook/2_Numbering.html#2.2
|
||||||
|
*/
|
||||||
|
doi,
|
||||||
|
|
||||||
|
/**
|
||||||
|
* PubMed Unique Identifier (PMID)
|
||||||
|
*
|
||||||
|
* This field is a 1-to-8 digit accession number with no leading zeros. It is present on all records and is the
|
||||||
|
* accession number for managing and disseminating records. PMIDs are not reused after records are deleted.
|
||||||
|
*
|
||||||
|
* Beginning in February 2012 PMIDs include extensions following a decimal point to account for article versions
|
||||||
|
* (e.g., 21804956.2). All citations are considered version 1 until replaced. The extended PMID is not displayed
|
||||||
|
* on the MEDLINE format.
|
||||||
|
*
|
||||||
|
* View the citation in abstract format in PubMed to access additional versions when available (see the article in
|
||||||
|
* the Jan-Feb 2012 NLM Technical Bulletin).
|
||||||
|
*
|
||||||
|
* Source: https://www.nlm.nih.gov/bsd/mms/medlineelements.html#pmid
|
||||||
|
*/
|
||||||
|
pmid,
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This field contains the unique identifier for the cited article in PubMed Central. The identifier begins with the
|
||||||
|
* prefix PMC.
|
||||||
|
*
|
||||||
|
* Source: https://www.nlm.nih.gov/bsd/mms/medlineelements.html#pmc
|
||||||
|
*/
|
||||||
|
pmc, handle, arXiv, nct, pdb, w3id,
|
||||||
|
|
||||||
|
// Organization
|
||||||
|
openorgs, ROR, GRID, PIC, ISNI, Wikidata, FundRef, corda, corda_h2020, mag_id, urn,
|
||||||
|
|
||||||
|
// Used by dedup
|
||||||
|
undefined, original;
|
||||||
|
|
||||||
|
public static boolean isValid(String type) {
|
||||||
|
return EnumUtils.isValidEnum(PidType.class, type);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static PidType tryValueOf(String s) {
|
||||||
|
try {
|
||||||
|
return PidType.valueOf(s);
|
||||||
|
} catch (Exception e) {
|
||||||
|
return PidType.original;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,33 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
public class PidValueComparator implements Comparator<StructuredProperty> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(StructuredProperty left, StructuredProperty right) {
|
||||||
|
|
||||||
|
if (left == null && right == null)
|
||||||
|
return 0;
|
||||||
|
if (left == null)
|
||||||
|
return 1;
|
||||||
|
if (right == null)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
StructuredProperty l = CleaningFunctions.normalizePidValue(left);
|
||||||
|
StructuredProperty r = CleaningFunctions.normalizePidValue(right);
|
||||||
|
|
||||||
|
return Optional
|
||||||
|
.ofNullable(l.getValue())
|
||||||
|
.map(
|
||||||
|
lv -> Optional
|
||||||
|
.ofNullable(r.getValue())
|
||||||
|
.map(rv -> lv.compareTo(rv))
|
||||||
|
.orElse(-1))
|
||||||
|
.orElse(1);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,53 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
|
public class ResultPidComparator implements Comparator<StructuredProperty> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(StructuredProperty left, StructuredProperty right) {
|
||||||
|
|
||||||
|
PidType lClass = PidType.tryValueOf(left.getQualifier().getClassid());
|
||||||
|
PidType rClass = PidType.tryValueOf(right.getQualifier().getClassid());
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.doi))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.doi))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.pmid))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.pmid))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.pmc))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.pmc))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.handle))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.handle))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.arXiv))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.arXiv))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.nct))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.nct))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.pdb))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.pdb))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,77 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.CROSSREF_ID;
|
||||||
|
|
||||||
|
public class ResultTypeComparator implements Comparator<Result> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(Result left, Result right) {
|
||||||
|
|
||||||
|
if (left == null && right == null)
|
||||||
|
return 0;
|
||||||
|
if (left == null)
|
||||||
|
return 1;
|
||||||
|
if (right == null)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
HashSet<String> lCf = getCollectedFromIds(left);
|
||||||
|
HashSet<String> rCf = getCollectedFromIds(right);
|
||||||
|
|
||||||
|
if (lCf.contains(CROSSREF_ID) && !rCf.contains(CROSSREF_ID)) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (!lCf.contains(CROSSREF_ID) && rCf.contains(CROSSREF_ID)) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
String lClass = left.getResulttype().getClassid();
|
||||||
|
String rClass = right.getResulttype().getClassid();
|
||||||
|
|
||||||
|
if (lClass.equals(rClass))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (lClass.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(ModelConstants.SOFTWARE_RESULTTYPE_CLASSID))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(ModelConstants.SOFTWARE_RESULTTYPE_CLASSID))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(ModelConstants.ORP_RESULTTYPE_CLASSID))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(ModelConstants.ORP_RESULTTYPE_CLASSID))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
// Else (but unlikely), lexicographical ordering will do.
|
||||||
|
return lClass.compareTo(rClass);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected HashSet<String> getCollectedFromIds(Result left) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(left.getCollectedfrom())
|
||||||
|
.map(
|
||||||
|
cf -> cf
|
||||||
|
.stream()
|
||||||
|
.map(KeyValue::getKey)
|
||||||
|
.collect(Collectors.toCollection(HashSet::new)))
|
||||||
|
.orElse(new HashSet<>());
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,21 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Assertions;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
class BlackListProviderTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void blackListTest() {
|
||||||
|
|
||||||
|
Assertions.assertNotNull(PidBlacklistProvider.getBlacklist());
|
||||||
|
Assertions.assertNotNull(PidBlacklistProvider.getBlacklist().get("doi"));
|
||||||
|
Assertions.assertTrue(PidBlacklistProvider.getBlacklist().get("doi").size() > 0);
|
||||||
|
final Set<String> xxx = PidBlacklistProvider.getBlacklist("xxx");
|
||||||
|
Assertions.assertNotNull(xxx);
|
||||||
|
Assertions.assertEquals(0, xxx.size());
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,85 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.DeserializationFeature;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||||
|
|
||||||
|
class IdentifierFactoryTest {
|
||||||
|
|
||||||
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
|
||||||
|
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testCreateIdentifierForPublication() throws IOException {
|
||||||
|
|
||||||
|
verifyIdentifier(
|
||||||
|
"publication_doi1.json", "50|doi_________::79dbc7a2a56dc1532659f9038843256e", true);
|
||||||
|
|
||||||
|
verifyIdentifier(
|
||||||
|
"publication_doi2.json", "50|doi_________::79dbc7a2a56dc1532659f9038843256e", true);
|
||||||
|
|
||||||
|
verifyIdentifier(
|
||||||
|
"publication_doi3.json", "50|pmc_________::94e4cb08c93f8733b48e2445d04002ac", true);
|
||||||
|
|
||||||
|
verifyIdentifier(
|
||||||
|
"publication_doi4.json", "50|od______2852::38861c44e6052a8d49f59a4c39ba5e66", true);
|
||||||
|
|
||||||
|
verifyIdentifier(
|
||||||
|
"publication_doi5.json", "50|doi_________::3bef95c0ca26dd55451fc8839ea69d27", true);
|
||||||
|
|
||||||
|
verifyIdentifier(
|
||||||
|
"publication_pmc1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", true);
|
||||||
|
|
||||||
|
verifyIdentifier(
|
||||||
|
"publication_pmc2.json", "50|pmc_________::94e4cb08c93f8733b48e2445d04002ac", true);
|
||||||
|
|
||||||
|
verifyIdentifier(
|
||||||
|
"publication_openapc.json", "50|doi_________::79dbc7a2a56dc1532659f9038843256e", true);
|
||||||
|
|
||||||
|
final String defaultID = "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f";
|
||||||
|
verifyIdentifier("publication_3.json", defaultID, true);
|
||||||
|
verifyIdentifier("publication_4.json", defaultID, true);
|
||||||
|
verifyIdentifier("publication_5.json", defaultID, true);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testCreateIdentifierForPublicationNoHash() throws IOException {
|
||||||
|
|
||||||
|
verifyIdentifier("publication_doi1.json", "50|doi_________::10.1016/j.cmet.2010.03.013", false);
|
||||||
|
verifyIdentifier("publication_doi2.json", "50|doi_________::10.1016/j.cmet.2010.03.013", false);
|
||||||
|
verifyIdentifier("publication_pmc1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", false);
|
||||||
|
verifyIdentifier(
|
||||||
|
"publication_urn1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", false);
|
||||||
|
|
||||||
|
final String defaultID = "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f";
|
||||||
|
verifyIdentifier("publication_3.json", defaultID, false);
|
||||||
|
verifyIdentifier("publication_4.json", defaultID, false);
|
||||||
|
verifyIdentifier("publication_5.json", defaultID, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testCreateIdentifierForROHub() throws IOException {
|
||||||
|
verifyIdentifier(
|
||||||
|
"orp-rohub.json", "50|w3id________::afc7592914ae190a50570db90f55f9c2", true);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void verifyIdentifier(String filename, String expectedID, boolean md5) throws IOException {
|
||||||
|
final String json = IOUtils.toString(getClass().getResourceAsStream(filename));
|
||||||
|
final Publication pub = OBJECT_MAPPER.readValue(json, Publication.class);
|
||||||
|
|
||||||
|
String id = IdentifierFactory.createIdentifier(pub, md5);
|
||||||
|
System.out.println(id);
|
||||||
|
assertNotNull(id);
|
||||||
|
assertEquals(expectedID, id);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -5,11 +5,14 @@ import static org.junit.jupiter.api.Assertions.*;
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.lang.reflect.InvocationTargetException;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import com.google.common.collect.Lists;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import org.apache.commons.beanutils.BeanUtils;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
@ -17,16 +20,33 @@ import com.fasterxml.jackson.databind.DeserializationFeature;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
|
||||||
|
|
||||||
public class MergeUtilsTest {
|
public class MergeUtilsTest {
|
||||||
|
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
|
||||||
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testMergePubs_new() throws IOException {
|
||||||
|
Publication pt = read("publication_test.json", Publication.class);
|
||||||
|
Publication p1 = read("publication_test.json", Publication.class);
|
||||||
|
|
||||||
|
assertEquals(1, pt.getCollectedfrom().size());
|
||||||
|
assertEquals(ModelConstants.CROSSREF_ID, pt.getCollectedfrom().get(0).getKey());
|
||||||
|
|
||||||
|
Instance i = new Instance();
|
||||||
|
i.setUrl(Lists.newArrayList("https://..."));
|
||||||
|
p1.getInstance().add(i);
|
||||||
|
|
||||||
|
Publication ptp1 = MergeUtils.mergePublication(pt, p1);
|
||||||
|
|
||||||
|
assertNotNull(ptp1.getInstance());
|
||||||
|
assertEquals(2, ptp1.getInstance().size());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void testMergePubs() throws IOException {
|
void testMergePubs() throws IOException {
|
||||||
Publication p1 = read("publication_1.json", Publication.class);
|
Publication p1 = read("publication_1.json", Publication.class);
|
||||||
|
|
|
@ -178,8 +178,8 @@ class OafMapperUtilsTest {
|
||||||
|
|
||||||
assertEquals(
|
assertEquals(
|
||||||
ModelConstants.DATASET_RESULTTYPE_CLASSID,
|
ModelConstants.DATASET_RESULTTYPE_CLASSID,
|
||||||
MergeUtils
|
((Result) MergeUtils
|
||||||
.mergeResult(p2, d1)
|
.merge(p2, d1))
|
||||||
.getResulttype()
|
.getResulttype()
|
||||||
.getClassid());
|
.getClassid());
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,12 @@
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0000/ra.v2i3.114::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"4.65008652949e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0000/ra.v2i3.114"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0001/(aj).v3i6.458::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"4.01810569717e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0001/(aj).v3i6.458"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0001/1587::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.39172290649e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0001/1587"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0001/462::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"6.33235333753e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.36"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.00285265116e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0001/462"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0001/731::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"4.01810569717e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0001/731"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0001/ijllis.v9i4.2066.g2482::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"8.48190886761e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0001/ijllis.v9i4.2066.g2482"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0118/alfahim.v3i1.140::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"9.88840807598e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0118/alfahim.v3i1.140"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0166/fk2.stagefigshare.6442896.v3::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"7.28336930301e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0166/fk2.stagefigshare.6442896.v3"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0301/jttb.v2i1.64::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"7.28336930301e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0301/jttb.v2i1.64"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0809/seruni.v1i1.567::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"2.62959564033e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0809/seruni.v1i1.567"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0809/seruni.v2i1.765::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"9.40178571921e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0559872"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"3.67659957614e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0809/seruni.v2i1.765"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0901/jkip.v7i3.485::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"6.26204125721e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0901/jkip.v7i3.485"}]}]}
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1 @@
|
||||||
|
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[{"qualifier":{"classid":"scp-number"},"value":"79953761260"}]}
|
|
@ -0,0 +1 @@
|
||||||
|
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[]}
|
|
@ -0,0 +1 @@
|
||||||
|
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f"}
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,33 @@
|
||||||
|
{
|
||||||
|
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
|
||||||
|
"instance": [
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2",
|
||||||
|
"value": "Crossref"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "doi"},
|
||||||
|
"value": "10.1016/j.cmet.2010.03.013"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "urn"},
|
||||||
|
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "scp-number"},
|
||||||
|
"value": "79953761260"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "pmc"},
|
||||||
|
"value": "21459329"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -0,0 +1,37 @@
|
||||||
|
{
|
||||||
|
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
|
||||||
|
"instance": [
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2",
|
||||||
|
"value": "Crossref"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "doi"},
|
||||||
|
"value": "10.1016/j.cmet.2010.03.013"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c",
|
||||||
|
"value": "Europe PubMed Central"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "urn"},
|
||||||
|
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "scp-number"},
|
||||||
|
"value": "79953761260"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "pmc"},
|
||||||
|
"value": "21459329"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -0,0 +1,37 @@
|
||||||
|
{
|
||||||
|
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
|
||||||
|
"instance": [
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
|
||||||
|
"value": "Zenodo"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "doi"},
|
||||||
|
"value": "10.1016/j.cmet.2010.03.013"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c",
|
||||||
|
"value": "Europe PubMed Central"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "urn"},
|
||||||
|
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "scp-number"},
|
||||||
|
"value": "79953761260"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "pmc"},
|
||||||
|
"value": "21459329"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -0,0 +1,37 @@
|
||||||
|
{
|
||||||
|
"id": "50|od______2852::38861c44e6052a8d49f59a4c39ba5e66",
|
||||||
|
"instance": [
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
|
||||||
|
"value": "Zenodo"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "doi"},
|
||||||
|
"value": "10.1016/j.cmet.2010.03.013"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "handle"},
|
||||||
|
"value": "11012/83840"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|opendoar____::2852",
|
||||||
|
"value": "Digital library of Brno University of Technology"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "pmc"},
|
||||||
|
"value": "21459329"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "handle"},
|
||||||
|
"value": "11012/83840"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -0,0 +1,37 @@
|
||||||
|
{
|
||||||
|
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
|
||||||
|
"instance": [
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
|
||||||
|
"value": "Zenodo"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "doi"},
|
||||||
|
"value": "10.5281/zenodo.5121485"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c",
|
||||||
|
"value": "Europe PubMed Central"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "urn"},
|
||||||
|
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "scp-number"},
|
||||||
|
"value": "79953761260"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "pmc"},
|
||||||
|
"value": "21459329"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -0,0 +1,3 @@
|
||||||
|
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", "resulttype" : { "classid" : "publication" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value" : "Crossref"} ], "isGreen": null, "openAccessColor": "gold", "isInDiamondJournal": null, "publiclyFunded": null}
|
||||||
|
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resulttype" : { "classid" : "publication" }, "isGreen": true, "openAccessColor": "gold", "isInDiamondJournal": true, "publiclyFunded": false }
|
||||||
|
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1h", "resulttype" : { "classid" : "publication" }, "isGreen": false, "openAccessColor": null, "isInDiamondJournal": true, "publiclyFunded": false }
|
|
@ -0,0 +1,3 @@
|
||||||
|
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", "resulttype" : { "classid" : "publication" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value" : "Crossref"} ], "isGreen": null, "openAccessColor": "gold", "isInDiamondJournal": null, "publiclyFunded": null}
|
||||||
|
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resulttype" : { "classid" : "publication" }, "isGreen": true, "openAccessColor": "bronze", "isInDiamondJournal": true, "publiclyFunded": false }
|
||||||
|
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1h", "resulttype" : { "classid" : "publication" }, "isGreen": false, "openAccessColor": null, "isInDiamondJournal": true, "publiclyFunded": false }
|
|
@ -0,0 +1,31 @@
|
||||||
|
{
|
||||||
|
"id": "50|openapc_____::000023f9cb6e3a247c764daec4273cbc",
|
||||||
|
"resuttype": {
|
||||||
|
"classid": "publication"
|
||||||
|
},
|
||||||
|
"instance": [
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|apc_________::e2b1600b229fc30663c8a1f662debddf",
|
||||||
|
"value": "OpenAPC Global Initiative"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "doi"},
|
||||||
|
"value": "10.1016/j.cmet.2010.03.013"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "pmc"},
|
||||||
|
"value": "21459329"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "pmid"},
|
||||||
|
"value": "25811027"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"url":["https://doi.org/10.1155/2015/439379"]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,17 @@
|
||||||
|
{
|
||||||
|
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "urn"},
|
||||||
|
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "scp-number"},
|
||||||
|
"value": "79953761260"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "pmc"},
|
||||||
|
"value": "21459329"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -0,0 +1,21 @@
|
||||||
|
{
|
||||||
|
"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
|
||||||
|
"instance": [
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c",
|
||||||
|
"value": "Europe PubMed Central"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "doi"},
|
||||||
|
"value": "10.1016/j.cmet.2010.03.013"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier":{"classid":"pmc"},
|
||||||
|
"value":"21459329"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -0,0 +1,428 @@
|
||||||
|
{
|
||||||
|
"author": [
|
||||||
|
{
|
||||||
|
"affiliation": null,
|
||||||
|
"fullname": "Deymier, Ghislaine",
|
||||||
|
"name": "Ghislaine",
|
||||||
|
"pid": [],
|
||||||
|
"rank": 1,
|
||||||
|
"surname": "Deymier"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation": null,
|
||||||
|
"fullname": "Gaschet, Frédéric",
|
||||||
|
"name": "Frédéric",
|
||||||
|
"pid": [],
|
||||||
|
"rank": 2,
|
||||||
|
"surname": "Gaschet"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation": null,
|
||||||
|
"fullname": "Pouyanne, Guillaume",
|
||||||
|
"name": "Guillaume",
|
||||||
|
"pid": [],
|
||||||
|
"rank": 3,
|
||||||
|
"surname": "Pouyanne"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"bestaccessright": {
|
||||||
|
"classid": "OPEN",
|
||||||
|
"classname": "Open Access",
|
||||||
|
"schemeid": "dnet:access_modes",
|
||||||
|
"schemename": "dnet:access_modes"
|
||||||
|
},
|
||||||
|
"collectedfrom": [
|
||||||
|
{
|
||||||
|
"key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2",
|
||||||
|
"value" : "Crossref"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"context": [],
|
||||||
|
"contributor": [],
|
||||||
|
"country": [],
|
||||||
|
"coverage": [],
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"dateofacceptance": {
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"value": "2013-11-30"
|
||||||
|
},
|
||||||
|
"dateofcollection": "2024-02-28T00:22:13+0000",
|
||||||
|
"dateoftransformation": "2024-03-06T08:43:13.253Z",
|
||||||
|
"description": [
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"value": "For analyzing the reciprocal interaction between urban sprawl and car use, research has first focused on the link between urban density and mobility. By looking for a reduction in energy consumption, cities have favoured a compact planning development. Then reflection has broadened from the simple density to the wider, multi-dimensional concept of urban form. This controversy has led to a renewal of analysis in term of the costs of urban growth, notably by comparing the costs of \"compact\" and \"sprawled\" development. The idea is to compare the mobility costs of different urban forms. However, most often because of a lack of data, such studies are scarce. This paper suggests an innovative method to compute mobility costs at an infra-urban scale : The Spatialized Travel Account (STA). It is based on the CERTU's travel account methodology at a metropolitan scale. It puts forward an accurate estimate of the mobility costs for each transport mode (individual and public) and for each type of payer (households, firms, local authorities...). In order to test the relationships between mobility costs and urban form, we link the computed costs to morphological characteristics of infra-urban zones, taking in account sociodemographic characteristics of households."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"value": "L'interaction réciproque entre étalement urbain et usage de l'automobile a conduit la recherche à se focaliser sur le lien entre les densités urbaines et la mobilité. En cherchant à réduire leur consommation d'énergie pour les transports, et donc leurs émissions de Gaz à Effet de Serre, les villes ont alors cherché à planifier la \" ville compacte \", privilégiant notamment la reconstruction de la ville sur elle-même et la densification. Par la suite, la réflexion s'est élargie de la simple densité à la notion de forme urbaine et à toutes ses dimensions. Cette controverse devait conduire à un renouveau des analyses en termes de coûts de la croissance urbaine : le débat reste vif, encore aujourd'hui, sur les coûts comparés de la ville étalée et de la ville compacte. Plus largement, il s'agit d'explorer les coûts des différentes formes urbaines en termes de mobilité. Malgré cela, généralement pour des raisons de disponibilité de données, les études sur le sujet restent extrêmement rares. Cet article propose un outil novateur pour mesurer les coûts de la mobilité à l'échelle intraurbaine : le Compte Déplacements Territorialisé (CDT). Il s'inspire de la méthode développée par le CERTU pour l'établissement des Comptes Déplacements Voyageurs à l'échelle métropolitaine. Le CDT propose, pour chacune des zones de l'agglomération, une estimation précise de l'ensemble des coûts liés aux déplacements de personnes, ventilés par mode de transport (individuels et collectifs) et par type de financeurs (ménages, entreprises, collectivités territoriales, etc.). Nous proposons une application de cette méthode à la controverse sur le lien entre forme urbaine et coûts de la mobilité. Les coûts sont reliés aux caractéristiques morphologiques des zones (en termes de densité et de diversité, notamment), en prenant soin de contrôler les facteurs socio-économiques qui influent traditionnellement sur les comportements de mobilité (taille du ménage, revenu, etc.)."
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"eoscifguidelines": [],
|
||||||
|
"externalReference": [],
|
||||||
|
"extraInfo": [],
|
||||||
|
"format": [
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"value": "application/pdf"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"fulltext": [],
|
||||||
|
"id": "50|06cdd3ff4700::4826ac62a11a957fe332e2c291dcfcca",
|
||||||
|
"instance": [
|
||||||
|
{
|
||||||
|
"accessright": {
|
||||||
|
"classid": "OPEN",
|
||||||
|
"classname": "Open Access",
|
||||||
|
"schemeid": "dnet:access_modes",
|
||||||
|
"schemename": "dnet:access_modes"
|
||||||
|
},
|
||||||
|
"alternateIdentifier": [
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "doi",
|
||||||
|
"classname": "Digital Object Identifier",
|
||||||
|
"schemeid": "dnet:pid_types",
|
||||||
|
"schemename": "dnet:pid_types"
|
||||||
|
},
|
||||||
|
"value": "10.46298/cst.12132"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|openaire____::6824b298c96ba906a3e6a70593affbf5",
|
||||||
|
"value": "Episciences"
|
||||||
|
},
|
||||||
|
"dateofacceptance": {
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"value": "2013-11-30"
|
||||||
|
},
|
||||||
|
"hostedby": {
|
||||||
|
"key": "10|openaire____::6824b298c96ba906a3e6a70593affbf5",
|
||||||
|
"value": "Episciences"
|
||||||
|
},
|
||||||
|
"instanceTypeMapping": [
|
||||||
|
{
|
||||||
|
"originalType": "http://purl.org/coar/resource_type/c_6501",
|
||||||
|
"typeCode": "http://purl.org/coar/resource_type/c_6501",
|
||||||
|
"typeLabel": "journal article",
|
||||||
|
"vocabularyName": "openaire::coar_resource_types_3_1"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"originalType": "http://purl.org/coar/resource_type/c_6501",
|
||||||
|
"typeCode": "Article",
|
||||||
|
"typeLabel": "Article",
|
||||||
|
"vocabularyName": "openaire::user_resource_types"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"instancetype": {
|
||||||
|
"classid": "0001",
|
||||||
|
"classname": "Article",
|
||||||
|
"schemeid": "dnet:publication_resource",
|
||||||
|
"schemename": "dnet:publication_resource"
|
||||||
|
},
|
||||||
|
"license": {
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"value": "CC BY NC SA"
|
||||||
|
},
|
||||||
|
"pid": [],
|
||||||
|
"refereed": {
|
||||||
|
"classid": "0002",
|
||||||
|
"classname": "nonPeerReviewed",
|
||||||
|
"schemeid": "dnet:review_levels",
|
||||||
|
"schemename": "dnet:review_levels"
|
||||||
|
},
|
||||||
|
"url": [
|
||||||
|
"https://doi.org/10.46298/cst.12132",
|
||||||
|
"https://cst.episciences.org/12132"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"language": {
|
||||||
|
"classid": "fra/fre",
|
||||||
|
"classname": "French",
|
||||||
|
"schemeid": "dnet:languages",
|
||||||
|
"schemename": "dnet:languages"
|
||||||
|
},
|
||||||
|
"lastupdatetimestamp": 1710636106633,
|
||||||
|
"metaResourceType": {
|
||||||
|
"classid": "Research Literature",
|
||||||
|
"classname": "Research Literature",
|
||||||
|
"schemeid": "openaire::meta_resource_types",
|
||||||
|
"schemename": "openaire::meta_resource_types"
|
||||||
|
},
|
||||||
|
"originalId": [
|
||||||
|
"oai:episciences.org:cst:12132",
|
||||||
|
"50|06cdd3ff4700::4826ac62a11a957fe332e2c291dcfcca"
|
||||||
|
],
|
||||||
|
"pid": [],
|
||||||
|
"publisher": {
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"value": "episciences.org"
|
||||||
|
},
|
||||||
|
"relevantdate": [
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "Accepted",
|
||||||
|
"classname": "Accepted",
|
||||||
|
"schemeid": "dnet:dataCite_date",
|
||||||
|
"schemename": "dnet:dataCite_date"
|
||||||
|
},
|
||||||
|
"value": "2024-02-11"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "issued",
|
||||||
|
"classname": "issued",
|
||||||
|
"schemeid": "dnet:dataCite_date",
|
||||||
|
"schemename": "dnet:dataCite_date"
|
||||||
|
},
|
||||||
|
"value": "2013-11-30"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "available",
|
||||||
|
"classname": "available",
|
||||||
|
"schemeid": "dnet:dataCite_date",
|
||||||
|
"schemename": "dnet:dataCite_date"
|
||||||
|
},
|
||||||
|
"value": "2013-11-30"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"resourcetype": {
|
||||||
|
"classid": "journal article",
|
||||||
|
"classname": "journal article",
|
||||||
|
"schemeid": "dnet:dataCite_resource",
|
||||||
|
"schemename": "dnet:dataCite_resource"
|
||||||
|
},
|
||||||
|
"resulttype": {
|
||||||
|
"classid": "publication",
|
||||||
|
"classname": "publication",
|
||||||
|
"schemeid": "dnet:result_typologies",
|
||||||
|
"schemename": "dnet:result_typologies"
|
||||||
|
},
|
||||||
|
"source": [],
|
||||||
|
"subject": [
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "keyword",
|
||||||
|
"classname": "keyword",
|
||||||
|
"schemeid": "dnet:subject_classification_typologies",
|
||||||
|
"schemename": "dnet:subject_classification_typologies"
|
||||||
|
},
|
||||||
|
"value": "JEL: H - Public Economics/H.H7 - State and Local Government • Intergovernmental Relations/H.H7.H72 - State and Local Budget and Expenditures"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "keyword",
|
||||||
|
"classname": "keyword",
|
||||||
|
"schemeid": "dnet:subject_classification_typologies",
|
||||||
|
"schemename": "dnet:subject_classification_typologies"
|
||||||
|
},
|
||||||
|
"value": "Local public finance"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "keyword",
|
||||||
|
"classname": "keyword",
|
||||||
|
"schemeid": "dnet:subject_classification_typologies",
|
||||||
|
"schemename": "dnet:subject_classification_typologies"
|
||||||
|
},
|
||||||
|
"value": "JEL: R - Urban, Rural, Regional, Real Estate, and Transportation Economics/R.R5 - Regional Government Analysis/R.R5.R51 - Finance in Urban and Rural Economies"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": [
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "main title",
|
||||||
|
"classname": "main title",
|
||||||
|
"schemeid": "dnet:dataCite_title",
|
||||||
|
"schemename": "dnet:dataCite_title"
|
||||||
|
},
|
||||||
|
"value": "Urban form and the costs of daily mobility. The spatialized travel account tool and its application to the Bordeaux metropolitan area"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -0,0 +1,23 @@
|
||||||
|
{
|
||||||
|
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "urn"
|
||||||
|
},
|
||||||
|
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "scp-number"
|
||||||
|
},
|
||||||
|
"value": "79953761260"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "pmcid"
|
||||||
|
},
|
||||||
|
"value": "21459329"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
File diff suppressed because one or more lines are too long
2
pom.xml
2
pom.xml
|
@ -888,7 +888,7 @@
|
||||||
<mockito-core.version>3.3.3</mockito-core.version>
|
<mockito-core.version>3.3.3</mockito-core.version>
|
||||||
<mongodb.driver.version>3.4.2</mongodb.driver.version>
|
<mongodb.driver.version>3.4.2</mongodb.driver.version>
|
||||||
<vtd.version>[2.12,3.0)</vtd.version>
|
<vtd.version>[2.12,3.0)</vtd.version>
|
||||||
<dhp-schemas.version>[4.17.2]</dhp-schemas.version>
|
<dhp-schemas.version>[6.1.0]</dhp-schemas.version>
|
||||||
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
|
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
|
||||||
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
|
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
|
||||||
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>
|
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>
|
||||||
|
|
Loading…
Reference in New Issue