[DHP Schema refactoring]

- move the business logic from the model class to dhp-common
This commit is contained in:
Sandro La Bruzzo 2022-07-04 18:22:38 +02:00
parent e517f52e30
commit 27fbc9b385
38 changed files with 2617 additions and 234 deletions

View File

@ -0,0 +1,74 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.util.HashSet;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
/**
 * Static helpers that filter and normalise persistent identifier (PID) values.
 */
public class CleaningFunctions {

    /** Regex matching the {@code 10.} DOI prefix either at the very start of a value or right after a slash. */
    public static final String DOI_PREFIX_REGEX = "(^10\\.|\\/10\\.)";

    /** Replacement applied to the first match of {@link #DOI_PREFIX_REGEX}. */
    public static final String DOI_PREFIX = "10.";

    /** PID values rejected by {@link #pidFilter(StructuredProperty)} for every PID type; matched exactly. */
    public static final Set<String> PID_BLACKLIST = new HashSet<>();

    static {
        PID_BLACKLIST.add("none");
        PID_BLACKLIST.add("na");
    }

    public CleaningFunctions() {
    }

    /**
     * Utility method that filters PID values on a per-type basis.
     *
     * @param s the PID whose value will be checked.
     * @return false if the pid is null, has no qualifier, has a blank value or matches one of the blacklists
     *         (the global {@link #PID_BLACKLIST} or the one {@code PidBlacklistProvider} returns for the PID type);
     *         true otherwise.
     */
    public static boolean pidFilter(StructuredProperty s) {
        if (Objects.isNull(s) || Objects.isNull(s.getQualifier())) {
            return false;
        }
        final String pidValue = s.getValue();
        // isBlank() already rejects null, empty and whitespace-only values (including \n, \r and \t),
        // so no additional regex-based stripping is required.
        if (StringUtils.isBlank(pidValue)) {
            return false;
        }
        if (CleaningFunctions.PID_BLACKLIST.contains(pidValue)) {
            return false;
        }
        return !PidBlacklistProvider.getBlacklist(s.getQualifier().getClassid()).contains(pidValue);
    }

    /**
     * Utility method that normalises PID values on a per-type basis.
     * The value of the given PID is replaced in place.
     *
     * @param pid the PID whose value will be normalised; it must declare a qualifier.
     * @return the same PID instance, containing the normalised value.
     * @throws IllegalArgumentException when the PID value is null.
     */
    public static StructuredProperty normalizePidValue(StructuredProperty pid) {
        pid
            .setValue(
                normalizePidValue(
                    pid.getQualifier().getClassid(),
                    pid.getValue()));

        return pid;
    }

    /**
     * Normalises a PID value according to its type: every value is trimmed; DOIs are additionally lower-cased and
     * the first match of {@link #DOI_PREFIX_REGEX} is replaced by {@link #DOI_PREFIX}.
     *
     * @param pidType the PID type (e.g. "doi"); a null type only triggers the generic trimming.
     * @param pidValue the PID value.
     * @return the normalised value.
     * @throws IllegalArgumentException when pidValue is null.
     */
    public static String normalizePidValue(String pidType, String pidValue) {
        String value = Optional
            .ofNullable(pidValue)
            .map(String::trim)
            .orElseThrow(() -> new IllegalArgumentException("PID value cannot be empty"));

        // switching on a null String would throw a NullPointerException
        if (pidType == null) {
            return value;
        }

        switch (pidType) {

            // TODO add cleaning for more PID types as needed
            case "doi":
                // Locale.ROOT keeps the lower-casing independent from the JVM default locale
                // (e.g. with a Turkish default locale 'I' would otherwise become a dotless 'ı').
                return value.toLowerCase(java.util.Locale.ROOT).replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX);
            default:
                return value;
        }
    }
}

View File

@ -0,0 +1,283 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import static com.google.common.base.Preconditions.checkArgument;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.lang3.StringUtils;
import com.google.common.collect.HashBiMap;
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
/**
 * Factory class for OpenAIRE identifiers in the Graph
 */
public class IdentifierFactory implements Serializable {

    public static final String ID_SEPARATOR = "::";
    public static final String ID_PREFIX_SEPARATOR = "|";

    public static final int ID_PREFIX_LEN = 12;

    /**
     * Declares the associations PID_TYPE -> [DATASOURCE ID, NAME] considered authoritative for that PID_TYPE
     */
    public static final Map<PidType, HashBiMap<String, String>> PID_AUTHORITY = Maps.newHashMap();

    static {
        PID_AUTHORITY.put(PidType.doi, HashBiMap.create());
        PID_AUTHORITY.get(PidType.doi).put(CROSSREF_ID, "Crossref");
        PID_AUTHORITY.get(PidType.doi).put(DATACITE_ID, "Datacite");
        PID_AUTHORITY.get(PidType.doi).put(ZENODO_OD_ID, "ZENODO");
        PID_AUTHORITY.get(PidType.doi).put(ZENODO_R3_ID, "Zenodo");

        PID_AUTHORITY.put(PidType.pmc, HashBiMap.create());
        PID_AUTHORITY.get(PidType.pmc).put(EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central");
        PID_AUTHORITY.get(PidType.pmc).put(PUBMED_CENTRAL_ID, "PubMed Central");

        PID_AUTHORITY.put(PidType.pmid, HashBiMap.create());
        PID_AUTHORITY.get(PidType.pmid).put(EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central");
        PID_AUTHORITY.get(PidType.pmid).put(PUBMED_CENTRAL_ID, "PubMed Central");

        PID_AUTHORITY.put(PidType.arXiv, HashBiMap.create());
        PID_AUTHORITY.get(PidType.arXiv).put(ARXIV_ID, "arXiv.org e-Print Archive");
    }

    /**
     * Declares the associations PID_TYPE -> [DATASOURCE ID, PID SUBSTRING] considered as delegated authority for that
     * PID_TYPE. Example, Zenodo is delegated to forge DOIs that contain the 'zenodo' word.
     */
    public static final Map<PidType, Map<String, String>> DELEGATED_PID_AUTHORITY = Maps.newHashMap();

    static {
        DELEGATED_PID_AUTHORITY.put(PidType.doi, new HashMap<>());
        DELEGATED_PID_AUTHORITY.get(PidType.doi).put(ZENODO_OD_ID, "zenodo");
        DELEGATED_PID_AUTHORITY.get(PidType.doi).put(ZENODO_R3_ID, "zenodo");
    }

    /**
     * Declares the associations PID_TYPE -> [DATASOURCE ID, NAME] whose records are considered enrichment for the graph.
     * Their OpenAIRE ID is built from the declared PID type. They are merged with their corresponding record,
     * identified by the same OpenAIRE id.
     */
    public static final Map<PidType, HashBiMap<String, String>> ENRICHMENT_PROVIDER = Maps.newHashMap();

    static {
        ENRICHMENT_PROVIDER.put(PidType.doi, HashBiMap.create());
        ENRICHMENT_PROVIDER.get(PidType.doi).put(OPEN_APC_ID, OPEN_APC_NAME);
    }

    /**
     * @return the identifiers of all the datasources declared in {@link #DELEGATED_PID_AUTHORITY}, whatever the
     *         PID type they are delegated for.
     */
    public static Set<String> delegatedAuthorityDatasourceIds() {
        return DELEGATED_PID_AUTHORITY
            .values()
            .stream()
            .flatMap(m -> m.keySet().stream())
            .collect(Collectors.toCollection(HashSet::new));
    }

    /**
     * Selects, normalises and de-duplicates the PIDs of an instance that are acceptable for the given provenance.
     * Handles are always accepted here (see {@link #shouldFilterPidByCriteria}).
     *
     * @param pid the PIDs declared by the instance, may be null.
     * @param collectedFrom the datasource the instance was collected from.
     * @return the list of accepted PIDs, possibly empty.
     */
    public static List<StructuredProperty> getPids(List<StructuredProperty> pid, KeyValue collectedFrom) {
        return pidFromInstance(pid, collectedFrom, true).distinct().collect(Collectors.toList());
    }

    /**
     * Creates the (md5 based) identifier of a DOIBoost record from one of its PIDs.
     * When the entity declares a PID list, only its DOIs are considered; otherwise the PIDs of the instances are used.
     *
     * @param entity the result, may be null.
     * @param <T> the specific Result type.
     * @return the identifier, or null when the entity is null or no suitable PID is found.
     */
    public static <T extends Result> String createDOIBoostIdentifier(T entity) {
        if (entity == null)
            return null;

        StructuredProperty pid = null;
        if (entity.getPid() != null) {
            pid = entity
                .getPid()
                .stream()
                .filter(Objects::nonNull)
                .filter(s -> s.getQualifier() != null && "doi".equalsIgnoreCase(s.getQualifier().getClassid()))
                .filter(CleaningFunctions::pidFilter)
                .findAny()
                .orElse(null);
        } else if (entity.getInstance() != null) {
            // NOTE(review): unlike the branch above, instance PIDs are not restricted to DOIs - confirm it is intended
            pid = entity
                .getInstance()
                .stream()
                .filter(i -> i != null && i.getPid() != null)
                .flatMap(i -> i.getPid().stream())
                // same null-safety applied to the entity level PIDs
                .filter(Objects::nonNull)
                .filter(CleaningFunctions::pidFilter)
                .findAny()
                .orElse(null);
        }

        return pid != null ? idFromPid(entity, pid, true) : null;
    }

    /**
     * Creates an identifier from the most relevant PID (if available) provided by a known PID authority in the given
     * entity T. Returns entity.id when none of the PIDs meets the selection criteria.
     *
     * @param entity the entity providing PIDs and a default ID.
     * @param <T> the specific entity type. Currently Organization and Result subclasses are supported.
     * @param md5 indicates whether should hash the PID value or not.
     * @return an identifier from the most relevant PID, entity.id otherwise
     * @throws IllegalArgumentException when the entity identifier is blank
     */
    public static <T extends OafEntity> String createIdentifier(T entity, boolean md5) {

        checkArgument(StringUtils.isNoneBlank(entity.getId()), "missing entity identifier");

        final Map<String, Set<StructuredProperty>> pids = extractPids(entity);

        return pids
            .values()
            .stream()
            .flatMap(Set::stream)
            // 1. pick the most relevant PID, hence the most relevant PID type
            .min(new PidComparator<>(entity))
            .map(
                min -> Optional
                    .ofNullable(pids.get(min.getQualifier().getClassid()))
                    .map(
                        p -> p
                            .stream()
                            // 2. among the PIDs of that type, pick the first one according to PidValueComparator
                            .min(new PidValueComparator())
                            .map(s -> idFromPid(entity, s, md5))
                            .orElseGet(entity::getId))
                    .orElseGet(entity::getId))
            .orElseGet(entity::getId);
    }

    /**
     * Tells whether a PID carries everything that is dereferenced while selecting and normalising it: a qualifier,
     * a class id and a value. PIDs failing this check cannot be grouped by type and would otherwise raise an
     * exception before {@link CleaningFunctions#pidFilter(StructuredProperty)} gets the chance to discard them.
     */
    private static boolean isWellFormed(StructuredProperty p) {
        return p != null
            && p.getQualifier() != null
            && p.getQualifier().getClassid() != null
            && p.getValue() != null;
    }

    /**
     * Groups by PID type the usable PIDs of an entity: for Results they are taken from the instances (and selected
     * according to their provenance), for any other entity from the entity level PID list.
     *
     * @return the map PID type -> PIDs, empty when the entity declares no instances / no PIDs.
     */
    private static <T extends OafEntity> Map<String, Set<StructuredProperty>> extractPids(T entity) {
        if (entity instanceof Result) {
            return Optional
                .ofNullable(((Result) entity).getInstance())
                .map(IdentifierFactory::mapPids)
                .orElseGet(HashMap::new);
        }

        final List<StructuredProperty> entityPids = entity.getPid();
        if (entityPids == null) {
            // no PIDs at all: createIdentifier falls back on the entity id
            return new HashMap<>();
        }
        return entityPids
            .stream()
            .filter(IdentifierFactory::isWellFormed)
            .map(CleaningFunctions::normalizePidValue)
            .filter(CleaningFunctions::pidFilter)
            .collect(
                Collectors
                    .groupingBy(
                        p -> p.getQualifier().getClassid(),
                        Collectors.mapping(p -> p, Collectors.toCollection(HashSet::new))));
    }

    /**
     * Groups by PID type the PIDs accepted by {@link #pidFromInstance} for each of the given instances.
     */
    private static Map<String, Set<StructuredProperty>> mapPids(List<Instance> instance) {
        return instance
            .stream()
            .filter(Objects::nonNull)
            .map(i -> pidFromInstance(i.getPid(), i.getCollectedfrom(), false))
            .flatMap(Function.identity())
            .collect(
                Collectors
                    .groupingBy(
                        p -> p.getQualifier().getClassid(),
                        Collectors.mapping(p -> p, Collectors.toCollection(HashSet::new))));
    }

    /**
     * Selects and normalises the PIDs of an instance.
     *
     * @param pid the PIDs of the instance, may be null.
     * @param collectedFrom the datasource the instance was collected from.
     * @param mapHandles when true, handles are accepted whatever the datasource is.
     * @return the stream of accepted PIDs; their values are normalised in place.
     */
    private static Stream<StructuredProperty> pidFromInstance(List<StructuredProperty> pid, KeyValue collectedFrom,
        boolean mapHandles) {
        return Optional
            .ofNullable(pid)
            .map(
                pp -> pp
                    .stream()
                    // skip the PIDs that cannot be evaluated (missing qualifier, class id or value)
                    .filter(IdentifierFactory::isWellFormed)
                    // filter away PIDs provided by a DS that is not considered an authority for the
                    // given PID Type
                    .filter(p -> shouldFilterPidByCriteria(collectedFrom, p, mapHandles))
                    .map(CleaningFunctions::normalizePidValue)
                    .filter(p -> isNotFromDelegatedAuthority(collectedFrom, p))
                    .filter(CleaningFunctions::pidFilter))
            .orElse(Stream.empty());
    }

    /**
     * Despite its name, returns true when the PID must be KEPT: the datasource is declared either as an enrichment
     * provider or as an authority for the PID type, or the PID is a handle and mapHandles is true.
     * PIDs without provenance are never kept.
     */
    private static boolean shouldFilterPidByCriteria(KeyValue collectedFrom, StructuredProperty p, boolean mapHandles) {
        if (Objects.isNull(collectedFrom)) {
            return false;
        }

        final PidType pType = PidType.tryValueOf(p.getQualifier().getClassid());

        boolean isEnrich = Optional
            .ofNullable(ENRICHMENT_PROVIDER.get(pType))
            .map(
                enrich -> enrich.containsKey(collectedFrom.getKey())
                    || enrich.containsValue(collectedFrom.getValue()))
            .orElse(false);

        boolean isAuthority = Optional
            .ofNullable(PID_AUTHORITY.get(pType))
            .map(
                authorities -> authorities.containsKey(collectedFrom.getKey())
                    || authorities.containsValue(collectedFrom.getValue()))
            .orElse(false);

        // constant-first equals: safe even if the PID type could not be resolved
        return (mapHandles && PidType.handle.equals(pType)) || isEnrich || isAuthority;
    }

    /**
     * Returns false only when the datasource is a delegated authority for the PID type and the PID value does not
     * contain the substring declared for it in {@link #DELEGATED_PID_AUTHORITY}.
     */
    private static boolean isNotFromDelegatedAuthority(KeyValue collectedFrom, StructuredProperty p) {
        final PidType pType = PidType.tryValueOf(p.getQualifier().getClassid());

        final Map<String, String> da = DELEGATED_PID_AUTHORITY.get(pType);
        if (Objects.isNull(da)) {
            return true;
        }
        if (!da.containsKey(collectedFrom.getKey())) {
            return true;
        }
        return StringUtils.contains(p.getValue(), da.get(collectedFrom.getKey()));
    }

    /**
     * Same as {@link #createIdentifier(OafEntity, boolean)}, hashing the PID value.
     *
     * @see IdentifierFactory#createIdentifier(OafEntity, boolean)
     */
    public static <T extends OafEntity> String createIdentifier(T entity) {

        return createIdentifier(entity, true);
    }

    private static <T extends OafEntity> String idFromPid(T entity, StructuredProperty s, boolean md5) {
        return idFromPid(ModelSupport.getIdPrefix(entity.getClass()), s.getQualifier().getClassid(), s.getValue(), md5);
    }

    /**
     * Builds an identifier shaped as
     * {@code <numericPrefix>|<pid type, truncated or '_' padded to 12 chars>::<pid value or its md5>}.
     *
     * @param numericPrefix the prefix of the entity type.
     * @param pidType the PID type.
     * @param pidValue the PID value.
     * @param md5 when true the PID value is replaced by its md5 hash.
     * @return the identifier.
     */
    public static String idFromPid(String numericPrefix, String pidType, String pidValue, boolean md5) {
        return new StringBuilder()
            .append(numericPrefix)
            .append(ID_PREFIX_SEPARATOR)
            .append(createPrefix(pidType))
            .append(ID_SEPARATOR)
            .append(md5 ? md5(pidValue) : pidValue)
            .toString();
    }

    // create the prefix (length = 12)
    private static String createPrefix(String pidType) {
        StringBuilder prefix = new StringBuilder(StringUtils.left(pidType, ID_PREFIX_LEN));
        while (prefix.length() < ID_PREFIX_LEN) {
            prefix.append("_");
        }
        return prefix.substring(0, ID_PREFIX_LEN);
    }

    /**
     * Computes the hexadecimal md5 digest of the UTF-8 bytes of the given string.
     *
     * @param s the string to hash.
     * @return the digest, or null when it cannot be computed (e.g. s is null). Beware that
     *         {@link #idFromPid(String, String, String, boolean)} appends such a null as the literal "null".
     */
    public static String md5(final String s) {
        try {
            final MessageDigest md = MessageDigest.getInstance("MD5");
            md.update(s.getBytes(StandardCharsets.UTF_8));
            return new String(Hex.encodeHex(md.digest()));
        } catch (final Exception e) {
            // best effort by design: callers receive null instead of an exception
            return null;
        }
    }

}

View File

@ -0,0 +1,24 @@
package eu.dnetlib.dhp.schema.oaf.utils;
/**
 * Named constants: the tokens used to compose collection names and the maximum sizes
 * (number of elements, text lengths) declared for the model.
 */
public class ModelHardLimits {

    // tokens composing a collection name, see getCollectionName
    public static final String LAYOUT = "index";
    public static final String INTERPRETATION = "openaire";
    public static final String SEPARATOR = "-";

    // maximum number of elements
    public static final int MAX_EXTERNAL_ENTITIES = 50;
    public static final int MAX_AUTHORS = 200;
    public static final int MAX_TITLES = 10;
    public static final int MAX_INSTANCES = 10;

    // maximum text lengths
    public static final int MAX_AUTHOR_FULLNAME_LENGTH = 1000;
    public static final int MAX_TITLE_LENGTH = 5000;
    public static final int MAX_ABSTRACT_LENGTH = 150000;

    /** Constants holder, not meant to be instantiated. */
    private ModelHardLimits() {
    }

    /**
     * Composes a collection name as {@code <format>-index-openaire}.
     *
     * @param format the metadata format, used as the first token of the name
     * @return the collection name
     */
    public static String getCollectionName(String format) {
        return String.join(SEPARATOR, format, LAYOUT, INTERPRETATION);
    }
}

View File

@ -251,6 +251,9 @@ public class OafMapperUtils {
return p;
}
public static Journal journal(
final String name,
final String issnPrinted,

View File

@ -0,0 +1,765 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import com.google.common.base.Preconditions;
import eu.dnetlib.dhp.common.api.zenodo.File;
import eu.dnetlib.dhp.schema.common.AccessRightComparator;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import static com.google.common.base.Preconditions.checkArgument;
import static com.ibm.icu.text.PluralRules.Operand.e;
import java.text.ParseException;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
 * Groups the utility methods used for OAF generation:
 * PID-based enrichment of instances and merging of OAF objects.
 */
public class OafUtils {
/**
 * Builds the lookup key of a PID, shaped as {@code <pid type>::<normalised pid value>}.
 * <p>
 * The PID goes through {@link CleaningFunctions#normalizePidValue(StructuredProperty)}, which replaces
 * the value of the given {@code pid} in place.
 *
 * @param pid the PID to build the key for; when not null it must declare a qualifier and a value
 * @return the key, or {@code null} when {@code pid} is {@code null}
 */
public static String extractKeyFromPid(final StructuredProperty pid) {
    if (pid == null) {
        return null;
    }
    final StructuredProperty cleaned = CleaningFunctions.normalizePidValue(pid);
    final String pidType = cleaned.getQualifier().getClassid();
    return pidType + "::" + cleaned.getValue();
}
/**
 * Indexes the enrichment instances by the keys of their identifiers.
 * <p>
 * Every valid PID and alternate identifier of an instance yields one entry, whose key is built by
 * {@link #extractKeyFromPid(StructuredProperty)}. When several instances share a key, the first one
 * encountered is kept.
 *
 * @param ri the list of enrichment instances
 * @return the map key -> instance
 */
public static Map<String, Instance> toInstanceMap(final List<Instance> ri) {
    final Map<String, Instance> instancesByKey = new HashMap<>();
    for (final Instance instance : ri) {
        if (instance.getPid() != null) {
            for (final StructuredProperty pid : instance.getPid()) {
                if (validPid(pid)) {
                    // putIfAbsent: the first instance registered for a key wins
                    instancesByKey.putIfAbsent(extractKeyFromPid(pid), instance);
                }
            }
        }
        if (instance.getAlternateIdentifier() != null) {
            for (final StructuredProperty alternateId : instance.getAlternateIdentifier()) {
                if (validPid(alternateId)) {
                    instancesByKey.putIfAbsent(extractKeyFromPid(alternateId), instance);
                }
            }
        }
    }
    return instancesByKey;
}
/**
 * Tells whether a PID can be turned into a lookup key, i.e. it is not null and declares
 * a value, a qualifier and the class id of the qualifier.
 *
 * @param p the PID to check, may be null
 * @return true when all the parts needed to build the key are available
 */
private static boolean validPid(final StructuredProperty p) {
    return p != null
        && p.getValue() != null
        && p.getQualifier() != null
        && p.getQualifier().getClassid() != null;
}
/**
 * Finds the enrichment instances that match one or more PIDs of the input list.
 * <p>
 * PIDs that cannot be turned into a key (null, or missing value / qualifier / class id) are skipped,
 * and an enrichment instance matched by several PIDs is returned once, so that it is not applied twice.
 *
 * @param pids the list of PIDs, may be null
 * @param enrichments the enrichment instances indexed by PID key, may be null
 * @return the matching enrichment instances; an empty list when there is no match or an input is null
 */
private static List<Instance> findEnrichmentsByPID(final List<StructuredProperty> pids, final Map<String,Instance> enrichments) {
    if (pids == null || enrichments == null) {
        return Collections.emptyList();
    }
    return pids
        .stream()
        // guard against the exceptions raised by extractKeyFromPid on incomplete PIDs
        .filter(Objects::nonNull)
        .filter(OafUtils::validPid)
        .map(OafUtils::extractKeyFromPid)
        .map(enrichments::get)
        .filter(Objects::nonNull)
        .distinct()
        .collect(Collectors.toList());
}
/**
 * Tells whether an entity is an enrichment record, i.e. the class id of its provenance action
 * equals (ignoring case) {@code ModelConstants.PROVENANCE_ENRICH}.
 *
 * @param e the entity to check
 * @return true for enrichment records; false otherwise, also when the data info or its provenance action is missing
 */
public static boolean isAnEnrichment(OafEntity e) {
    if (e.getDataInfo() == null) {
        return false;
    }
    if (e.getDataInfo().getProvenanceaction() == null) {
        return false;
    }
    final String provenanceClassId = e.getDataInfo().getProvenanceaction().getClassid();
    return ModelConstants.PROVENANCE_ENRICH.equalsIgnoreCase(provenanceClassId);
}
/**
 * Applies an enrichment to a single instance.
 * Single valued attributes are copied from the enrichment only when they are missing in the
 * current instance. The only repeatable field enriched is measures: the measures of the
 * enrichment are appended to the ones of the current instance.
 *
 * @param currentInstance the instance to enrich, updated in place
 * @param enrichment the enrichment instance, never modified
 */
private static void applyEnrichment(final Instance currentInstance, final Instance enrichment) {
    if (currentInstance == null || enrichment == null) {
        return;
    }

    // ENRICH accessright
    if (enrichment.getAccessright() != null && currentInstance.getAccessright() == null) {
        currentInstance.setAccessright(enrichment.getAccessright());
    }

    // ENRICH license
    if (enrichment.getLicense() != null && currentInstance.getLicense() == null) {
        currentInstance.setLicense(enrichment.getLicense());
    }

    // ENRICH instanceType
    if (enrichment.getInstancetype() != null && currentInstance.getInstancetype() == null) {
        currentInstance.setInstancetype(enrichment.getInstancetype());
    }

    // ENRICH hostedby
    if (enrichment.getHostedby() != null && currentInstance.getHostedby() == null) {
        currentInstance.setHostedby(enrichment.getHostedby());
    }

    // ENRICH distributionlocation
    if (enrichment.getDistributionlocation() != null && currentInstance.getDistributionlocation() == null) {
        currentInstance.setDistributionlocation(enrichment.getDistributionlocation());
    }

    // ENRICH collectedfrom
    if (enrichment.getCollectedfrom() != null && currentInstance.getCollectedfrom() == null) {
        currentInstance.setCollectedfrom(enrichment.getCollectedfrom());
    }

    // ENRICH dateofacceptance
    if (enrichment.getDateofacceptance() != null && currentInstance.getDateofacceptance() == null) {
        currentInstance.setDateofacceptance(enrichment.getDateofacceptance());
    }

    // ENRICH processingchargeamount
    if (enrichment.getProcessingchargeamount() != null && currentInstance.getProcessingchargeamount() == null) {
        currentInstance.setProcessingchargeamount(enrichment.getProcessingchargeamount());
    }

    // ENRICH refereed
    if (enrichment.getRefereed() != null && currentInstance.getRefereed() == null) {
        currentInstance.setRefereed(enrichment.getRefereed());
    }

    // ENRICH measures
    if (enrichment.getMeasures() != null) {
        if (currentInstance.getMeasures() == null) {
            // copy instead of sharing the list: sharing it would let later additions to the current
            // instance leak into the enrichment, and make a second application of the same enrichment
            // iterate over the very list it is adding to (ConcurrentModificationException)
            currentInstance.setMeasures(new ArrayList<>(enrichment.getMeasures()));
        } else {
            // the snapshot keeps the append safe even when both instances already share one list
            currentInstance.getMeasures().addAll(new ArrayList<>(enrichment.getMeasures()));
        }
    }
}
/**
 * Applies the enrichment instances to the instances that could be enriched.
 * For each instance the enrichments are looked up by PID first and, only when none matches,
 * by alternate identifier.
 *
 * @param toEnrichInstances the instances that could be enriched, updated in place
 * @param enrichmentInstances the enrichment instances
 * @return a new list holding all the input instances, possibly enriched; an empty list when
 *         {@code toEnrichInstances} is null
 */
private static List<Instance> enrichInstances(final List<Instance> toEnrichInstances,final List<Instance> enrichmentInstances) {
    if (toEnrichInstances == null) {
        return new ArrayList<>();
    }
    final List<Instance> enrichmentResult = new ArrayList<>(toEnrichInstances);
    if (enrichmentInstances == null) {
        // nothing to apply: the instances must be preserved as they are, not dropped
        return enrichmentResult;
    }

    final Map<String, Instance> enrichmentsByKey = toInstanceMap(enrichmentInstances);

    for (final Instance instance : toEnrichInstances) {
        List<Instance> matches = findEnrichmentsByPID(instance.getPid(), enrichmentsByKey);
        if (matches == null || matches.isEmpty()) {
            matches = findEnrichmentsByPID(instance.getAlternateIdentifier(), enrichmentsByKey);
        }
        if (matches != null) {
            matches.forEach(enrichment -> applyEnrichment(instance, enrichment));
        }
    }
    return enrichmentResult;
}
/**
 * Chooses between two lists of fields the one carrying more content.
 * The list with more elements wins; when the sizes are equal, the list holding the longest
 * value wins, and {@code b} is returned when that is a tie too.
 *
 * @param a the first list, may be null
 * @param b the second list, may be null
 * @return the chosen list; the other list when one of them is null (hence null when both are null)
 */
public static List<Field<String>> longestLists(List<Field<String>> a, List<Field<String>> b) {
    if (a == null) {
        return b;
    }
    if (b == null) {
        return a;
    }
    if (a.size() != b.size()) {
        return a.size() > b.size() ? a : b;
    }

    // same number of elements: compare the length of the longest value of each list
    final int longestInA = a
        .stream()
        .filter(f -> f != null && f.getValue() != null)
        .mapToInt(f -> f.getValue().length())
        .max()
        .orElse(0);
    final int longestInB = b
        .stream()
        .filter(f -> f != null && f.getValue() != null)
        .mapToInt(f -> f.getValue().length())
        .max()
        .orElse(0);

    return longestInA > longestInB ? a : b;
}
/**
 * Merges two oaf objects of the same type.
 * It should replace the mergeFrom utility inside the oaf class.
 * <p>
 * The type specific merge is selected by the lower-cased simple class name; on top of it the
 * collectedfrom lists are concatenated without duplicates and the most recent
 * lastupdatetimestamp is kept (a missing timestamp counts as 0).
 *
 * @param original First object
 * @param enrich second object
 * @return the merged object; the other argument when one of them is null
 * @throws RuntimeException when the two objects are of different classes
 * @throws IllegalArgumentException when the type of the objects is not supported
 */
public static Oaf mergeOAF(Oaf original, Oaf enrich) {
    if (original == null)
        return enrich;
    if (enrich == null)
        return original;
    if (original.getClass() != enrich.getClass())
        throw new RuntimeException(String.format("Trying to merge different types of object %s, and %s",
            original.getClass().getCanonicalName(),
            enrich.getClass().getCanonicalName()));

    Oaf result = null;
    switch (original.getClass().getSimpleName().toLowerCase()) {
        case "relation":
            result = mergeOAFRelation((Relation) original, (Relation) enrich);
            break;
        case "publication":
            result = mergeOAFPublication((Publication) original, (Publication) enrich);
            break;
        case "dataset":
            result = mergeOAFDataset((Dataset) original, (Dataset) enrich);
            break;
        case "software":
            result = mergeOAFSoftware((Software) original, (Software) enrich);
            break;
        case "otherresearchproduct":
            result = mergeOAFOtherResearchProduct((OtherResearchProduct) original, (OtherResearchProduct) enrich);
            break;
        case "project":
            result = mergeOAFProject((Project) original, (Project) enrich);
            break;
        case "organization":
            result = mergeOAFOrganization((Organization) original, (Organization) enrich);
            break;
        default:
            // unsupported type, reported right below
            break;
    }

    // the message names the offending type and lists every type handled by the switch above
    checkArgument(
        Objects.nonNull(result),
        "Undefined type on merge: %s, allowed: relation, publication, dataset, software, "
            + "otherresearchproduct, project and organization",
        original.getClass().getCanonicalName());

    result.setCollectedfrom(
        Stream
            .concat(
                Optional
                    .ofNullable(original.getCollectedfrom())
                    .map(Collection::stream)
                    .orElse(Stream.empty()),
                Optional
                    .ofNullable(enrich.getCollectedfrom())
                    .map(Collection::stream)
                    .orElse(Stream.empty()))
            .distinct() // relies on KeyValue.equals
            .collect(Collectors.toList()));

    result.setLastupdatetimestamp(
        Math
            .max(
                Optional.ofNullable(original.getLastupdatetimestamp()).orElse(0L),
                Optional.ofNullable(enrich.getLastupdatetimestamp()).orElse(0L)));

    return result;
}
/**
 * Concatenates the given lists into a new list, skipping null lists and null elements and
 * keeping only the first occurrence of equal elements, in encounter order.
 *
 * @param lists the lists to merge; single lists may be null
 * @param <T> the type of the elements
 * @return a new list with the distinct, non null elements
 */
@SafeVarargs
public static <T> List<T> mergeLists(final List<T>... lists) {
    // an insertion ordered set keeps the first occurrence of each element
    final Set<T> distinctItems = new LinkedHashSet<>();
    for (final List<T> list : lists) {
        if (list == null) {
            continue;
        }
        for (final T item : list) {
            if (item != null) {
                distinctItems.add(item);
            }
        }
    }
    return new ArrayList<>(distinctItems);
}
/**
 * Compares the trust of two oaf objects.
 * Trust values are decimal numbers stored as strings, hence they are compared numerically:
 * a lexicographic comparison would e.g. rank "0.9" before "0.90" and ".9" before "0.8".
 *
 * @param a the first object
 * @param b the second object
 * @return a negative value when a is less trusted than b, zero when they are equally trusted,
 *         a positive value otherwise
 */
private static int compareTrust(Oaf a, Oaf b) {
    final String trustA = extractTrust(a);
    final String trustB = extractTrust(b);
    try {
        return Double.compare(Double.parseDouble(trustA), Double.parseDouble(trustB));
    } catch (NumberFormatException notNumeric) {
        // at least one trust is not a number: fall back on the former lexicographic ordering
        return trustA.compareTo(trustB);
    }
}
/**
 * Reads the trust declared in the data info of an oaf object.
 *
 * @param e the object, may be null
 * @return the trust, "0.0" when the object, its data info or the trust itself is missing
 */
private static String extractTrust(Oaf e) {
    final boolean trustMissing = e == null
        || e.getDataInfo() == null
        || e.getDataInfo().getTrust() == null;
    return trustMissing ? "0.0" : e.getDataInfo().getTrust();
}
/**
 * Replaces the data info of {@code from} with the one of {@code to} when {@code from} is less
 * trusted than {@code to} or is flagged as invisible.
 * Nothing happens when {@code to} is null or either object has no data info.
 *
 * @param from the object whose data info is possibly replaced (beware of the naming: it is the target)
 * @param to the object providing the replacement data info
 */
private static void mergeOAFDataInfo(Oaf from, Oaf to) {
    if (to == null || to.getDataInfo() == null || from.getDataInfo() == null) {
        return;
    }
    // Boolean.TRUE.equals: a missing invisible flag counts as false instead of failing on unboxing
    final boolean fromIsInvisible = Boolean.TRUE.equals(from.getDataInfo().getInvisible());
    if (compareTrust(from, to) < 0 || fromIsInvisible) {
        from.setDataInfo(to.getDataInfo());
    }
}
/**
 * Finds the first title whose qualifier class id is "main title".
 *
 * @param titles the titles to look into
 * @return the first main title, null when there is none
 */
private static StructuredProperty getMainTitle(List<StructuredProperty> titles) {
    // need to check if the list of titles contains more than 1 main title? (in that case, we should chose which
    // main title select in the list)
    return titles
        .stream()
        .filter(t -> t.getQualifier() != null && "main title".equals(t.getQualifier().getClassid()))
        .findFirst()
        .orElse(null);
}
/**
 * Merges the attributes shared by all the entities. The merge happens in place on {@code original}.
 * List attributes (originalId, pid, extraInfo) are merged without duplicates; dateofcollection,
 * dateoftransformation and oaiprovenance are taken from {@code enrich} only when it declares
 * them and it is more trusted than {@code original}.
 *
 * @param original the entity to update
 * @param enrich the entity providing the additional information
 * @return {@code original}, updated
 */
private static OafEntity mergeEntity(OafEntity original, OafEntity enrich) {
    // the trust is not affected by the attributes set below, hence it is compared only once
    final boolean enrichIsMoreTrusted = compareTrust(original, enrich) < 0;

    original.setOriginalId(mergeLists(original.getOriginalId(), enrich.getOriginalId()));
    original.setPid(mergeLists(original.getPid(), enrich.getPid()));

    if (enrichIsMoreTrusted && enrich.getDateofcollection() != null) {
        original.setDateofcollection(enrich.getDateofcollection());
    }
    if (enrichIsMoreTrusted && enrich.getDateoftransformation() != null) {
        original.setDateoftransformation(enrich.getDateoftransformation());
    }

    original.setExtraInfo(mergeLists(original.getExtraInfo(), enrich.getExtraInfo()));

    if (enrichIsMoreTrusted && enrich.getOaiprovenance() != null) {
        original.setOaiprovenance(enrich.getOaiprovenance());
    }
    return original;
}
/**
 * Merges the attributes shared by all the results (publication, dataset, software, other research product).
 * The entity level attributes are merged first through mergeEntity, then the result specific ones:
 * list attributes are merged without duplicates through mergeLists, while most of the single valued
 * attributes are taken from enrich only when compareTrust ranks the merged result below it.
 * Beware that the title list of enrich is updated too (its main title is removed).
 *
 * @param original the result to merge into
 * @param enrich the result providing the additional information
 * @return the merged result, as returned by mergeEntity
 */
private static Result mergeOAFResult(Result original, Result enrich) {
final Result mergedResult = (Result) mergeEntity(original, enrich);
// the processing charge (amount and currency) comes from enrich when the merged result has no amount
if(mergedResult.getProcessingchargeamount() == null || StringUtils.isBlank(mergedResult.getProcessingchargeamount().getValue() )){
mergedResult.setProcessingchargeamount(enrich.getProcessingchargeamount());
mergedResult.setProcessingchargecurrency(enrich.getProcessingchargecurrency());
}
mergedResult.setMeasures(mergeLists(mergedResult.getMeasures(), enrich.getMeasures()));
// instances: plain union when none of the two records is an enrichment ...
if( !isAnEnrichment(mergedResult) && !isAnEnrichment(enrich))
mergedResult.setInstance(mergeLists(mergedResult.getInstance(), enrich.getInstance()));
else {
// ... otherwise the instances of the enrichment record are applied to the instances of the other record
final List<Instance> enrichmentInstances = isAnEnrichment(mergedResult) ? mergedResult.getInstance() : enrich.getInstance();
final List<Instance> enrichedInstances= isAnEnrichment(mergedResult) ? enrich.getInstance(): mergedResult.getInstance();
// when the merged result itself is the enrichment, it takes the data info of the other record;
// this must stay after the two assignments above, since it changes what isAnEnrichment(mergedResult) returns
if (isAnEnrichment(mergedResult))
mergedResult.setDataInfo(enrich.getDataInfo());
mergedResult.setInstance(enrichInstances(enrichedInstances,enrichmentInstances));
}
// the best access right of enrich replaces the current one when AccessRightComparator ranks it first
if (enrich.getBestaccessright() != null
&& new AccessRightComparator().compare(enrich.getBestaccessright(), mergedResult.getBestaccessright()) < 0)
mergedResult.setBestaccessright(enrich.getBestaccessright());
if (enrich.getResulttype() != null && compareTrust(mergedResult, enrich) < 0)
mergedResult.setResulttype(enrich.getResulttype());
if (enrich.getLanguage() != null && compareTrust(mergedResult, enrich) < 0)
mergedResult.setLanguage(enrich.getLanguage());
// date of acceptance: taken from enrich when missing, or when enrich is more trusted
if (Objects.nonNull(enrich.getDateofacceptance())) {
if (Objects.isNull(mergedResult.getDateofacceptance())) {
mergedResult.setDateofacceptance(enrich.getDateofacceptance());
} else if (compareTrust(mergedResult, enrich) < 0) {
mergedResult.setDateofacceptance(enrich.getDateofacceptance());
}
}
mergedResult.setCountry(mergeLists(mergedResult.getCountry(), enrich.getCountry()));
mergedResult.setSubject(mergeLists(mergedResult.getSubject(), enrich.getSubject()));
// merge title lists: main title with higher trust and distinct between the others
StructuredProperty baseMainTitle = null;
if (mergedResult.getTitle() != null) {
baseMainTitle = getMainTitle(mergedResult.getTitle());
if (baseMainTitle != null) {
final StructuredProperty p = baseMainTitle;
// the main title is removed by reference (t != p), it is appended again after the merge
mergedResult.setTitle(mergedResult.getTitle().stream().filter(t -> t != p).collect(Collectors.toList()));
}
}
StructuredProperty newMainTitle = null;
if (enrich.getTitle() != null) {
newMainTitle = getMainTitle(enrich.getTitle());
if (newMainTitle != null) {
final StructuredProperty p = newMainTitle;
// side effect on enrich: its main title is removed from its own title list
enrich.setTitle(enrich.getTitle().stream().filter(t -> t != p).collect(Collectors.toList()));
}
}
// the main title of enrich wins only when enrich is more trusted
if (newMainTitle != null && compareTrust(mergedResult, enrich) < 0) {
baseMainTitle = newMainTitle;
}
mergedResult.setTitle(mergeLists(mergedResult.getTitle(), enrich.getTitle()));
// the selected main title goes at the end of the merged title list
if (mergedResult.getTitle() != null && baseMainTitle != null) {
mergedResult.getTitle().add(baseMainTitle);
}
mergedResult.setRelevantdate (mergeLists(mergedResult.getRelevantdate(), enrich.getRelevantdate()));
// descriptions are not merged: one of the two lists is chosen by longestLists
mergedResult.setDescription( longestLists(mergedResult.getDescription(), enrich.getDescription()));
if (enrich.getPublisher() != null && compareTrust(mergedResult, enrich) < 0)
mergedResult.setPublisher(enrich.getPublisher());
if (enrich.getEmbargoenddate() != null && compareTrust(mergedResult, enrich) < 0)
mergedResult.setEmbargoenddate(enrich.getEmbargoenddate());
mergedResult.setSource(mergeLists(mergedResult.getSource(), enrich.getSource()));
mergedResult.setFulltext(mergeLists(mergedResult.getFulltext(), enrich.getFulltext()));
mergedResult.setFormat(mergeLists(mergedResult.getFormat(), enrich.getFormat()));
mergedResult.setContributor(mergeLists(mergedResult.getContributor(), enrich.getContributor()));
// NOTE(review): the resource type of enrich always wins, whatever the trust is - confirm it is intended
if (enrich.getResourcetype() != null)
mergedResult.setResourcetype(enrich.getResourcetype());
mergedResult.setCoverage(mergeLists(mergedResult.getCoverage(), enrich.getCoverage()));
mergedResult.setContext(mergeLists(mergedResult.getContext(), enrich.getContext()));
mergedResult.setExternalReference(mergeLists(mergedResult.getExternalReference(), enrich.getExternalReference()));
return mergedResult;
}
/**
 * Merges two other research products: the result level merge comes first, then the contact
 * persons, contact groups and tools are merged without duplicates and finally the data info.
 *
 * @param original the product to merge into
 * @param enrich the product providing the additional information
 * @return the merged product
 */
private static OtherResearchProduct mergeOAFOtherResearchProduct(OtherResearchProduct original, OtherResearchProduct enrich) {
    final OtherResearchProduct merged = (OtherResearchProduct) mergeOAFResult(original, enrich);

    merged.setContactperson(
        mergeLists(merged.getContactperson(), enrich.getContactperson()));
    merged.setContactgroup(
        mergeLists(merged.getContactgroup(), enrich.getContactgroup()));
    merged.setTool(
        mergeLists(merged.getTool(), enrich.getTool()));

    mergeOAFDataInfo(merged, enrich);
    return merged;
}
/**
 * Merges two software records: the result level merge comes first, then documentation urls and
 * licenses are merged without duplicates; the code repository url and the programming language
 * are taken from enrich only when it declares them and it is more trusted. The data info is
 * merged last.
 *
 * @param original the software to merge into
 * @param enrich the software providing the additional information
 * @return the merged software
 */
private static Software mergeOAFSoftware(Software original, Software enrich) {
    final Software merged = (Software) mergeOAFResult(original, enrich);

    merged.setDocumentationUrl(mergeLists(merged.getDocumentationUrl(), enrich.getDocumentationUrl()));
    merged.setLicense(mergeLists(merged.getLicense(), enrich.getLicense()));

    // evaluated before mergeOAFDataInfo, which may replace the data info (hence the trust) of merged
    final boolean enrichIsMoreTrusted = compareTrust(merged, enrich) < 0;
    if (enrichIsMoreTrusted && enrich.getCodeRepositoryUrl() != null) {
        merged.setCodeRepositoryUrl(enrich.getCodeRepositoryUrl());
    }
    if (enrichIsMoreTrusted && enrich.getProgrammingLanguage() != null) {
        merged.setProgrammingLanguage(enrich.getProgrammingLanguage());
    }

    mergeOAFDataInfo(merged, enrich);
    return merged;
}
/**
 * Merges two datasets: the result level merge comes first; storage date, device, size, version,
 * last metadata update and metadata version number are taken from enrich only when it declares
 * them and it is more trusted; the geolocations are merged without duplicates. The data info is
 * merged last.
 *
 * @param original the dataset to merge into
 * @param enrich the dataset providing the additional information
 * @return the merged dataset
 */
private static Dataset mergeOAFDataset(Dataset original, Dataset enrich) {
    final Dataset merged = (Dataset) mergeOAFResult(original, enrich);

    // evaluated before mergeOAFDataInfo, which may replace the data info (hence the trust) of merged
    final boolean enrichIsMoreTrusted = compareTrust(merged, enrich) < 0;
    if (enrichIsMoreTrusted) {
        if (enrich.getStoragedate() != null) {
            merged.setStoragedate(enrich.getStoragedate());
        }
        if (enrich.getDevice() != null) {
            merged.setDevice(enrich.getDevice());
        }
        if (enrich.getSize() != null) {
            merged.setSize(enrich.getSize());
        }
        if (enrich.getVersion() != null) {
            merged.setVersion(enrich.getVersion());
        }
        if (enrich.getLastmetadataupdate() != null) {
            merged.setLastmetadataupdate(enrich.getLastmetadataupdate());
        }
        if (enrich.getMetadataversionnumber() != null) {
            merged.setMetadataversionnumber(enrich.getMetadataversionnumber());
        }
    }

    merged.setGeolocation(mergeLists(merged.getGeolocation(), enrich.getGeolocation()));

    mergeOAFDataInfo(merged, enrich);
    return merged;
}
/**
 * Merges two publications: the result level merge comes first, then the journal is taken from
 * enrich when it declares one and it is more trusted. The data info is merged last.
 *
 * @param original the publication to merge into
 * @param enrich the publication providing the additional information
 * @return the merged publication
 */
private static Publication mergeOAFPublication(Publication original, Publication enrich) {
    final Publication merged = (Publication) mergeOAFResult(original, enrich);

    final boolean journalFromEnrich = enrich.getJournal() != null && compareTrust(merged, enrich) < 0;
    if (journalFromEnrich) {
        merged.setJournal(enrich.getJournal());
    }

    mergeOAFDataInfo(merged, enrich);
    return merged;
}
/**
 * Merges two relations connecting the same source and target with the same semantics.
 * The merge happens in place on {@code from}: it is validated when at least one of the two
 * relations is, and its validation date is the one chosen by {@code ModelSupport.oldest}.
 *
 * @param from the relation to update
 * @param to the relation providing the additional information
 * @return {@code from}, updated
 * @throws IllegalArgumentException when source, target, relType, subRelType or relClass differ,
 *         or when a validation date cannot be parsed
 */
private static Relation mergeOAFRelation(Relation from, Relation to) {
    checkArgument(Objects.equals(from.getSource(), to.getSource()), "source ids must be equal");
    checkArgument(Objects.equals(from.getTarget(), to.getTarget()), "target ids must be equal");
    checkArgument(Objects.equals(from.getRelType(), to.getRelType()), "relType(s) must be equal");
    checkArgument(
        Objects.equals(from.getSubRelType(), to.getSubRelType()), "subRelType(s) must be equal");
    checkArgument(Objects.equals(from.getRelClass(), to.getRelClass()), "relClass(es) must be equal");

    // Boolean.TRUE.equals: a missing validated flag counts as false instead of failing on unboxing
    from.setValidated(Boolean.TRUE.equals(from.getValidated()) || Boolean.TRUE.equals(to.getValidated()));
    try {
        from.setValidationDate(ModelSupport.oldest(from.getValidationDate(), to.getValidationDate()));
    } catch (ParseException parseError) {
        // keep the original exception as cause, it tells which date could not be parsed
        throw new IllegalArgumentException(String
            .format(
                "invalid validation date format in relation [s:%s, t:%s]: %s", from.getSource(), from.getTarget(),
                from.getValidationDate()),
            parseError);
    }

    return from;
}
/**
 * Merges two organizations on top of the generic entity merge.
 *
 * For every scalar field, the value of {@code enrich} is taken when it is non-null and
 * {@code compareTrust(merged, enrich)} is negative; otherwise the merged organization keeps its own value.
 * Alternative names are combined with {@code mergeLists}.
 *
 * @param original the organization used as the base of the merge
 * @param enrich the organization contributing additional information
 * @return the organization produced by {@code mergeEntity}, updated field by field
 */
private static Oaf mergeOAFOrganization(Organization original, Organization enrich) {
    final Organization mergedOrganization = (Organization) mergeEntity(original, enrich);
    // Evaluated once, before any field is updated; every field below uses this same comparison.
    final int ct = compareTrust(mergedOrganization, enrich);

    // The fallback must be the current short name: falling back to the legal name would overwrite it.
    mergedOrganization.setLegalshortname(enrich.getLegalshortname() != null && ct < 0
        ? enrich.getLegalshortname()
        : mergedOrganization.getLegalshortname());
    mergedOrganization.setLegalname(enrich.getLegalname() != null && ct < 0
        ? enrich.getLegalname()
        : mergedOrganization.getLegalname());
    mergedOrganization
        .setAlternativeNames(mergeLists(enrich.getAlternativeNames(), mergedOrganization.getAlternativeNames()));
    mergedOrganization.setWebsiteurl(enrich.getWebsiteurl() != null && ct < 0
        ? enrich.getWebsiteurl()
        : mergedOrganization.getWebsiteurl());
    mergedOrganization.setLogourl(enrich.getLogourl() != null && ct < 0
        ? enrich.getLogourl()
        : mergedOrganization.getLogourl());
    mergedOrganization.setEclegalbody(enrich.getEclegalbody() != null && ct < 0
        ? enrich.getEclegalbody()
        : mergedOrganization.getEclegalbody());
    mergedOrganization.setEclegalperson(enrich.getEclegalperson() != null && ct < 0
        ? enrich.getEclegalperson()
        : mergedOrganization.getEclegalperson());
    mergedOrganization.setEcnonprofit(enrich.getEcnonprofit() != null && ct < 0
        ? enrich.getEcnonprofit()
        : mergedOrganization.getEcnonprofit());
    mergedOrganization.setEcresearchorganization(enrich.getEcresearchorganization() != null && ct < 0
        ? enrich.getEcresearchorganization()
        : mergedOrganization.getEcresearchorganization());
    mergedOrganization.setEchighereducation(enrich.getEchighereducation() != null && ct < 0
        ? enrich.getEchighereducation()
        : mergedOrganization.getEchighereducation());
    mergedOrganization
        .setEcinternationalorganizationeurinterests(
            enrich.getEcinternationalorganizationeurinterests() != null && ct < 0
                ? enrich.getEcinternationalorganizationeurinterests()
                : mergedOrganization.getEcinternationalorganizationeurinterests());
    mergedOrganization.setEcinternationalorganization(enrich.getEcinternationalorganization() != null && ct < 0
        ? enrich.getEcinternationalorganization()
        : mergedOrganization.getEcinternationalorganization());
    mergedOrganization.setEcenterprise(enrich.getEcenterprise() != null && ct < 0
        ? enrich.getEcenterprise()
        : mergedOrganization.getEcenterprise());
    mergedOrganization.setEcsmevalidated(enrich.getEcsmevalidated() != null && ct < 0
        ? enrich.getEcsmevalidated()
        : mergedOrganization.getEcsmevalidated());
    mergedOrganization.setEcnutscode(enrich.getEcnutscode() != null && ct < 0
        ? enrich.getEcnutscode()
        : mergedOrganization.getEcnutscode());
    mergedOrganization.setCountry(enrich.getCountry() != null && ct < 0
        ? enrich.getCountry()
        : mergedOrganization.getCountry());

    mergeOAFDataInfo(mergedOrganization, enrich);
    return mergedOrganization;
}
/**
 * Merges two projects on top of the generic entity merge.
 *
 * Scalar fields take the value of {@code enrich} when it is non-null and
 * {@code compareTrust(merged, enrich)} is negative, and keep the merged value otherwise. Subjects, funding
 * tree and H2020 classification are combined with {@code mergeLists}. The H2020 topic code and description
 * are copied from {@code enrich} only when the merged project has no topic code yet.
 *
 * @param original the project used as the base of the merge
 * @param enrich the project contributing additional information
 * @return the project produced by {@code mergeEntity}, updated field by field
 */
private static Oaf mergeOAFProject(Project original, Project enrich) {
    final Project merged = (Project) mergeEntity(original, enrich);
    // Evaluated once, right after the entity merge; shared by every scalar field below.
    final boolean enrichWins = compareTrust(merged, enrich) < 0;

    merged.setWebsiteurl(
        enrich.getWebsiteurl() != null && enrichWins ? enrich.getWebsiteurl() : merged.getWebsiteurl());
    merged.setCode(
        enrich.getCode() != null && enrichWins ? enrich.getCode() : merged.getCode());
    merged.setAcronym(
        enrich.getAcronym() != null && enrichWins ? enrich.getAcronym() : merged.getAcronym());
    merged.setTitle(
        enrich.getTitle() != null && enrichWins ? enrich.getTitle() : merged.getTitle());
    merged.setStartdate(
        enrich.getStartdate() != null && enrichWins ? enrich.getStartdate() : merged.getStartdate());
    merged.setEnddate(
        enrich.getEnddate() != null && enrichWins ? enrich.getEnddate() : merged.getEnddate());
    merged.setCallidentifier(
        enrich.getCallidentifier() != null && enrichWins
            ? enrich.getCallidentifier()
            : merged.getCallidentifier());
    merged.setKeywords(
        enrich.getKeywords() != null && enrichWins ? enrich.getKeywords() : merged.getKeywords());
    merged.setDuration(
        enrich.getDuration() != null && enrichWins ? enrich.getDuration() : merged.getDuration());
    merged.setEcsc39(
        enrich.getEcsc39() != null && enrichWins ? enrich.getEcsc39() : merged.getEcsc39());
    merged.setOamandatepublications(
        enrich.getOamandatepublications() != null && enrichWins
            ? enrich.getOamandatepublications()
            : merged.getOamandatepublications());
    merged.setEcarticle29_3(
        enrich.getEcarticle29_3() != null && enrichWins
            ? enrich.getEcarticle29_3()
            : merged.getEcarticle29_3());

    // List-valued fields are combined rather than replaced.
    merged.setSubjects(mergeLists(merged.getSubjects(), enrich.getSubjects()));
    merged.setFundingtree(mergeLists(merged.getFundingtree(), enrich.getFundingtree()));

    merged.setContracttype(
        enrich.getContracttype() != null && enrichWins ? enrich.getContracttype() : merged.getContracttype());
    merged.setOptional1(
        enrich.getOptional1() != null && enrichWins ? enrich.getOptional1() : merged.getOptional1());
    merged.setOptional2(
        enrich.getOptional2() != null && enrichWins ? enrich.getOptional2() : merged.getOptional2());
    merged.setJsonextrainfo(
        enrich.getJsonextrainfo() != null && enrichWins
            ? enrich.getJsonextrainfo()
            : merged.getJsonextrainfo());
    merged.setContactfullname(
        enrich.getContactfullname() != null && enrichWins
            ? enrich.getContactfullname()
            : merged.getContactfullname());
    merged.setContactfax(
        enrich.getContactfax() != null && enrichWins ? enrich.getContactfax() : merged.getContactfax());
    merged.setContactphone(
        enrich.getContactphone() != null && enrichWins ? enrich.getContactphone() : merged.getContactphone());
    merged.setContactemail(
        enrich.getContactemail() != null && enrichWins ? enrich.getContactemail() : merged.getContactemail());
    merged.setSummary(
        enrich.getSummary() != null && enrichWins ? enrich.getSummary() : merged.getSummary());
    merged.setCurrency(
        enrich.getCurrency() != null && enrichWins ? enrich.getCurrency() : merged.getCurrency());

    // The topic code and its description travel together, and only fill a gap in the merged project.
    final boolean topicMissing = StringUtils.isEmpty(merged.getH2020topiccode());
    if (enrich.getH2020topiccode() != null && topicMissing) {
        merged.setH2020topiccode(enrich.getH2020topiccode());
        merged.setH2020topicdescription(enrich.getH2020topicdescription());
    }
    merged.setH2020classification(mergeLists(merged.getH2020classification(), enrich.getH2020classification()));

    mergeOAFDataInfo(merged, enrich);
    return merged;
}
}

View File

@ -0,0 +1,38 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.util.Comparator;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public class OrganizationPidComparator implements Comparator<StructuredProperty> {
@Override
public int compare(StructuredProperty left, StructuredProperty right) {
PidType lClass = PidType.tryValueOf(left.getQualifier().getClassid());
PidType rClass = PidType.tryValueOf(right.getQualifier().getClassid());
if (lClass.equals(PidType.openorgs))
return -1;
if (rClass.equals(PidType.openorgs))
return 1;
if (lClass.equals(PidType.GRID))
return -1;
if (rClass.equals(PidType.GRID))
return 1;
if (lClass.equals(PidType.mag_id))
return -1;
if (rClass.equals(PidType.mag_id))
return 1;
if (lClass.equals(PidType.urn))
return -1;
if (rClass.equals(PidType.urn))
return 1;
return 0;
}
}

View File

@ -0,0 +1,8 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.util.HashMap;
import java.util.HashSet;
/**
 * Map from a key to a set of string values, with no behaviour beyond {@link HashMap}.
 *
 * {@code PidBlacklistProvider} deserialises {@code pid_blacklist.json} into this type and looks entries up
 * by PID type, so the keys are presumably PID types and the values the blacklisted PID values - confirm
 * against the JSON resource.
 */
public class PidBlacklist extends HashMap<String, HashSet<String>> {
}

View File

@ -0,0 +1,39 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.io.IOException;
import java.util.HashSet;
import java.util.Optional;
import java.util.Set;
import org.apache.commons.io.IOUtils;
import com.fasterxml.jackson.databind.ObjectMapper;
/**
 * Gives access to the PID blacklist stored in the {@code pid_blacklist.json} classpath resource.
 *
 * The resource is resolved relative to {@code IdentifierFactory} and parsed once, when this class is
 * initialised.
 */
public class PidBlacklistProvider {

    private static final PidBlacklist blacklist = loadBlacklist();

    /**
     * Reads and parses the blacklist resource.
     *
     * The stream is handed to Jackson directly, so decoding does not depend on the platform default
     * charset, and try-with-resources guarantees it is closed.
     */
    private static PidBlacklist loadBlacklist() {
        try (java.io.InputStream is = IdentifierFactory.class.getResourceAsStream("pid_blacklist.json")) {
            if (is == null) {
                // Fail with an explicit message instead of an NPE raised from inside the parser.
                throw new IllegalStateException("classpath resource not found: pid_blacklist.json");
            }
            return new ObjectMapper().readValue(is, PidBlacklist.class);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * @return the whole blacklist, as loaded from the resource
     */
    public static PidBlacklist getBlacklist() {
        return blacklist;
    }

    /**
     * @param pidType the key to look up
     * @return the values registered under {@code pidType}, or a new empty set when there is no such key
     */
    public static Set<String> getBlacklist(String pidType) {
        return Optional
            .ofNullable(getBlacklist().get(pidType))
            .orElseGet(HashSet::new);
    }

    private PidBlacklistProvider() {
    }
}

View File

@ -0,0 +1,48 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.util.Comparator;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
/**
 * Compares PIDs using the ordering that suits the type of the entity they belong to.
 *
 * Nulls sort last. PIDs of a {@link Result} are compared by {@link ResultPidComparator}, PIDs of an
 * {@link Organization} by {@link OrganizationPidComparator}; for any other entity the qualifier class ids
 * are compared lexicographically.
 *
 * @param <T> the type of the entity owning the PIDs
 */
public class PidComparator<T extends OafEntity> implements Comparator<StructuredProperty> {

    private final T entity;

    /**
     * @param entity the entity whose type selects the ordering
     */
    public PidComparator(T entity) {
        this.entity = entity;
    }

    @Override
    public int compare(StructuredProperty left, StructuredProperty right) {
        if (left == null || right == null) {
            if (left == right) {
                // both are null
                return 0;
            }
            return left == null ? 1 : -1;
        }

        if (ModelSupport.isSubClass(entity, Result.class)) {
            return new ResultPidComparator().compare(left, right);
        }
        if (ModelSupport.isSubClass(entity, Organization.class)) {
            return new OrganizationPidComparator().compare(left, right);
        }

        // Else (but unlikely), lexicographical ordering will do.
        final String leftClassId = left.getQualifier().getClassid();
        final String rightClassId = right.getQualifier().getClassid();
        return leftClassId.compareTo(rightClassId);
    }
}

View File

@ -0,0 +1,79 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import org.apache.commons.lang3.EnumUtils;
/**
 * The persistent identifier (PID) types known to the schema utilities.
 */
public enum PidType {

    /**
     * The DOI syntax shall be made up of a DOI prefix and a DOI suffix separated by a forward slash.
     *
     * There is no defined limit on the length of the DOI name, or of the DOI prefix or DOI suffix.
     *
     * The DOI name is case-insensitive and can incorporate any printable characters from the legal graphic
     * characters of Unicode. Further constraints on character use (e.g. use of language-specific
     * alphanumeric characters) can be defined for an application by the ISO 26324 Registration Authority.
     *
     * DOI prefix: The DOI prefix shall be composed of a directory indicator followed by a registrant code.
     * These two components shall be separated by a full stop (period). The directory indicator shall be
     * "10" and distinguishes the entire set of character strings (prefix and suffix) as digital object
     * identifiers within the resolution system.
     *
     * Registrant code: The second element of the DOI prefix shall be the registrant code. The registrant
     * code is a unique string assigned to a registrant.
     *
     * DOI suffix: The DOI suffix shall consist of a character string of any length chosen by the
     * registrant. Each suffix shall be unique to the prefix element that precedes it. The unique suffix can
     * be a sequential number, or it might incorporate an identifier generated from or based on another
     * system used by the registrant (e.g. ISAN, ISBN, ISRC, ISSN, ISTC, ISNI; in such cases, a preferred
     * construction for such a suffix can be specified, as in Example 1).
     *
     * Source: https://www.doi.org/doi_handbook/2_Numbering.html#2.2
     */
    doi,

    /**
     * PubMed Unique Identifier (PMID)
     *
     * This field is a 1-to-8 digit accession number with no leading zeros. It is present on all records and
     * is the accession number for managing and disseminating records. PMIDs are not reused after records
     * are deleted.
     *
     * Beginning in February 2012 PMIDs include extensions following a decimal point to account for article
     * versions (e.g., 21804956.2). All citations are considered version 1 until replaced. The extended PMID
     * is not displayed on the MEDLINE format.
     *
     * View the citation in abstract format in PubMed to access additional versions when available (see the
     * article in the Jan-Feb 2012 NLM Technical Bulletin).
     *
     * Source: https://www.nlm.nih.gov/bsd/mms/medlineelements.html#pmid
     */
    pmid,

    /**
     * This field contains the unique identifier for the cited article in PubMed Central. The identifier
     * begins with the prefix PMC.
     *
     * Source: https://www.nlm.nih.gov/bsd/mms/medlineelements.html#pmc
     */
    pmc,

    handle, arXiv, nct, pdb,

    // Organization
    openorgs, corda, corda_h2020, GRID, mag_id, urn,

    // Used by dedup
    undefined, original;

    /**
     * Tells whether a string is the name of one of the constants of this enum.
     *
     * @param type the candidate name
     * @return the outcome of {@code EnumUtils.isValidEnum} for this enum and {@code type}
     */
    public static boolean isValid(String type) {
        return EnumUtils.isValidEnum(PidType.class, type);
    }

    /**
     * Lenient variant of {@link #valueOf(String)}.
     *
     * @param s the name to resolve
     * @return the constant named {@code s}, or {@link #original} when {@code s} is null or matches no constant
     */
    public static PidType tryValueOf(String s) {
        try {
            return PidType.valueOf(s);
        } catch (IllegalArgumentException | NullPointerException ignored) {
            // valueOf rejects unknown names with IllegalArgumentException and null with NullPointerException.
            return PidType.original;
        }
    }
}

View File

@ -0,0 +1,33 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.util.Comparator;
import java.util.Optional;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
/**
 * Compares PIDs by their normalised value; nulls sort last.
 *
 * NOTE(review): {@code CleaningFunctions.normalizePidValue(StructuredProperty)} writes the normalised value
 * back into the PID it receives, so comparing two PIDs also modifies them.
 */
public class PidValueComparator implements Comparator<StructuredProperty> {

    @Override
    public int compare(StructuredProperty left, StructuredProperty right) {
        if (left == null || right == null) {
            if (left == right) {
                // both are null
                return 0;
            }
            return left == null ? 1 : -1;
        }

        // Normalise both sides first (left, then right), then read the resulting values.
        final StructuredProperty normalizedLeft = CleaningFunctions.normalizePidValue(left);
        final StructuredProperty normalizedRight = CleaningFunctions.normalizePidValue(right);

        final String leftValue = normalizedLeft.getValue();
        if (leftValue == null) {
            return 1;
        }
        final String rightValue = normalizedRight.getValue();
        if (rightValue == null) {
            return -1;
        }
        return leftValue.compareTo(rightValue);
    }
}

View File

@ -0,0 +1,53 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.util.Comparator;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public class ResultPidComparator implements Comparator<StructuredProperty> {
@Override
public int compare(StructuredProperty left, StructuredProperty right) {
PidType lClass = PidType.tryValueOf(left.getQualifier().getClassid());
PidType rClass = PidType.tryValueOf(right.getQualifier().getClassid());
if (lClass.equals(PidType.doi))
return -1;
if (rClass.equals(PidType.doi))
return 1;
if (lClass.equals(PidType.pmid))
return -1;
if (rClass.equals(PidType.pmid))
return 1;
if (lClass.equals(PidType.pmc))
return -1;
if (rClass.equals(PidType.pmc))
return 1;
if (lClass.equals(PidType.handle))
return -1;
if (rClass.equals(PidType.handle))
return 1;
if (lClass.equals(PidType.arXiv))
return -1;
if (rClass.equals(PidType.arXiv))
return 1;
if (lClass.equals(PidType.nct))
return -1;
if (rClass.equals(PidType.nct))
return 1;
if (lClass.equals(PidType.pdb))
return -1;
if (rClass.equals(PidType.pdb))
return 1;
return 0;
}
}

View File

@ -0,0 +1,77 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import static eu.dnetlib.dhp.schema.common.ModelConstants.CROSSREF_ID;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Optional;
import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Result;
/**
 * Orders results, with nulls last.
 *
 * A result collected from {@code CROSSREF_ID} comes before one that is not. Otherwise the result types
 * decide, in the order publication, dataset, software, other research product; any remaining pair of types
 * is ordered lexicographically by class id.
 */
public class ResultTypeComparator implements Comparator<Result> {

    @Override
    public int compare(Result left, Result right) {
        if (left == null || right == null) {
            if (left == right) {
                // both are null
                return 0;
            }
            return left == null ? 1 : -1;
        }

        final boolean leftFromCrossref = getCollectedFromIds(left).contains(CROSSREF_ID);
        final boolean rightFromCrossref = getCollectedFromIds(right).contains(CROSSREF_ID);
        if (leftFromCrossref != rightFromCrossref) {
            return leftFromCrossref ? -1 : 1;
        }

        final String lClass = left.getResulttype().getClassid();
        final String rClass = right.getResulttype().getClassid();
        if (lClass.equals(rClass)) {
            return 0;
        }

        // Result types by decreasing precedence; the first one matching either side decides.
        final String[] precedence = {
            ModelConstants.PUBLICATION_RESULTTYPE_CLASSID,
            ModelConstants.DATASET_RESULTTYPE_CLASSID,
            ModelConstants.SOFTWARE_RESULTTYPE_CLASSID,
            ModelConstants.ORP_RESULTTYPE_CLASSID
        };
        for (String classId : precedence) {
            if (lClass.equals(classId)) {
                return -1;
            }
            if (rClass.equals(classId)) {
                return 1;
            }
        }

        // Else (but unlikely), lexicographical ordering will do.
        return lClass.compareTo(rClass);
    }

    /**
     * Collects the keys of the {@code collectedfrom} entries of a result.
     *
     * @param left the result to inspect
     * @return the set of keys, empty when the result has no {@code collectedfrom} list
     */
    protected HashSet<String> getCollectedFromIds(Result left) {
        return Optional
            .ofNullable(left.getCollectedfrom())
            .map(
                sources -> sources
                    .stream()
                    .map(KeyValue::getKey)
                    .collect(Collectors.toCollection(HashSet::new)))
            .orElseGet(HashSet::new);
    }
}

View File

@ -0,0 +1,21 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import java.util.Set;
/**
 * Checks that {@link PidBlacklistProvider} exposes a populated blacklist.
 */
class BlackListProviderTest {

    /**
     * The blacklist must be available, contain at least one entry under "doi", and yield an empty,
     * non-null set for a key that is not present.
     */
    @Test
    void blackListTest() {
        final PidBlacklist all = PidBlacklistProvider.getBlacklist();
        Assertions.assertNotNull(all);

        final Set<String> doiValues = all.get("doi");
        Assertions.assertNotNull(doiValues);
        Assertions.assertFalse(doiValues.isEmpty());

        final Set<String> unknownType = PidBlacklistProvider.getBlacklist("xxx");
        Assertions.assertNotNull(unknownType);
        Assertions.assertTrue(unknownType.isEmpty());
    }
}

View File

@ -0,0 +1,80 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import static org.junit.jupiter.api.Assertions.*;
import java.io.IOException;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.oaf.Publication;
/**
 * Checks the identifiers produced by {@code IdentifierFactory.createIdentifier} for a set of publication
 * fixtures, with and without the md5 option.
 */
class IdentifierFactoryTest {

    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
        .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);

    /**
     * Identifiers created with the md5 flag enabled.
     *
     * @throws IOException if a fixture cannot be read or parsed
     */
    @Test
    void testCreateIdentifierForPublication() throws IOException {

        verifyIdentifier(
            "publication_doi1.json", "50|doi_________::79dbc7a2a56dc1532659f9038843256e", true);
        verifyIdentifier(
            "publication_doi2.json", "50|doi_________::79dbc7a2a56dc1532659f9038843256e", true);
        verifyIdentifier(
            "publication_doi3.json", "50|pmc_________::94e4cb08c93f8733b48e2445d04002ac", true);
        verifyIdentifier(
            "publication_doi4.json", "50|od______2852::38861c44e6052a8d49f59a4c39ba5e66", true);
        verifyIdentifier(
            "publication_doi5.json", "50|doi_________::3bef95c0ca26dd55451fc8839ea69d27", true);
        verifyIdentifier(
            "publication_pmc1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", true);
        verifyIdentifier(
            "publication_pmc2.json", "50|pmc_________::94e4cb08c93f8733b48e2445d04002ac", true);
        verifyIdentifier(
            "publication_openapc.json", "50|doi_________::79dbc7a2a56dc1532659f9038843256e", true);

        final String defaultID = "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f";
        verifyIdentifier("publication_3.json", defaultID, true);
        verifyIdentifier("publication_4.json", defaultID, true);
        verifyIdentifier("publication_5.json", defaultID, true);
    }

    /**
     * Identifiers created with the md5 flag disabled.
     *
     * @throws IOException if a fixture cannot be read or parsed
     */
    @Test
    void testCreateIdentifierForPublicationNoHash() throws IOException {

        verifyIdentifier("publication_doi1.json", "50|doi_________::10.1016/j.cmet.2010.03.013", false);
        verifyIdentifier("publication_doi2.json", "50|doi_________::10.1016/j.cmet.2010.03.013", false);
        verifyIdentifier("publication_pmc1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", false);
        verifyIdentifier(
            "publication_urn1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", false);

        final String defaultID = "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f";
        verifyIdentifier("publication_3.json", defaultID, false);
        verifyIdentifier("publication_4.json", defaultID, false);
        verifyIdentifier("publication_5.json", defaultID, false);
    }

    /**
     * Loads a publication fixture and checks the identifier created for it.
     *
     * @param filename the fixture, resolved relative to this test class
     * @param expectedID the identifier the factory is expected to return
     * @param md5 the flag passed on to {@code IdentifierFactory.createIdentifier}
     * @throws IOException if the fixture cannot be read or parsed
     */
    protected void verifyIdentifier(String filename, String expectedID, boolean md5) throws IOException {
        // The stream is parsed by Jackson directly (no platform-charset decoding) and always closed.
        try (java.io.InputStream resource = getClass().getResourceAsStream(filename)) {
            // A missing fixture should fail with its name, not with an NPE from the parser.
            assertNotNull(resource, "test resource not found: " + filename);
            final Publication pub = OBJECT_MAPPER.readValue(resource, Publication.class);
            final String id = IdentifierFactory.createIdentifier(pub, md5);
            assertNotNull(id);
            assertEquals(expectedID, id);
        }
    }
}

View File

@ -0,0 +1,617 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.io.InputStream;
import java.time.format.DateTimeParseException;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import static org.junit.jupiter.api.Assertions.*;
/**
* The type Merge test.
*/
class MergeTest {
/**
* The Oaf.
*/
OafEntity oaf;
/**
 * Resets the {@code oaf} field to a new, empty {@link Publication} before each test.
 */
@BeforeEach
public void setUp() {
    this.oaf = new Publication();
}
/**
 * Exercises {@code OafUtils.mergeLists} with two overlapping lists, with one null list and with two null
 * lists. The results are only printed; nothing is asserted.
 */
@Test
@SuppressWarnings("unchecked")
void mergeListsTest() {

    // string list merge test
    final List<String> first = Arrays.asList("a", "b", "c", "e");
    final List<String> second = Arrays.asList("a", "b", "c", "d");
    final List<String> missing = null;

    System.out.println("merge result 1 = " + OafUtils.mergeLists(first, second));

    System.out.println("merge result 2 = " + OafUtils.mergeLists(first, missing));

    System.out.println("merge result 3 = " + OafUtils.mergeLists(missing, missing));
}
/**
 * Merging two publications whose {@code collectedfrom} lists hold the entries (a, open), (b, closed) and
 * (A, open), (b, Open) must leave three entries on the merged publication.
 */
@Test
void mergePublicationCollectedFromTest() {

    final Publication target = publication();
    final Publication source = publication();

    target.setCollectedfrom(Arrays.asList(setKV("a", "open"), setKV("b", "closed")));
    source.setCollectedfrom(Arrays.asList(setKV("A", "open"), setKV("b", "Open")));

    final Publication merged = (Publication) OafUtils.mergeOAF(target, source);

    assertNotNull(merged.getCollectedfrom());
    assertEquals(3, merged.getCollectedfrom().size());
}
/**
 * Loads results from a classpath resource holding one JSON document per line.
 *
 * Lines that cannot be parsed into {@code clazz} are silently left out of the returned list.
 *
 * @param <T> the result type each line is parsed into
 * @param path the classpath location of the resource
 * @param clazz the class each line is deserialised to
 * @return the parsed results, in file order
 * @throws Exception if the resource is missing or cannot be read
 */
private <T extends Result> List<Result> loadResourceResult(final String path, final Class<T> clazz) throws Exception {
    final ObjectMapper jsonMapper = new ObjectMapper();
    final InputStream input = Objects.requireNonNull(getClass().getResourceAsStream(path));

    // LOAD test publications
    final List<String> jsonLines = IOUtils.readLines(input);
    return jsonLines
        .stream()
        .map(line -> {
            try {
                return jsonMapper.readValue(line, clazz);
            } catch (IOException e) {
                // Unparseable line: mapped to null and dropped by the filter below.
                return null;
            }
        })
        .filter(Objects::nonNull)
        .collect(Collectors.toList());
}
/**
 * Copies, position by position, the PIDs of the first instance of each enrichment onto the first instance
 * of the matching source result.
 *
 * @param source the results to update
 * @param enrichment the results providing the PIDs; must have at least as many elements as {@code source}
 * @param overrideAlternateIdentifier when true the PIDs are stored as alternate identifiers of the source
 *        instance, otherwise as its PIDs
 */
private void updatePidIntoPublicationInstance(final List<Result> source, final List<Result> enrichment, final boolean overrideAlternateIdentifier) {
    for (int idx = 0; idx < source.size(); idx++) {
        final Result target = source.get(idx);
        final Result provider = enrichment.get(idx);
        final Instance targetInstance = Objects.requireNonNull(target.getInstance()).get(0);

        if (overrideAlternateIdentifier) {
            targetInstance.setAlternateIdentifier(Objects.requireNonNull(provider.getInstance()).get(0).getPid());
        } else {
            targetInstance.setPid(Objects.requireNonNull(provider.getInstance()).get(0).getPid());
        }
    }
}
/**
 * Merges, position by position, each element of {@code l1} into the matching element of {@code l2} and
 * checks the outcome: a single instance carrying measures, a title, and a result that is not reported as
 * an enrichment.
 *
 * @param l1 the results that are merged from
 * @param l2 the results that receive the merge and are then verified
 */
private void applyAndVerifyEnrichment(final List<Result> l1, final List<Result> l2) {
    // Apply Merge and verify that enrichments works
    for (int idx = 0; idx < l1.size(); idx++) {
        final Result receiver = l2.get(idx);
        final Result contributor = l1.get(idx);

        receiver.mergeFrom(contributor);

        assertEquals(1, receiver.getInstance().size());
        final Instance onlyInstance = Objects.requireNonNull(receiver.getInstance()).get(0);
        assertNotNull(onlyInstance.getMeasures());
        assertNotNull(receiver.getTitle());
        assertFalse(Result.isAnEnrichment(receiver));
    }
}
/**
 * Tests the merge of the processing charges (APC) at the level of the result and of its instances.
 *
 * Both fixture publications must carry an amount and a currency on the result and on their single
 * instance. After merging the second into the first, the result must report 1721.47 EUR and hold two
 * instances which, between them, carry the amounts 2000.47 and 1721.47 and the currencies EUR and USD.
 *
 * @throws Exception if the fixture cannot be loaded
 */
@Test
void testAPCMerge() throws Exception {
    List<Result> publications = loadResourceResult("/eu/dnetlib/dhp/schema/oaf/utils/publication_apc.json", Publication.class);
    System.out.println(publications.size());

    publications.forEach(p -> assertEquals(1, p.getInstance().size()));
    publications.forEach(p -> assertNotNull(p.getProcessingchargeamount()));
    publications.forEach(p -> assertNotNull(p.getProcessingchargecurrency()));
    publications.forEach(p -> assertTrue(StringUtils.isNotBlank(p.getProcessingchargeamount().getValue())));
    publications.forEach(p -> assertTrue(StringUtils.isNotBlank(p.getProcessingchargecurrency().getValue())));
    publications.forEach(p -> p.getInstance()
        .forEach(i -> assertNotNull(i.getProcessingchargeamount())));
    publications.forEach(p -> p.getInstance()
        .forEach(i -> assertNotNull(i.getProcessingchargecurrency())));
    publications.forEach(p -> p.getInstance()
        .forEach(i -> assertTrue(StringUtils.isNotBlank(i.getProcessingchargeamount().getValue()))));
    publications.forEach(p -> p.getInstance()
        .forEach(i -> assertTrue(StringUtils.isNotBlank(i.getProcessingchargecurrency().getValue()))));

    Result p1 = publications.get(0);
    Result p2 = publications.get(1);
    p1.mergeFrom(p2);

    assertEquals("1721.47", p1.getProcessingchargeamount().getValue());
    assertEquals("EUR", p1.getProcessingchargecurrency().getValue());
    assertEquals(2, p1.getInstance().size());
    p1.getInstance().forEach(i -> assertNotNull(i.getProcessingchargeamount()));
    p1.getInstance().forEach(i -> assertNotNull(i.getProcessingchargecurrency()));

    // The outcome of anyMatch has to be asserted: a bare anyMatch call verifies nothing.
    assertTrue(p1.getInstance().stream().anyMatch(i -> "2000.47".equals(i.getProcessingchargeamount().getValue())));
    assertTrue(p1.getInstance().stream().anyMatch(i -> "1721.47".equals(i.getProcessingchargeamount().getValue())));
    assertTrue(p1.getInstance().stream().anyMatch(i -> "EUR".equals(i.getProcessingchargecurrency().getValue())));
    assertTrue(p1.getInstance().stream().anyMatch(i -> "USD".equals(i.getProcessingchargecurrency().getValue())));

    System.out.println(new ObjectMapper().writeValueAsString(p1));
}
/**
 * Tests the merge of the processing charges (APC) when only one of the two records carries them.
 *
 * The first fixture record has an amount and a currency, the second has no amount. The merge is applied in
 * both directions; each time the merged result must be visible, report 1721.47 EUR, and hold two instances
 * of which at least one carries the amount and the currency.
 *
 * @throws Exception if the fixture cannot be loaded
 */
@Test
void testAPCMerge2() throws Exception {
    List<Result> publications = loadResourceResult("/eu/dnetlib/dhp/schema/oaf/utils/publication_apc2.json", Publication.class);
    System.out.println(publications.size());

    publications.forEach(p -> assertEquals(1, p.getInstance().size()));
    assertNotNull(publications.get(0).getProcessingchargeamount());
    assertNotNull(publications.get(0).getProcessingchargecurrency());
    assertNull(publications.get(1).getProcessingchargeamount());

    Result p1 = publications.get(1);
    Result p2 = publications.get(0);

    //merge visible record with OpenAPC
    p1 = (Result) OafUtils.mergeOAF(p1, p2);
    assertFalse(p1.getDataInfo().getInvisible());
    assertEquals("1721.47", p1.getProcessingchargeamount().getValue());
    assertEquals("EUR", p1.getProcessingchargecurrency().getValue());
    assertEquals(2, p1.getInstance().size());
    // The outcome of anyMatch has to be asserted: a bare anyMatch call verifies nothing.
    assertTrue(p1.getInstance().stream().anyMatch(i -> i.getProcessingchargeamount() != null));
    assertTrue(p1.getInstance().stream().anyMatch(i -> i.getProcessingchargecurrency() != null));
    assertEquals("1721.47", p1.getInstance().stream().filter(i -> i.getProcessingchargeamount() != null)
        .collect(Collectors.toList()).get(0).getProcessingchargeamount().getValue());
    assertEquals("EUR", p1.getInstance().stream().filter(i -> i.getProcessingchargeamount() != null)
        .collect(Collectors.toList()).get(0).getProcessingchargecurrency().getValue());
    assertFalse(p1.getDataInfo().getInvisible());
    System.out.println(new ObjectMapper().writeValueAsString(p1));

    //merge OpenAPC with visible record
    p2 = (Result) OafUtils.mergeOAF(p2, p1);
    assertFalse(p2.getDataInfo().getInvisible());
    assertEquals("1721.47", p2.getProcessingchargeamount().getValue());
    assertEquals("EUR", p2.getProcessingchargecurrency().getValue());
    assertEquals(2, p2.getInstance().size());
    assertTrue(p2.getInstance().stream().anyMatch(i -> i.getProcessingchargeamount() != null));
    assertTrue(p2.getInstance().stream().anyMatch(i -> i.getProcessingchargecurrency() != null));
    assertEquals("1721.47", p2.getInstance().stream().filter(i -> i.getProcessingchargeamount() != null)
        .collect(Collectors.toList()).get(0).getProcessingchargeamount().getValue());
    assertEquals("EUR", p2.getInstance().stream().filter(i -> i.getProcessingchargeamount() != null)
        .collect(Collectors.toList()).get(0).getProcessingchargecurrency().getValue());
}
/**
 * Tests the enrichment merge in four scenarios: the enrichment PIDs are copied either to the PIDs or to
 * the alternate identifiers of the publication instance, and the two lists are passed to
 * {@code applyAndVerifyEnrichment} in either order. Fixtures are reloaded before every scenario.
 *
 * @throws Exception if a fixture cannot be loaded
 */
@Test
void testEnrichment() throws Exception {
    final String publicationsPath = "/eu/dnetlib/dhp/schema/oaf/utils/publications.json";
    final String enrichmentPath = "/eu/dnetlib/dhp/schema/oaf/utils/enrichment.json";

    // 1 TEST UPDATING PID INSTANCE AND MERGE CURRENT PUBLICATION WITH ENRICHMENT
    // LOAD test publications
    List<Result> publications = loadResourceResult(publicationsPath, Publication.class);
    // Assert that each publication has only one instance and inside that all the measure field is empty
    for (final Result p : publications) {
        assertEquals(1, p.getInstance().size());
        final Instance onlyInstance = Objects.requireNonNull(p.getInstance()).get(0);
        assertNull(onlyInstance.getMeasures());
    }
    // LOAD test enrichments
    List<Result> enrichment = loadResourceResult(enrichmentPath, Result.class);
    updatePidIntoPublicationInstance(publications, enrichment, false);
    applyAndVerifyEnrichment(publications, enrichment);

    // 2 TEST UPDATING ALTERNATE ID INSTANCE AND MERGE CURRENT PUBLICATION WITH ENRICHMENT
    publications = loadResourceResult(publicationsPath, Publication.class);
    enrichment = loadResourceResult(enrichmentPath, Result.class);
    updatePidIntoPublicationInstance(publications, enrichment, true);
    applyAndVerifyEnrichment(publications, enrichment);

    // 3 TEST UPDATING PID INSTANCE AND MERGE ENRICHMENT WITH CURRENT PUBLICATION
    publications = loadResourceResult(publicationsPath, Publication.class);
    enrichment = loadResourceResult(enrichmentPath, Result.class);
    updatePidIntoPublicationInstance(publications, enrichment, false);
    applyAndVerifyEnrichment(enrichment, publications);

    // 4 TEST UPDATING ALTERNATE ID INSTANCE AND MERGE ENRICHMENT WITH CURRENT PUBLICATION
    publications = loadResourceResult(publicationsPath, Publication.class);
    enrichment = loadResourceResult(enrichmentPath, Result.class);
    updatePidIntoPublicationInstance(publications, enrichment, true);
    applyAndVerifyEnrichment(enrichment, publications);
}
/**
 * Both publications carry a date of acceptance and are built with the no-argument {@code publication()}
 * helper: the receiving publication keeps its own date.
 */
@Test
void mergePublicationDateOfAcceptanceTest_bothPresent() {

    final Publication receiver = publication();
    final Publication other = publication();
    receiver.setDateofacceptance(field("2021-06-18"));
    other.setDateofacceptance(field("2021-06-19"));

    receiver.mergeFrom(other);

    assertNotNull(receiver.getDateofacceptance());
    assertEquals("2021-06-18", receiver.getDateofacceptance().getValue());
}
/**
 * Both publications carry a date of acceptance; the receiver is built with "0.8" and the other with "0.9"
 * (presumably trust values - the {@code publication(String)} helper is not visible here): the date of the
 * other publication wins.
 */
@Test
void mergePublicationDateOfAcceptanceTest_bothPresent_1() {

    final Publication receiver = publication("0.8");
    final Publication other = publication("0.9");
    receiver.setDateofacceptance(field("2021-06-18"));
    other.setDateofacceptance(field("2021-06-19"));

    receiver.mergeFrom(other);

    assertNotNull(receiver.getDateofacceptance());
    assertEquals("2021-06-19", receiver.getDateofacceptance().getValue());
}
/**
 * Both publications carry a date of acceptance; the receiver is built with "0.9" and the other with "0.8"
 * (presumably trust values - the {@code publication(String)} helper is not visible here): the receiver
 * keeps its own date.
 */
@Test
void mergePublicationDateOfAcceptanceTest_bothPresent_2() {

    final Publication receiver = publication("0.9");
    final Publication other = publication("0.8");
    receiver.setDateofacceptance(field("2021-06-18"));
    other.setDateofacceptance(field("2021-06-19"));

    receiver.mergeFrom(other);

    assertNotNull(receiver.getDateofacceptance());
    assertEquals("2021-06-18", receiver.getDateofacceptance().getValue());
}
/**
 * Only the other publication carries a date of acceptance, and both are built with the no-argument
 * {@code publication()} helper: the receiver takes over that date.
 */
@Test
void mergePublicationDateOfAcceptanceTest_leftMissing() {

    final Publication receiver = publication();
    final Publication other = publication();
    other.setDateofacceptance(field("2021-06-19"));

    receiver.mergeFrom(other);

    assertNotNull(receiver.getDateofacceptance());
    assertEquals("2021-06-19", receiver.getDateofacceptance().getValue());
}
/**
 * Only the other publication carries a date of acceptance; the receiver is built with "0.9" and the other
 * with "0.8" (presumably trust values - the {@code publication(String)} helper is not visible here): the
 * receiver still takes over that date.
 */
@Test
void mergePublicationDateOfAcceptanceTest_leftMissing_1() {

    final Publication receiver = publication("0.9");
    final Publication other = publication("0.8");
    other.setDateofacceptance(field("2021-06-19"));

    receiver.mergeFrom(other);

    assertNotNull(receiver.getDateofacceptance());
    assertEquals("2021-06-19", receiver.getDateofacceptance().getValue());
}
/**
 * Only the other publication carries a date of acceptance; the receiver is built with "0.8" and the other
 * with "0.9" (presumably trust values - the {@code publication(String)} helper is not visible here): the
 * receiver takes over that date.
 */
@Test
void mergePublicationDateOfAcceptanceTest_leftMissing_2() {

    final Publication receiver = publication("0.8");
    final Publication other = publication("0.9");
    other.setDateofacceptance(field("2021-06-19"));

    receiver.mergeFrom(other);

    assertNotNull(receiver.getDateofacceptance());
    assertEquals("2021-06-19", receiver.getDateofacceptance().getValue());
}
/**
 * Only the receiver has a date of acceptance (both records use the default
 * trust): the test expects the receiver's date to be untouched by the merge.
 */
@Test
void mergePublicationDateOfAcceptanceTest_rightMissing() {
    final Publication dated = publication();
    final Publication undated = publication();
    dated.setDateofacceptance(field("2021-06-19"));

    dated.mergeFrom(undated);

    assertNotNull(dated.getDateofacceptance());
    assertEquals("2021-06-19", dated.getDateofacceptance().getValue());
}
/**
 * Only the receiver has a date of acceptance and the merged-in publication
 * has the higher trust ("0.9" vs "0.8"): the test expects the receiver's date
 * to be kept anyway.
 */
@Test
void mergePublicationDateOfAcceptanceTest_rightMissing_1() {
    final Publication datedLowTrust = publication("0.8");
    final Publication undatedHighTrust = publication("0.9");
    datedLowTrust.setDateofacceptance(field("2021-06-19"));

    datedLowTrust.mergeFrom(undatedHighTrust);

    assertNotNull(datedLowTrust.getDateofacceptance());
    assertEquals("2021-06-19", datedLowTrust.getDateofacceptance().getValue());
}
/**
 * Only the receiver has a date of acceptance and it also has the higher trust
 * ("0.9" vs "0.8"): the test expects the receiver's date to be kept.
 */
@Test
void mergePublicationDateOfAcceptanceTest_rightMissing_2() {
    final Publication datedHighTrust = publication("0.9");
    final Publication undatedLowTrust = publication("0.8");
    datedHighTrust.setDateofacceptance(field("2021-06-19"));

    datedHighTrust.mergeFrom(undatedLowTrust);

    assertNotNull(datedHighTrust.getDateofacceptance());
    assertEquals("2021-06-19", datedHighTrust.getDateofacceptance().getValue());
}
/**
 * Merges two publications whose subject lists are {"a", "b"} and {"A", "c"}
 * (same scheme and class) and expects three subjects on the receiver.
 * NOTE(review): the expected size of 3 suggests "a" and "A" are treated as
 * the same subject by the merge - confirm against the merge implementation.
 */
@Test
void mergePublicationSubjectTest() {
    final Publication target = publication();
    target.setSubject(Arrays.asList(setSP("a", "open", "classe"), setSP("b", "open", "classe")));
    final Publication other = publication();
    other.setSubject(Arrays.asList(setSP("A", "open", "classe"), setSP("c", "open", "classe")));

    target.mergeFrom(other);

    assertNotNull(target.getSubject());
    assertEquals(3, target.getSubject().size());
}
/**
 * Walks through a series of relation merges, checking how the validated flag
 * and the validation date of the receiver look after each merge.
 */
@Test
void mergeRelationTest() {
    // No validation info on either side: the receiver must still equal the other relation.
    Relation left = createRel(null, null);
    Relation right = createRel(null, null);
    left.mergeFrom(right);
    assertEquals(left, right);

    // Validated receiver merged with an unspecified flag: stays validated.
    left = createRel(true, null);
    right = createRel(null, null);
    left.mergeFrom(right);
    assertEquals(true, left.getValidated());

    // Validated receiver merged with a non-validated relation: stays validated.
    left = createRel(true, null);
    right = createRel(false, null);
    left.mergeFrom(right);
    assertEquals(true, left.getValidated());

    // Receiver without a date picks up the date of the merged-in relation.
    left = createRel(true, null);
    right = createRel(true, "2016-04-05T12:41:19.202Z");
    left.mergeFrom(right);
    assertEquals("2016-04-05T12:41:19.202Z", left.getValidationDate());

    // Both dated (ISO with milliseconds): the earlier date, here the merged-in one, is expected.
    left = createRel(true, "2016-05-07T12:41:19.202Z");
    right = createRel(true, "2016-04-05T12:41:19.202Z");
    left.mergeFrom(right);
    assertEquals("2016-04-05T12:41:19.202Z", left.getValidationDate());

    // Both dated ("yyyy-MM-dd HH:mm:ss" form): the earlier date, here the receiver's, is expected.
    left = createRel(true, "2020-09-10 11:08:52");
    right = createRel(true, "2021-09-10 11:08:52");
    left.mergeFrom(right);
    assertEquals("2020-09-10 11:08:52", left.getValidationDate());

    // Both dated (ISO without milliseconds): the earlier date, here the merged-in one, is expected.
    left = createRel(true, "2021-03-16T10:32:42Z");
    right = createRel(true, "2020-03-16T10:32:42Z");
    left.mergeFrom(right);
    assertEquals("2020-03-16T10:32:42Z", left.getValidationDate());
}
/**
 * Merging two validated relations whose validation dates are not parseable
 * as dates is expected to fail with a {@link DateTimeParseException}.
 */
@Test
void mergeRelationTestParseException() {
    assertThrows(DateTimeParseException.class, () -> {
        final Relation left = createRel(true, "Once upon a time ...");
        final Relation right = createRel(true, "... in a far away land");
        left.mergeFrom(right);
    });
}
/**
 * Builds a relation fixture with fixed endpoints ("1" -> "2") and fixed
 * type information, varying only the validation details.
 *
 * @param validated the validated flag to set; may be {@code null}
 * @param validationDate the validation date to set; may be {@code null}
 * @return the newly created relation
 */
private Relation createRel(Boolean validated, String validationDate) {
    final Relation relation = new Relation();
    // Endpoints and semantics are identical for every fixture.
    relation.setSource("1");
    relation.setTarget("2");
    relation.setRelType("reltype");
    relation.setSubRelType("subreltype");
    relation.setRelClass("relclass");
    // Only the validation details differ between callers.
    relation.setValidated(validated);
    relation.setValidationDate(validationDate);
    return relation;
}
/**
 * Creates a {@code KeyValue} populated with the given key and value.
 *
 * @param key the key to store
 * @param value the value to store
 * @return the newly created key/value pair
 */
private KeyValue setKV(final String key, final String value) {
    final KeyValue pair = new KeyValue();
    pair.setKey(key);
    pair.setValue(value);
    return pair;
}
/**
 * Creates a {@code StructuredProperty} holding the given value and a
 * qualifier in which {@code classname} is used for both class id and class
 * name, and {@code schema} for both scheme id and scheme name.
 *
 * @param value the property value
 * @param schema the text used as scheme id and scheme name
 * @param classname the text used as class id and class name
 * @return the newly created structured property
 */
private StructuredProperty setSP(
    final String value, final String schema, final String classname) {
    final Qualifier qualifier = new Qualifier();
    qualifier.setClassname(classname);
    qualifier.setClassid(classname);
    qualifier.setSchemename(schema);
    qualifier.setSchemeid(schema);

    final StructuredProperty property = new StructuredProperty();
    property.setValue(value);
    property.setQualifier(qualifier);
    return property;
}
/**
 * Wraps a value into a {@code Field}.
 *
 * @param <T> the type of the wrapped value
 * @param value the value to wrap
 * @return a new field whose value is {@code value}
 */
private <T> Field<T> field(T value) {
    // Diamond instead of the raw "new Field()": keeps the assignment type-checked
    // and avoids the unchecked-conversion warning.
    Field<T> f = new Field<>();
    f.setValue(value);
    return f;
}
/**
 * Creates a publication fixture with the default trust of "0.9".
 *
 * @return the newly created publication
 */
private Publication publication() {
    return publication("0.9");
}
/**
 * Creates a publication fixture whose data info carries the given trust.
 *
 * @param trust the trust value to put in the publication's data info
 * @return the newly created publication
 */
private Publication publication(String trust) {
    final Publication result = new Publication();
    result.setDataInfo(df(trust));
    return result;
}
/**
 * Creates a {@code DataInfo} with only the trust populated.
 *
 * @param trust the trust value to set
 * @return the newly created data info
 */
private DataInfo df(String trust) {
    final DataInfo info = new DataInfo();
    info.setTrust(trust);
    return info;
}
}

View File

@ -1,140 +1 @@
{
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g",
"resuttype": {"classid": "dataset"},
"pid": [
{
"qualifier": {"classid": "doi"},
"value": "10.1016/j.cmet.2011.03.013"
},
{
"qualifier": {"classid": "urn"},
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
},
{
"qualifier": {"classid": "scp-number"},
"value": "79953761260"
},
{
"qualifier": {"classid": "pmc"},
"value": "21459329"
}
],
"collectedfrom": [
{
"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3",
"value": "Repository B"
}
],
"instance": [
{
"refereed": {
"classid": "0000",
"classname": "UNKNOWN",
"schemeid": "dnet:review_levels",
"schemename": "dnet:review_levels"
},
"hostedby": {
"key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
"value": "Zenodo"
},
"accessright": {
"classid": "OPEN",
"classname": "Open Access",
"schemeid": "dnet:access_modes",
"schemename": "dnet:access_modes"
},
"processingchargecurrency": {
"dataInfo": {
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "Harvested",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"deletedbyinference": false,
"inferred": false,
"inferenceprovenance": "",
"invisible": true,
"trust": "0.9"
},
"value": "EUR"
},
"pid": [
{
"dataInfo": {
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "Harvested",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"deletedbyinference": false,
"inferred": false,
"inferenceprovenance": "",
"invisible": true,
"trust": "0.9"
},
"qualifier": {
"classid": "doi",
"classname": "Digital Object Identifier",
"schemeid": "dnet:pid_types",
"schemename": "dnet:pid_types"
},
"value": "10.1371/journal.pone.0085605"
}
],
"distributionlocation": "",
"url": ["https://doi.org/10.1371/journal.pone.0085605"],
"alternateIdentifier": [
{
"dataInfo": {
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "Harvested",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"deletedbyinference": false,
"inferred": false,
"inferenceprovenance": "",
"invisible": true,
"trust": "0.9"
},
"qualifier": {
"classid": "pmid",
"classname": "PubMed ID",
"schemeid": "dnet:pid_types",
"schemename": "dnet:pid_types"
},
"value": "24454899.0"
}
],
"collectedfrom": {
"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3",
"value": "Repository B"
},
"processingchargeamount": {
"dataInfo": {
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "Harvested",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"deletedbyinference": false,
"inferred": false,
"inferenceprovenance": "",
"invisible": true,
"trust": "0.9"
},
"value": "1022.02"
},
"instancetype": {
"classid": "0004",
"classname": "Conference object",
"schemeid": "dnet:publication_resource",
"schemename": "dnet:publication_resource"
}
}
]
}
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resuttype" : { "classid" : "dataset" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", "value" : "Repository B"} ]}

View File

@ -0,0 +1,12 @@
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0000/ra.v2i3.114::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"4.65008652949e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0000/ra.v2i3.114"}]}]}
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0001/(aj).v3i6.458::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"4.01810569717e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0001/(aj).v3i6.458"}]}]}
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0001/1587::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.39172290649e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0001/1587"}]}]}
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0001/462::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"6.33235333753e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.36"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.00285265116e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0001/462"}]}]}
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0001/731::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"4.01810569717e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0001/731"}]}]}
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0001/ijllis.v9i4.2066.g2482::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"8.48190886761e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0001/ijllis.v9i4.2066.g2482"}]}]}
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0118/alfahim.v3i1.140::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"9.88840807598e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0118/alfahim.v3i1.140"}]}]}
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0166/fk2.stagefigshare.6442896.v3::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"7.28336930301e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0166/fk2.stagefigshare.6442896.v3"}]}]}
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0301/jttb.v2i1.64::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"7.28336930301e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0301/jttb.v2i1.64"}]}]}
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0809/seruni.v1i1.567::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"2.62959564033e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0809/seruni.v1i1.567"}]}]}
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0809/seruni.v2i1.765::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"9.40178571921e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0559872"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"3.67659957614e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0809/seruni.v2i1.765"}]}]}
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0901/jkip.v7i3.485::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"6.26204125721e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0901/jkip.v7i3.485"}]}]}

View File

@ -0,0 +1 @@
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[{"qualifier":{"classid":"scp-number"},"value":"79953761260"}]}

View File

@ -0,0 +1 @@
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[]}

View File

@ -0,0 +1 @@
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f"}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,33 @@
{
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
"instance": [
{
"collectedfrom": {
"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2",
"value": "Crossref"
},
"pid": [
{
"qualifier": {"classid": "doi"},
"value": "10.1016/j.cmet.2010.03.013"
}
]
},
{
"pid": [
{
"qualifier": {"classid": "urn"},
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
},
{
"qualifier": {"classid": "scp-number"},
"value": "79953761260"
},
{
"qualifier": {"classid": "pmc"},
"value": "21459329"
}
]
}
]
}

View File

@ -0,0 +1,37 @@
{
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
"instance": [
{
"collectedfrom": {
"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2",
"value": "Crossref"
},
"pid": [
{
"qualifier": {"classid": "doi"},
"value": "10.1016/j.cmet.2010.03.013"
}
]
},
{
"collectedfrom": {
"key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c",
"value": "Europe PubMed Central"
},
"pid": [
{
"qualifier": {"classid": "urn"},
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
},
{
"qualifier": {"classid": "scp-number"},
"value": "79953761260"
},
{
"qualifier": {"classid": "pmc"},
"value": "21459329"
}
]
}
]
}

View File

@ -0,0 +1,37 @@
{
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
"instance": [
{
"collectedfrom": {
"key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
"value": "Zenodo"
},
"pid": [
{
"qualifier": {"classid": "doi"},
"value": "10.1016/j.cmet.2010.03.013"
}
]
},
{
"collectedfrom": {
"key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c",
"value": "Europe PubMed Central"
},
"pid": [
{
"qualifier": {"classid": "urn"},
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
},
{
"qualifier": {"classid": "scp-number"},
"value": "79953761260"
},
{
"qualifier": {"classid": "pmc"},
"value": "21459329"
}
]
}
]
}

View File

@ -0,0 +1,37 @@
{
"id": "50|od______2852::38861c44e6052a8d49f59a4c39ba5e66",
"instance": [
{
"collectedfrom": {
"key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
"value": "Zenodo"
},
"pid": [
{
"qualifier": {"classid": "doi"},
"value": "10.1016/j.cmet.2010.03.013"
},
{
"qualifier": {"classid": "handle"},
"value": "11012/83840"
}
]
},
{
"collectedfrom": {
"key": "10|opendoar____::2852",
"value": "Digital library of Brno University of Technology"
},
"pid": [
{
"qualifier": {"classid": "pmc"},
"value": "21459329"
},
{
"qualifier": {"classid": "handle"},
"value": "11012/83840"
}
]
}
]
}

View File

@ -0,0 +1,37 @@
{
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
"instance": [
{
"collectedfrom": {
"key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
"value": "Zenodo"
},
"pid": [
{
"qualifier": {"classid": "doi"},
"value": "10.5281/zenodo.5121485"
}
]
},
{
"collectedfrom": {
"key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c",
"value": "Europe PubMed Central"
},
"pid": [
{
"qualifier": {"classid": "urn"},
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
},
{
"qualifier": {"classid": "scp-number"},
"value": "79953761260"
},
{
"qualifier": {"classid": "pmc"},
"value": "21459329"
}
]
}
]
}

View File

@ -0,0 +1,31 @@
{
"id": "50|openapc_____::000023f9cb6e3a247c764daec4273cbc",
"resuttype": {
"classid": "publication"
},
"instance": [
{
"collectedfrom": {
"key": "10|apc_________::e2b1600b229fc30663c8a1f662debddf",
"value": "OpenAPC Global Initiative"
},
"pid": [
{
"qualifier": {"classid": "doi"},
"value": "10.1016/j.cmet.2010.03.013"
},
{
"qualifier": {"classid": "pmc"},
"value": "21459329"
},
{
"qualifier": {"classid": "pmid"},
"value": "25811027"
}
],
"url":["https://doi.org/10.1155/2015/439379"]
}
]
}

View File

@ -0,0 +1,17 @@
{
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
"pid": [
{
"qualifier": {"classid": "urn"},
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
},
{
"qualifier": {"classid": "scp-number"},
"value": "79953761260"
},
{
"qualifier": {"classid": "pmc"},
"value": "21459329"
}
]
}

View File

@ -0,0 +1,21 @@
{
"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
"instance": [
{
"collectedfrom": {
"key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c",
"value": "Europe PubMed Central"
},
"pid": [
{
"qualifier": {"classid": "doi"},
"value": "10.1016/j.cmet.2010.03.013"
},
{
"qualifier":{"classid":"pmc"},
"value":"21459329"
}
]
}
]
}

View File

@ -0,0 +1,23 @@
{
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
"pid": [
{
"qualifier": {
"classid": "urn"
},
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
},
{
"qualifier": {
"classid": "scp-number"
},
"value": "79953761260"
},
{
"qualifier": {
"classid": "pmcid"
},
"value": "21459329"
}
]
}

File diff suppressed because one or more lines are too long

View File

@ -511,7 +511,7 @@ object DoiBoostMappingUtil {
if (!ret.startsWith(DOI_PREFIX))
return null
return ret
ret
}

View File

@ -0,0 +1,50 @@
package eu.dnetlib.doiboost
import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo}
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils
import org.apache.commons.lang.StringUtils
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils._
import scala.collection.JavaConverters._
/** Helpers for building OAF model objects (data info, authors) in the DoiBoost transformations. */
object DoiBoostTransformationUtils {

  /** Trust value set on every `DataInfo` created by `generateDataInfo`. */
  val TRUST = "0.9"

  /** Creates a `DataInfo` flagged as not deleted by inference, not inferred and
    * not invisible, with trust `TRUST` and the `SYSIMPORT_ACTIONSET` provenance
    * action in the `DNET_PROVENANCE_ACTIONS` scheme.
    *
    * @return a new `DataInfo` instance on every call
    */
  def generateDataInfo(): DataInfo = {
    val di = new DataInfo
    di.setDeletedbyinference(false)
    di.setInferred(false)
    di.setInvisible(false)
    di.setTrust(TRUST)
    di.setProvenanceaction(
      OafMapperUtils.qualifier(
        ModelConstants.SYSIMPORT_ACTIONSET,
        ModelConstants.SYSIMPORT_ACTIONSET,
        ModelConstants.DNET_PROVENANCE_ACTIONS,
        ModelConstants.DNET_PROVENANCE_ACTIONS
      )
    )
    di
  }

  /** Creates an OAF `Author`.
    *
    * @param given  the given name, stored as the author's name; may be null
    * @param family the family name, stored as the author's surname; may be null
    * @param orcid  the ORCID; a pid of type `ORCID_PENDING` is attached only when it is not blank
    * @param index  zero-based position of the author; the rank is set to `index + 1`
    * @return the newly created author
    */
  def generateOAFAuthor(given: String, family: String, orcid: String, index: Int): Author = {
    val a = new Author
    a.setName(given)
    a.setSurname(family)
    // Join only the parts that are present: interpolating s"$given $family"
    // would render a null part as the literal text "null" in the full name.
    a.setFullname(Seq(given, family).filter(part => StringUtils.isNotBlank(part)).mkString(" "))
    a.setRank(index + 1)
    if (StringUtils.isNotBlank(orcid))
      a.setPid(
        List(
          structuredProperty(
            orcid,
            qualifier(
              ModelConstants.ORCID_PENDING,
              ModelConstants.ORCID_PENDING,
              ModelConstants.DNET_PID_TYPES,
              ModelConstants.DNET_PID_TYPES
            ),
            generateDataInfo()
          )
        ).asJava
      )
    a
  }
}

View File

@ -5,8 +5,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf._
import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils}
import eu.dnetlib.dhp.utils.DHPUtils
import eu.dnetlib.doiboost.DoiBoostMappingUtil
import eu.dnetlib.doiboost.DoiBoostMappingUtil._
import org.apache.commons.lang.StringUtils
import org.apache.commons.lang3.tuple
import org.json4s
@ -15,6 +14,8 @@ import org.json4s.JsonAST._
import org.json4s.jackson.JsonMethods._
import org.slf4j.{Logger, LoggerFactory}
import eu.dnetlib.doiboost.DoiBoostTransformationUtils._
import java.util
import scala.collection.JavaConverters._
import scala.collection.mutable
@ -34,62 +35,9 @@ case class mappingAuthor(
case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {}
case object Crossref2Oaf {
object Crossref2Oaf {
val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass)
// val mappingCrossrefType = Map(
// "book-section" -> "publication",
// "book" -> "publication",
// "book-chapter" -> "publication",
// "book-part" -> "publication",
// "book-series" -> "publication",
// "book-set" -> "publication",
// "book-track" -> "publication",
// "edited-book" -> "publication",
// "reference-book" -> "publication",
// "monograph" -> "publication",
// "journal-article" -> "publication",
// "dissertation" -> "publication",
// "other" -> "publication",
// "peer-review" -> "publication",
// "proceedings" -> "publication",
// "proceedings-article" -> "publication",
// "reference-entry" -> "publication",
// "report" -> "publication",
// "report-series" -> "publication",
// "standard" -> "publication",
// "standard-series" -> "publication",
// "posted-content" -> "publication",
// "dataset" -> "dataset"
// )
//
// val mappingCrossrefSubType = Map(
// "book-section" -> "0013 Part of book or chapter of book",
// "book" -> "0002 Book",
// "book-chapter" -> "0013 Part of book or chapter of book",
// "book-part" -> "0013 Part of book or chapter of book",
// "book-series" -> "0002 Book",
// "book-set" -> "0002 Book",
// "book-track" -> "0002 Book",
// "edited-book" -> "0002 Book",
// "reference-book" -> "0002 Book",
// "monograph" -> "0002 Book",
// "journal-article" -> "0001 Article",
// "dissertation" -> "0044 Thesis",
// "other" -> "0038 Other literature type",
// "peer-review" -> "0015 Review",
// "proceedings" -> "0004 Conference object",
// "proceedings-article" -> "0004 Conference object",
// "reference-entry" -> "0013 Part of book or chapter of book",
// "report" -> "0017 Report",
// "report-series" -> "0017 Report",
// "standard" -> "0038 Other literature type",
// "standard-series" -> "0038 Other literature type",
// "dataset" -> "0021 Dataset",
// "preprint" -> "0016 Preprint",
// "report" -> "0017 Report"
// )
def mappingResult(result: Result, json: JValue, resourceType:Qualifier, instanceType: Qualifier): Result = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
@ -216,7 +164,7 @@ case object Crossref2Oaf {
)
result.setAuthor(sorted_list.zipWithIndex.map { case (a, index) =>
generateAuhtor(a.given.orNull, a.family, a.ORCID.orNull, index)
generateOAFAuthor(a.given.orNull, a.family, a.ORCID.orNull, index)
}.asJava)
// Mapping instance
@ -268,10 +216,7 @@ case object Crossref2Oaf {
instance.setDateofacceptance(asField(createdDate.getValue))
}
val s: List[String] = List("https://doi.org/" + doi)
// val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && p.toLowerCase().contains(doi.toLowerCase())).distinct
// if (links.nonEmpty) {
// instance.setUrl(links.asJava)
// }
if (s.nonEmpty) {
instance.setUrl(s.asJava)
}
@ -297,26 +242,7 @@ case object Crossref2Oaf {
result
}
/** Builds an OAF `Author` from its name parts.
  *
  * @param given  the given name; may be `null` when the source record has none
  * @param family the family name
  * @param orcid  the ORCID identifier; when blank or `null` no pid is attached
  * @param index  zero-based position of the author in the author list; the rank is `index + 1`
  * @return the populated `Author`; when `orcid` is not blank it carries a single
  *         `ORCID_PENDING` pid in the `DNET_PID_TYPES` scheme
  */
def generateAuhtor(given: String, family: String, orcid: String, index: Int): Author = {
  val a = new Author
  a.setName(given)
  a.setSurname(family)
  // Join only the parts that are actually present: string interpolation would
  // render a null given name as the literal text "null" inside the full name.
  a.setFullname(List(given, family).filter(part => StringUtils.isNotBlank(part)).mkString(" "))
  a.setRank(index + 1)
  if (StringUtils.isNotBlank(orcid))
    a.setPid(
      List(
        createSP(
          orcid,
          ModelConstants.ORCID_PENDING,
          ModelConstants.DNET_PID_TYPES,
          generateDataInfo()
        )
      ).asJava
    )
  a
}
def convert(input: String, vocabularies: VocabularyGroup): List[Oaf] = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats

View File

@ -5,6 +5,8 @@ import org.json4s.DefaultFormats
import org.json4s.JsonAST.JString
import org.json4s.jackson.JsonMethods.parse
import org.junit.jupiter.api.Test
import java.io.PrintWriter
class CrossrefFunderTest {
@ -29,18 +31,16 @@ class CrossrefFunderTest {
var i = 0
// val w =new PrintWriter("/tmp/funder_names")
val w =new PrintWriter("/tmp/funder_names")
val data = cf.toIterator.next()
println(data)
// cf.map(s=>parse_funder(s)).foreach(s=> w.write(s"${s._1} \t${s._2} \t${s._3.mkString("--")}\t\n"))
//
//
// w.close()
cf.map(s=>parse_funder(s)).foreach(s=> w.write(s"${s._1} \t${s._2} \t${s._3.mkString("--")}\t\n"))
w.close()
// cf.foreach{_ =>
//
@ -50,11 +50,11 @@ class CrossrefFunderTest {
// println(s"get $i documents")
//
// }
println(s"Total item $i")
//
//
//
//
// println(s"Total item $i")
}

View File

@ -801,7 +801,7 @@
<mockito-core.version>3.3.3</mockito-core.version>
<mongodb.driver.version>3.4.2</mongodb.driver.version>
<vtd.version>[2.12,3.0)</vtd.version>
<dhp-schemas.version>[2.12.0]</dhp-schemas.version>
<dhp-schemas.version>2.12.1-DOIBOOST_REFACTOR-SNAPSHOT</dhp-schemas.version>
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>