diff --git a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java new file mode 100644 index 0000000..b064600 --- /dev/null +++ b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java @@ -0,0 +1,399 @@ + +package eu.dnetlib.dhp.schema.oaf.utils; + +import java.util.*; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import org.apache.commons.lang3.StringUtils; + +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; + +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; + +public class CleaningFunctions { + + public static final String DOI_PREFIX_REGEX = "(^10\\.|\\/10.)"; + public static final String DOI_PREFIX = "10."; + + public static final String ORCID_PREFIX_REGEX = "^http(s?):\\/\\/orcid\\.org\\/"; + public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)"; + + public static final Set PID_BLACKLIST = new HashSet<>(); + public static final String INVALID_AUTHOR_REGEX = ".*deactivated.*"; + public static final String TITLE_FILTER_REGEX = "[.*test.*\\W\\d]"; + public static final int TITLE_FILTER_RESIDUAL_LENGTH = 10; + + static { + PID_BLACKLIST.add("none"); + PID_BLACKLIST.add("na"); + } + + public static T fixVocabularyNames(T value) { + if (value instanceof Datasource) { + // nothing to clean here + } else if (value instanceof Project) { + // nothing to clean here + } else if (value instanceof Organization) { + Organization o = (Organization) value; + if (Objects.nonNull(o.getCountry())) { + fixVocabName(o.getCountry(), ModelConstants.DNET_COUNTRY_TYPE); + } + } else if (value instanceof Relation) { + // nothing to clean here + } else if (value instanceof Result) { + + Result r = (Result) value; + + fixVocabName(r.getLanguage(), ModelConstants.DNET_LANGUAGES); + fixVocabName(r.getResourcetype(), ModelConstants.DNET_DATA_CITE_RESOURCE); + fixVocabName(r.getBestaccessright(), ModelConstants.DNET_ACCESS_MODES); + + if (Objects.nonNull(r.getSubject())) { + r.getSubject().forEach(s -> fixVocabName(s.getQualifier(), ModelConstants.DNET_SUBJECT_TYPOLOGIES)); + } + if (Objects.nonNull(r.getInstance())) { + for (Instance i : r.getInstance()) { + fixVocabName(i.getAccessright(), ModelConstants.DNET_ACCESS_MODES); + fixVocabName(i.getRefereed(), ModelConstants.DNET_REVIEW_LEVELS); + } + } + if (Objects.nonNull(r.getAuthor())) { + r.getAuthor().stream().filter(Objects::nonNull).forEach(a -> { + if (Objects.nonNull(a.getPid())) { + a.getPid().stream().filter(Objects::nonNull).forEach(p -> { + fixVocabName(p.getQualifier(), ModelConstants.DNET_PID_TYPES); + }); + } + }); + } + if (value instanceof Publication) { + + } else if (value instanceof Dataset) { + + } else if (value instanceof OtherResearchProduct) { + + } else if (value instanceof Software) { + + } + } + + return value; + } + + public static boolean filter(T value) { + if (value instanceof Datasource) { + // nothing to evaluate here + } else if (value instanceof Project) { + // nothing to evaluate here + } else if (value instanceof Organization) { + // nothing to evaluate here + } else if (value instanceof Relation) { + // nothing to clean here + } else if (value instanceof Result) { + + Result r = (Result) value; + + if (Objects.nonNull(r.getTitle()) && r.getTitle().isEmpty()) { + return false; + } + + if (value instanceof Publication) { + + } else if (value instanceof Dataset) { + + } else if (value instanceof OtherResearchProduct) { + + } else if (value instanceof Software) { + + } + } + return true; + } + + public static T cleanup(T value) { + if (value instanceof Datasource) { + // nothing to clean here + } else if (value instanceof Project) { + // nothing to clean here + } else if (value instanceof Organization) { + Organization o = (Organization) value; + if (Objects.isNull(o.getCountry()) || StringUtils.isBlank(o.getCountry().getClassid())) { + o.setCountry(ModelConstants.UNKNOWN_COUNTRY); + } + } else if (value instanceof Relation) { + // nothing to clean here + } else if (value instanceof Result) { + + Result r = (Result) value; + if (Objects.nonNull(r.getPublisher()) && StringUtils.isBlank(r.getPublisher().getValue())) { + r.setPublisher(null); + } + if (Objects.isNull(r.getLanguage()) || StringUtils.isBlank(r.getLanguage().getClassid())) { + r + .setLanguage( + qualifier("und", "Undetermined", ModelConstants.DNET_LANGUAGES)); + } + if (Objects.nonNull(r.getSubject())) { + r + .setSubject( + r + .getSubject() + .stream() + .filter(Objects::nonNull) + .filter(sp -> StringUtils.isNotBlank(sp.getValue())) + .filter(sp -> Objects.nonNull(sp.getQualifier())) + .filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid())) + .map(CleaningFunctions::cleanValue) + .collect(Collectors.toList())); + } + if (Objects.nonNull(r.getTitle())) { + r + .setTitle( + r + .getTitle() + .stream() + .filter(Objects::nonNull) + .filter(sp -> StringUtils.isNotBlank(sp.getValue())) + .filter( + sp -> sp + .getValue() + .toLowerCase() + .replaceAll(TITLE_FILTER_REGEX, "") + .length() > TITLE_FILTER_RESIDUAL_LENGTH) + .map(CleaningFunctions::cleanValue) + .collect(Collectors.toList())); + } + if (Objects.nonNull(r.getDescription())) { + r + .setDescription( + r + .getDescription() + .stream() + .filter(Objects::nonNull) + .filter(sp -> StringUtils.isNotBlank(sp.getValue())) + .map(CleaningFunctions::cleanValue) + .collect(Collectors.toList())); + } + if (Objects.nonNull(r.getPid())) { + r.setPid(processPidCleaning(r.getPid())); + } + if (Objects.isNull(r.getResourcetype()) || StringUtils.isBlank(r.getResourcetype().getClassid())) { + r + .setResourcetype( + qualifier(ModelConstants.UNKNOWN, "Unknown", ModelConstants.DNET_DATA_CITE_RESOURCE)); + } + if (Objects.nonNull(r.getInstance())) { + + for (Instance i : r.getInstance()) { + Optional + .ofNullable(i.getPid()) + .ifPresent(pid -> { + final Set pids = pid + .stream() + .filter(Objects::nonNull) + .filter(p -> StringUtils.isNotBlank(p.getValue())) + .collect(Collectors.toCollection(HashSet::new)); + + Optional + .ofNullable(i.getAlternateIdentifier()) + .ifPresent(altId -> { + final Set altIds = altId + .stream() + .filter(Objects::nonNull) + .filter(p -> StringUtils.isNotBlank(p.getValue())) + .collect(Collectors.toCollection(HashSet::new)); + + i.setAlternateIdentifier(Lists.newArrayList(Sets.difference(altIds, pids))); + }); + }); + + if (Objects.isNull(i.getAccessright()) || StringUtils.isBlank(i.getAccessright().getClassid())) { + i + .setAccessright( + accessRight( + ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE, + ModelConstants.DNET_ACCESS_MODES)); + } + if (Objects.isNull(i.getHostedby()) || StringUtils.isBlank(i.getHostedby().getKey())) { + i.setHostedby(ModelConstants.UNKNOWN_REPOSITORY); + } + if (Objects.isNull(i.getRefereed())) { + i.setRefereed(qualifier("0000", "Unknown", ModelConstants.DNET_REVIEW_LEVELS)); + } + } + } + if (Objects.isNull(r.getBestaccessright()) || StringUtils.isBlank(r.getBestaccessright().getClassid())) { + Qualifier bestaccessrights = OafMapperUtils.createBestAccessRights(r.getInstance()); + if (Objects.isNull(bestaccessrights)) { + r + .setBestaccessright( + qualifier( + ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE, + ModelConstants.DNET_ACCESS_MODES)); + } else { + r.setBestaccessright(bestaccessrights); + } + } + if (Objects.nonNull(r.getAuthor())) { + final List authors = Lists.newArrayList(); + for (Author a : r.getAuthor()) { + if (Objects.isNull(a.getPid())) { + a.setPid(Lists.newArrayList()); + } else { + a + .setPid( + a + .getPid() + .stream() + .filter(Objects::nonNull) + .filter(p -> Objects.nonNull(p.getQualifier())) + .filter(p -> StringUtils.isNotBlank(p.getValue())) + .map(p -> { + p.setValue(p.getValue().trim().replaceAll(ORCID_PREFIX_REGEX, "")); + return p; + }) + .filter(p -> StringUtils.isNotBlank(p.getValue())) + .collect( + Collectors + .toMap( + StructuredProperty::getValue, Function.identity(), (p1, p2) -> p1, + LinkedHashMap::new)) + .values() + .stream() + .collect(Collectors.toList())); + } + if (StringUtils.isBlank(a.getFullname())) { + if (StringUtils.isNotBlank(a.getName()) && StringUtils.isNotBlank(a.getSurname())) { + a.setFullname(a.getSurname() + ", " + a.getName()); + } + } + if (StringUtils.isNotBlank(a.getFullname()) && isValidAuthorName(a)) { + authors.add(a); + } + } + + boolean nullRank = authors + .stream() + .anyMatch(a -> Objects.isNull(a.getRank())); + if (nullRank) { + int i = 1; + for (Author author : authors) { + author.setRank(i++); + } + } + r.setAuthor(authors); + + } + if (value instanceof Publication) { + + } else if (value instanceof Dataset) { + + } else if (value instanceof OtherResearchProduct) { + + } else if (value instanceof Software) { + + } + } + + return value; + } + + private static boolean isValidAuthorName(Author a) { + return !Stream + .of(a.getFullname(), a.getName(), a.getSurname()) + .filter(s -> s != null && !s.isEmpty()) + .collect(Collectors.joining("")) + .toLowerCase() + .matches(INVALID_AUTHOR_REGEX); + } + + private static List processPidCleaning(List pids) { + return pids + .stream() + .filter(Objects::nonNull) + .filter(sp -> StringUtils.isNotBlank(StringUtils.trim(sp.getValue()))) + .filter(sp -> !PID_BLACKLIST.contains(sp.getValue().trim().toLowerCase())) + .filter(sp -> Objects.nonNull(sp.getQualifier())) + .filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid())) + .map(CleaningFunctions::normalizePidValue) + .filter(CleaningFunctions::pidFilter) + .collect(Collectors.toList()); + } + + protected static StructuredProperty cleanValue(StructuredProperty s) { + s.setValue(s.getValue().replaceAll(CLEANING_REGEX, " ")); + return s; + } + + protected static Field cleanValue(Field s) { + s.setValue(s.getValue().replaceAll(CLEANING_REGEX, " ")); + return s; + } + + // HELPERS + + private static void fixVocabName(Qualifier q, String vocabularyName) { + if (Objects.nonNull(q) && StringUtils.isBlank(q.getSchemeid())) { + q.setSchemeid(vocabularyName); + q.setSchemename(vocabularyName); + } + } + + private static AccessRight accessRight(String classid, String classname, String scheme) { + return OafMapperUtils + .accessRight( + classid, classname, scheme, scheme); + } + + private static Qualifier qualifier(String classid, String classname, String scheme) { + return OafMapperUtils + .qualifier( + classid, classname, scheme, scheme); + } + + /** + * Utility method that filter PID values on a per-type basis. + * @param s the PID whose value will be checked. + * @return false if the pid matches the filter criteria, true otherwise. + */ + public static boolean pidFilter(StructuredProperty s) { + final String pidValue = s.getValue(); + if (Objects.isNull(s.getQualifier()) || + StringUtils.isBlank(pidValue) || + StringUtils.isBlank(pidValue.replaceAll("(?:\\n|\\r|\\t|\\s)", ""))) { + return false; + } + if (CleaningFunctions.PID_BLACKLIST.contains(pidValue)) { + return false; + } + if (PidBlacklistProvider.getBlacklist(s.getQualifier().getClassid()).contains(pidValue)) { + return false; + } + return true; + } + + /** + * Utility method that normalises PID values on a per-type basis. + * @param pid the PID whose value will be normalised. + * @return the PID containing the normalised value. + */ + public static StructuredProperty normalizePidValue(StructuredProperty pid) { + String value = Optional + .ofNullable(pid.getValue()) + .map(String::trim) + .orElseThrow(() -> new IllegalArgumentException("PID value cannot be empty")); + switch (pid.getQualifier().getClassid()) { + + // TODO add cleaning for more PID types as needed + case "doi": + pid.setValue(value.toLowerCase().replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX)); + break; + } + return pid; + } + +} diff --git a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java new file mode 100644 index 0000000..43cdbc1 --- /dev/null +++ b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java @@ -0,0 +1,225 @@ + +package eu.dnetlib.dhp.schema.oaf.utils; + +import static com.google.common.base.Preconditions.checkArgument; +import static eu.dnetlib.dhp.schema.common.ModelConstants.*; + +import java.io.Serializable; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.util.*; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import org.apache.commons.codec.binary.Hex; +import org.apache.commons.lang3.StringUtils; + +import com.google.common.collect.HashBiMap; +import com.google.common.collect.Maps; + +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.*; + +/** + * Factory class for OpenAIRE identifiers in the Graph + */ +public class IdentifierFactory implements Serializable { + + public static final String ID_SEPARATOR = "::"; + public static final String ID_PREFIX_SEPARATOR = "|"; + + public static final int ID_PREFIX_LEN = 12; + + /** + * Declares the associations PID_TYPE -> [DATASOURCE ID, NAME] considered authoritative for that PID_TYPE + */ + public static final Map> PID_AUTHORITY = Maps.newHashMap(); + + static { + PID_AUTHORITY.put(PidType.doi, HashBiMap.create()); + PID_AUTHORITY.get(PidType.doi).put(CROSSREF_ID, "Crossref"); + PID_AUTHORITY.get(PidType.doi).put(DATACITE_ID, "Datacite"); + + PID_AUTHORITY.put(PidType.pmc, HashBiMap.create()); + PID_AUTHORITY.get(PidType.pmc).put(EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central"); + PID_AUTHORITY.get(PidType.pmc).put(PUBMED_CENTRAL_ID, "PubMed Central"); + + PID_AUTHORITY.put(PidType.pmid, HashBiMap.create()); + PID_AUTHORITY.get(PidType.pmid).put(EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central"); + PID_AUTHORITY.get(PidType.pmid).put(PUBMED_CENTRAL_ID, "PubMed Central"); + + PID_AUTHORITY.put(PidType.arXiv, HashBiMap.create()); + PID_AUTHORITY.get(PidType.arXiv).put(ARXIV_ID, "arXiv.org e-Print Archive"); + } + + public static List getPids(List pid, KeyValue collectedFrom) { + return pidFromInstance(pid, collectedFrom, true).distinct().collect(Collectors.toList()); + } + + public static String createDOIBoostIdentifier(T entity) { + if (entity == null) + return null; + + StructuredProperty pid = null; + if (entity.getPid() != null) { + pid = entity + .getPid() + .stream() + .filter(Objects::nonNull) + .filter(s -> s.getQualifier() != null && "doi".equalsIgnoreCase(s.getQualifier().getClassid())) + .filter(CleaningFunctions::pidFilter) + .findAny() + .orElse(null); + } else { + if (entity.getInstance() != null) { + pid = entity + .getInstance() + .stream() + .filter(i -> i.getPid() != null) + .flatMap(i -> i.getPid().stream()) + .filter(CleaningFunctions::pidFilter) + .findAny() + .orElse(null); + } + } + if (pid != null) + return idFromPid(entity, pid, true); + return null; + } + + /** + * Creates an identifier from the most relevant PID (if available) provided by a known PID authority in the given + * entity T. Returns entity.id when none of the PIDs meet the selection criteria is available. + * + * @param entity the entity providing PIDs and a default ID. + * @param the specific entity type. Currently Organization and Result subclasses are supported. + * @param md5 indicates whether should hash the PID value or not. + * @return an identifier from the most relevant PID, entity.id otherwise + */ + public static String createIdentifier(T entity, boolean md5) { + + checkArgument(StringUtils.isNoneBlank(entity.getId()), "missing entity identifier"); + + final Map> pids = extractPids(entity); + + return pids + .values() + .stream() + .flatMap(s -> s.stream()) + .min(new PidComparator<>(entity)) + .map( + min -> Optional + .ofNullable(pids.get(min.getQualifier().getClassid())) + .map( + p -> p + .stream() + .sorted(new PidValueComparator()) + .findFirst() + .map(s -> idFromPid(entity, s, md5)) + .orElseGet(entity::getId)) + .orElseGet(entity::getId)) + .orElseGet(entity::getId); + } + + private static Map> extractPids(T entity) { + if (entity instanceof Result) { + return Optional + .ofNullable(((Result) entity).getInstance()) + .map( + instance -> mapPids(instance)) + .orElse(new HashMap<>()); + } else { + return entity + .getPid() + .stream() + .map(CleaningFunctions::normalizePidValue) + .filter(CleaningFunctions::pidFilter) + .collect( + Collectors + .groupingBy( + p -> p.getQualifier().getClassid(), + Collectors.mapping(p -> p, Collectors.toCollection(HashSet::new)))); + } + } + + private static Map> mapPids(List instance) { + return instance + .stream() + .map(i -> pidFromInstance(i.getPid(), i.getCollectedfrom(), false)) + .flatMap(Function.identity()) + .collect( + Collectors + .groupingBy( + p -> p.getQualifier().getClassid(), + Collectors.mapping(p -> p, Collectors.toCollection(HashSet::new)))); + } + + private static Stream pidFromInstance(List pid, KeyValue collectedFrom, + boolean mapHandles) { + return Optional + .ofNullable(pid) + .map( + pp -> pp + .stream() + // filter away PIDs provided by a DS that is not considered an authority for the + // given PID Type + .filter(p -> { + return shouldFilterPid(collectedFrom, p, mapHandles); + }) + .map(CleaningFunctions::normalizePidValue) + .filter(CleaningFunctions::pidFilter)) + .orElse(Stream.empty()); + } + + private static boolean shouldFilterPid(KeyValue collectedFrom, StructuredProperty p, boolean mapHandles) { + final PidType pType = PidType.tryValueOf(p.getQualifier().getClassid()); + return (mapHandles && pType.equals(PidType.handle)) || Optional.ofNullable(collectedFrom).isPresent() && + Optional + .ofNullable(PID_AUTHORITY.get(pType)) + .map(authorities -> { + return authorities.containsKey(collectedFrom.getKey()) + || authorities.containsValue(collectedFrom.getValue()); + }) + .orElse(false); + } + + /** + * @see {@link IdentifierFactory#createIdentifier(OafEntity, boolean)} + */ + public static String createIdentifier(T entity) { + + return createIdentifier(entity, true); + } + + private static String idFromPid(T entity, StructuredProperty s, boolean md5) { + return new StringBuilder() + .append(ModelSupport.getIdPrefix(entity.getClass())) + .append(ID_PREFIX_SEPARATOR) + .append(createPrefix(s.getQualifier().getClassid())) + .append(ID_SEPARATOR) + .append(md5 ? md5(s.getValue()) : s.getValue()) + .toString(); + } + + // create the prefix (length = 12) + private static String createPrefix(String pidType) { + StringBuilder prefix = new StringBuilder(StringUtils.left(pidType, ID_PREFIX_LEN)); + while (prefix.length() < ID_PREFIX_LEN) { + prefix.append("_"); + } + return prefix.substring(0, ID_PREFIX_LEN); + } + + public static String md5(final String s) { + try { + final MessageDigest md = MessageDigest.getInstance("MD5"); + md.update(s.getBytes(StandardCharsets.UTF_8)); + return new String(Hex.encodeHex(md.digest())); + } catch (final Exception e) { + System.err.println("Error creating id"); + return null; + } + } + +} diff --git a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ModelHardLimits.java b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ModelHardLimits.java new file mode 100644 index 0000000..b8f81ea --- /dev/null +++ b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ModelHardLimits.java @@ -0,0 +1,22 @@ + +package eu.dnetlib.dhp.schema.oaf.utils; + +public class ModelHardLimits { + + public static final String LAYOUT = "index"; + public static final String INTERPRETATION = "openaire"; + public static final String SEPARATOR = "-"; + + public static final int MAX_EXTERNAL_ENTITIES = 50; + public static final int MAX_AUTHORS = 200; + public static final int MAX_AUTHOR_FULLNAME_LENGTH = 1000; + public static final int MAX_TITLE_LENGTH = 5000; + public static final int MAX_TITLES = 10; + public static final int MAX_ABSTRACT_LENGTH = 150000; + public static final int MAX_INSTANCES = 10; + + public static String getCollectionName(String format) { + return format + SEPARATOR + LAYOUT + SEPARATOR + INTERPRETATION; + } + +} diff --git a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java new file mode 100644 index 0000000..7465adf --- /dev/null +++ b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java @@ -0,0 +1,364 @@ + +package eu.dnetlib.dhp.schema.oaf.utils; + +import static eu.dnetlib.dhp.schema.common.ModelConstants.*; + +import java.util.*; +import java.util.concurrent.ConcurrentHashMap; +import java.util.function.Function; +import java.util.function.Predicate; +import java.util.stream.Collectors; + +import org.apache.commons.lang3.StringUtils; + +import eu.dnetlib.dhp.schema.common.AccessRightComparator; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.*; + +public class OafMapperUtils { + + public static Oaf merge(final Oaf left, final Oaf right) { + if (ModelSupport.isSubClass(left, OafEntity.class)) { + return mergeEntities((OafEntity) left, (OafEntity) right); + } else if (ModelSupport.isSubClass(left, Relation.class)) { + ((Relation) left).mergeFrom((Relation) right); + } else { + throw new RuntimeException("invalid Oaf type:" + left.getClass().getCanonicalName()); + } + return left; + } + + public static OafEntity mergeEntities(OafEntity left, OafEntity right) { + if (ModelSupport.isSubClass(left, Result.class)) { + return mergeResults((Result) left, (Result) right); + } else if (ModelSupport.isSubClass(left, Datasource.class)) { + ((Datasource) left).mergeFrom((Datasource) right); + } else if (ModelSupport.isSubClass(left, Organization.class)) { + ((Organization) left).mergeFrom((Organization) right); + } else if (ModelSupport.isSubClass(left, Project.class)) { + ((Project) left).mergeFrom((Project) right); + } else { + throw new RuntimeException("invalid OafEntity subtype:" + left.getClass().getCanonicalName()); + } + return left; + } + + public static Result mergeResults(Result left, Result right) { + if (new ResultTypeComparator().compare(left, right) < 0) { + left.mergeFrom(right); + return left; + } else { + right.mergeFrom(left); + return right; + } + } + + public static KeyValue keyValue(final String k, final String v) { + final KeyValue kv = new KeyValue(); + kv.setKey(k); + kv.setValue(v); + return kv; + } + + public static List listKeyValues(final String... s) { + if (s.length % 2 > 0) { + throw new RuntimeException("Invalid number of parameters (k,v,k,v,....)"); + } + + final List list = new ArrayList<>(); + for (int i = 0; i < s.length; i += 2) { + list.add(keyValue(s[i], s[i + 1])); + } + return list; + } + + public static Field field(final T value, final DataInfo info) { + if (value == null || StringUtils.isBlank(value.toString())) { + return null; + } + + final Field field = new Field<>(); + field.setValue(value); + field.setDataInfo(info); + return field; + } + + public static List> listFields(final DataInfo info, final String... values) { + return Arrays + .stream(values) + .map(v -> field(v, info)) + .filter(Objects::nonNull) + .filter(distinctByKey(f -> f.getValue())) + .collect(Collectors.toList()); + } + + public static List> listFields(final DataInfo info, final List values) { + return values + .stream() + .map(v -> field(v, info)) + .filter(Objects::nonNull) + .filter(distinctByKey(f -> f.getValue())) + .collect(Collectors.toList()); + } + + public static Qualifier unknown(final String schemeid, final String schemename) { + return qualifier("UNKNOWN", "Unknown", schemeid, schemename); + } + + public static AccessRight accessRight( + final String classid, + final String classname, + final String schemeid, + final String schemename) { + return accessRight(classid, classname, schemeid, schemename, null); + } + + public static AccessRight accessRight( + final String classid, + final String classname, + final String schemeid, + final String schemename, + final OpenAccessRoute openAccessRoute) { + final AccessRight accessRight = new AccessRight(); + accessRight.setClassid(classid); + accessRight.setClassname(classname); + accessRight.setSchemeid(schemeid); + accessRight.setSchemename(schemename); + accessRight.setOpenAccessRoute(openAccessRoute); + return accessRight; + } + + public static Qualifier qualifier( + final String classid, + final String classname, + final String schemeid, + final String schemename) { + final Qualifier q = new Qualifier(); + q.setClassid(classid); + q.setClassname(classname); + q.setSchemeid(schemeid); + q.setSchemename(schemename); + return q; + } + + public static Qualifier qualifier(final Qualifier qualifier) { + final Qualifier q = new Qualifier(); + q.setClassid(qualifier.getClassid()); + q.setClassname(qualifier.getClassname()); + q.setSchemeid(qualifier.getSchemeid()); + q.setSchemename(qualifier.getSchemename()); + return q; + } + + public static StructuredProperty structuredProperty( + final String value, + final String classid, + final String classname, + final String schemeid, + final String schemename, + final DataInfo dataInfo) { + + return structuredProperty(value, qualifier(classid, classname, schemeid, schemename), dataInfo); + } + + public static StructuredProperty structuredProperty( + final String value, + final Qualifier qualifier, + final DataInfo dataInfo) { + if (value == null) { + return null; + } + final StructuredProperty sp = new StructuredProperty(); + sp.setValue(value); + sp.setQualifier(qualifier); + sp.setDataInfo(dataInfo); + return sp; + } + + public static ExtraInfo extraInfo( + final String name, + final String value, + final String typology, + final String provenance, + final String trust) { + final ExtraInfo info = new ExtraInfo(); + info.setName(name); + info.setValue(value); + info.setTypology(typology); + info.setProvenance(provenance); + info.setTrust(trust); + return info; + } + + public static OAIProvenance oaiIProvenance( + final String identifier, + final String baseURL, + final String metadataNamespace, + final Boolean altered, + final String datestamp, + final String harvestDate) { + + final OriginDescription desc = new OriginDescription(); + desc.setIdentifier(identifier); + desc.setBaseURL(baseURL); + desc.setMetadataNamespace(metadataNamespace); + desc.setAltered(altered); + desc.setDatestamp(datestamp); + desc.setHarvestDate(harvestDate); + + final OAIProvenance p = new OAIProvenance(); + p.setOriginDescription(desc); + + return p; + } + + public static Journal journal( + final String name, + final String issnPrinted, + final String issnOnline, + final String issnLinking, + final DataInfo dataInfo) { + return journal( + name, + issnPrinted, + issnOnline, + issnLinking, + null, + null, + null, + null, + null, + null, + null, + dataInfo); + } + + public static Journal journal( + final String name, + final String issnPrinted, + final String issnOnline, + final String issnLinking, + final String ep, + final String iss, + final String sp, + final String vol, + final String edition, + final String conferenceplace, + final String conferencedate, + final DataInfo dataInfo) { + + if (StringUtils.isNotBlank(name) + || StringUtils.isNotBlank(issnPrinted) + || StringUtils.isNotBlank(issnOnline) + || StringUtils.isNotBlank(issnLinking)) { + final Journal j = new Journal(); + j.setName(name); + j.setIssnPrinted(issnPrinted); + j.setIssnOnline(issnOnline); + j.setIssnLinking(issnLinking); + j.setEp(ep); + j.setIss(iss); + j.setSp(sp); + j.setVol(vol); + j.setEdition(edition); + j.setConferenceplace(conferenceplace); + j.setConferencedate(conferencedate); + j.setDataInfo(dataInfo); + return j; + } else { + return null; + } + } + + public static DataInfo dataInfo( + final Boolean deletedbyinference, + final String inferenceprovenance, + final Boolean inferred, + final Boolean invisible, + final Qualifier provenanceaction, + final String trust) { + final DataInfo d = new DataInfo(); + d.setDeletedbyinference(deletedbyinference); + d.setInferenceprovenance(inferenceprovenance); + d.setInferred(inferred); + d.setInvisible(invisible); + d.setProvenanceaction(provenanceaction); + d.setTrust(trust); + return d; + } + + public static String createOpenaireId( + final int prefix, + final String originalId, + final boolean to_md5) { + if (StringUtils.isBlank(originalId)) { + return null; + } else if (to_md5) { + final String nsPrefix = StringUtils.substringBefore(originalId, "::"); + final String rest = StringUtils.substringAfter(originalId, "::"); + return String.format("%s|%s::%s", prefix, nsPrefix, IdentifierFactory.md5(rest)); + } else { + return String.format("%s|%s", prefix, originalId); + } + } + + public static String createOpenaireId( + final String type, + final String originalId, + final boolean to_md5) { + switch (type) { + case "datasource": + return createOpenaireId(10, originalId, to_md5); + case "organization": + return createOpenaireId(20, originalId, to_md5); + case "person": + return createOpenaireId(30, originalId, to_md5); + case "project": + return createOpenaireId(40, originalId, to_md5); + default: + return createOpenaireId(50, originalId, to_md5); + } + } + + public static String asString(final Object o) { + return o == null ? "" : o.toString(); + } + + public static Predicate distinctByKey( + final Function keyExtractor) { + final Map seen = new ConcurrentHashMap<>(); + return t -> seen.putIfAbsent(keyExtractor.apply(t), Boolean.TRUE) == null; + } + + public static Qualifier createBestAccessRights(final List instanceList) { + return getBestAccessRights(instanceList); + } + + protected static Qualifier getBestAccessRights(final List instanceList) { + if (instanceList != null) { + final Optional min = instanceList + .stream() + .map(i -> i.getAccessright()) + .min(new AccessRightComparator<>()); + + final Qualifier rights = min.isPresent() ? qualifier(min.get()) : new Qualifier(); + + if (StringUtils.isBlank(rights.getClassid())) { + rights.setClassid(UNKNOWN); + } + if (StringUtils.isBlank(rights.getClassname()) + || UNKNOWN.equalsIgnoreCase(rights.getClassname())) { + rights.setClassname(NOT_AVAILABLE); + } + if (StringUtils.isBlank(rights.getSchemeid())) { + rights.setSchemeid(DNET_ACCESS_MODES); + } + if (StringUtils.isBlank(rights.getSchemename())) { + rights.setSchemename(DNET_ACCESS_MODES); + } + + return rights; + } + return null; + } +} diff --git a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OrganizationPidComparator.java b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OrganizationPidComparator.java new file mode 100644 index 0000000..3a6df29 --- /dev/null +++ b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OrganizationPidComparator.java @@ -0,0 +1,38 @@ + +package eu.dnetlib.dhp.schema.oaf.utils; + +import java.util.Comparator; + +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; + +public class OrganizationPidComparator implements Comparator { + + @Override + public int compare(StructuredProperty left, StructuredProperty right) { + + PidType lClass = PidType.tryValueOf(left.getQualifier().getClassid()); + PidType rClass = PidType.tryValueOf(right.getQualifier().getClassid()); + + if (lClass.equals(PidType.openorgs)) + return -1; + if (rClass.equals(PidType.openorgs)) + return 1; + + if (lClass.equals(PidType.GRID)) + return -1; + if (rClass.equals(PidType.GRID)) + return 1; + + if (lClass.equals(PidType.mag_id)) + return -1; + if (rClass.equals(PidType.mag_id)) + return 1; + + if (lClass.equals(PidType.urn)) + return -1; + if (rClass.equals(PidType.urn)) + return 1; + + return 0; + } +} diff --git a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidBlacklist.java b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidBlacklist.java new file mode 100644 index 0000000..0b8e5e3 --- /dev/null +++ b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidBlacklist.java @@ -0,0 +1,8 @@ + +package eu.dnetlib.dhp.schema.oaf.utils; + +import java.util.HashMap; +import java.util.HashSet; + +public class PidBlacklist extends HashMap> { +} diff --git a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidBlacklistProvider.java b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidBlacklistProvider.java new file mode 100644 index 0000000..1c1c21f --- /dev/null +++ b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidBlacklistProvider.java @@ -0,0 +1,37 @@ + +package eu.dnetlib.dhp.schema.oaf.utils; + +import java.io.IOException; +import java.util.HashSet; +import java.util.Optional; +import java.util.Set; + +import org.apache.commons.io.IOUtils; + +import com.fasterxml.jackson.databind.ObjectMapper; + +public class PidBlacklistProvider { + + private static final PidBlacklist blacklist; + + static { + try { + String json = IOUtils.toString(IdentifierFactory.class.getResourceAsStream("pid_blacklist.json")); + blacklist = new ObjectMapper().readValue(json, PidBlacklist.class); + + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + public static PidBlacklist getBlacklist() { + return blacklist; + } + + public static Set getBlacklist(String pidType) { + return Optional + .ofNullable(getBlacklist().get(pidType)) + .orElse(new HashSet<>()); + } + +} diff --git a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidComparator.java b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidComparator.java new file mode 100644 index 0000000..2bee0eb --- /dev/null +++ b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidComparator.java @@ -0,0 +1,48 @@ + +package eu.dnetlib.dhp.schema.oaf.utils; + +import java.util.Comparator; + +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; + +public class PidComparator implements Comparator { + + private T entity; + + public PidComparator(T entity) { + this.entity = entity; + } + + @Override + public int compare(StructuredProperty left, StructuredProperty right) { + + if (left == null && right == null) + return 0; + if (left == null) + return 1; + if (right == null) + return -1; + + if (ModelSupport.isSubClass(entity, Result.class)) { + return compareResultPids(left, right); + } + if (ModelSupport.isSubClass(entity, Organization.class)) { + return compareOrganizationtPids(left, right); + } + + // Else (but unlikely), lexicographical ordering will do. + return left.getQualifier().getClassid().compareTo(right.getQualifier().getClassid()); + } + + private int compareResultPids(StructuredProperty left, StructuredProperty right) { + return new ResultPidComparator().compare(left, right); + } + + private int compareOrganizationtPids(StructuredProperty left, StructuredProperty right) { + return new OrganizationPidComparator().compare(left, right); + } +} diff --git a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidType.java b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidType.java new file mode 100644 index 0000000..5a297be --- /dev/null +++ b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidType.java @@ -0,0 +1,29 @@ + +package eu.dnetlib.dhp.schema.oaf.utils; + +import org.apache.commons.lang3.EnumUtils; + +public enum PidType { + + // Result + doi, pmid, pmc, handle, arXiv, nct, pdb, + + // Organization + openorgs, corda, corda_h2020, GRID, mag_id, urn, + + // Used by dedup + undefined, original; + + public static boolean isValid(String type) { + return EnumUtils.isValidEnum(PidType.class, type); + } + + public static PidType tryValueOf(String s) { + try { + return PidType.valueOf(s); + } catch (Exception e) { + return PidType.original; + } + } + +} diff --git a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidValueComparator.java b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidValueComparator.java new file mode 100644 index 0000000..0e20835 --- /dev/null +++ b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidValueComparator.java @@ -0,0 +1,33 @@ + +package eu.dnetlib.dhp.schema.oaf.utils; + +import java.util.Comparator; +import java.util.Optional; + +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; + +public class PidValueComparator implements Comparator { + + @Override + public int compare(StructuredProperty left, StructuredProperty right) { + + if (left == null && right == null) + return 0; + if (left == null) + return 1; + if (right == null) + return -1; + + StructuredProperty l = CleaningFunctions.normalizePidValue(left); + StructuredProperty r = CleaningFunctions.normalizePidValue(right); + + return Optional + .ofNullable(l.getValue()) + .map( + lv -> Optional + .ofNullable(r.getValue()) + .map(rv -> lv.compareTo(rv)) + .orElse(-1)) + .orElse(1); + } +} diff --git a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ResultPidComparator.java b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ResultPidComparator.java new file mode 100644 index 0000000..e51c480 --- /dev/null +++ b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ResultPidComparator.java @@ -0,0 +1,53 @@ + +package eu.dnetlib.dhp.schema.oaf.utils; + +import java.util.Comparator; + +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; + +public class ResultPidComparator implements Comparator { + + @Override + public int compare(StructuredProperty left, StructuredProperty right) { + + PidType lClass = PidType.tryValueOf(left.getQualifier().getClassid()); + PidType rClass = PidType.tryValueOf(right.getQualifier().getClassid()); + + if (lClass.equals(PidType.doi)) + return -1; + if (rClass.equals(PidType.doi)) + return 1; + + if (lClass.equals(PidType.pmid)) + return -1; + if (rClass.equals(PidType.pmid)) + return 1; + + if (lClass.equals(PidType.pmc)) + return -1; + if (rClass.equals(PidType.pmc)) + return 1; + + if (lClass.equals(PidType.handle)) + return -1; + if (rClass.equals(PidType.handle)) + return 1; + + if (lClass.equals(PidType.arXiv)) + return -1; + if (rClass.equals(PidType.arXiv)) + return 1; + + if (lClass.equals(PidType.nct)) + return -1; + if (rClass.equals(PidType.nct)) + return 1; + + if (lClass.equals(PidType.pdb)) + return -1; + if (rClass.equals(PidType.pdb)) + return 1; + + return 0; + } +} diff --git a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ResultTypeComparator.java b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ResultTypeComparator.java new file mode 100644 index 0000000..80a7ce0 --- /dev/null +++ b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ResultTypeComparator.java @@ -0,0 +1,76 @@ + +package eu.dnetlib.dhp.schema.oaf.utils; + +import static eu.dnetlib.dhp.schema.common.ModelConstants.CROSSREF_ID; + +import java.util.Comparator; +import java.util.HashSet; +import java.util.Optional; +import java.util.stream.Collectors; + +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class ResultTypeComparator implements Comparator { + + @Override + public int compare(Result left, Result right) { + + if (left == null && right == null) + return 0; + if (left == null) + return 1; + if (right == null) + return -1; + + HashSet lCf = getCollectedFromIds(left); + HashSet rCf = getCollectedFromIds(right); + + if (lCf.contains(CROSSREF_ID) && !rCf.contains(CROSSREF_ID)) { + return -1; + } + if (!lCf.contains(CROSSREF_ID) && rCf.contains(CROSSREF_ID)) { + return 1; + } + + String lClass = left.getResulttype().getClassid(); + String rClass = right.getResulttype().getClassid(); + + if (lClass.equals(rClass)) + return 0; + + if (lClass.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID)) + return -1; + if (rClass.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID)) + return 1; + + if (lClass.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID)) + return -1; + if (rClass.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID)) + return 1; + + if (lClass.equals(ModelConstants.SOFTWARE_RESULTTYPE_CLASSID)) + return -1; + if (rClass.equals(ModelConstants.SOFTWARE_RESULTTYPE_CLASSID)) + return 1; + + if (lClass.equals(ModelConstants.ORP_RESULTTYPE_CLASSID)) + return -1; + if (rClass.equals(ModelConstants.ORP_RESULTTYPE_CLASSID)) + return 1; + + // Else (but unlikely), lexicographical ordering will do. + return lClass.compareTo(rClass); + } + + protected HashSet getCollectedFromIds(Result left) { + return Optional + .ofNullable(left.getCollectedfrom()) + .map( + cf -> cf + .stream() + .map(c -> c.getKey()) + .collect(Collectors.toCollection(HashSet::new))) + .orElse(new HashSet<>()); + } +} diff --git a/src/main/resources/eu/dnetlib/dhp/schema/oaf/utils/pid_blacklist.json b/src/main/resources/eu/dnetlib/dhp/schema/oaf/utils/pid_blacklist.json new file mode 100644 index 0000000..05e8cde --- /dev/null +++ b/src/main/resources/eu/dnetlib/dhp/schema/oaf/utils/pid_blacklist.json @@ -0,0 +1,5 @@ +{ + "doi" : [ "10.12739/10.12739", "10.11646/zootaxa.4404.1.1", "10.5281/zenodo.3678492", "10.11646/zootaxa.4757.1.1", "10.17176/20170811-142447", "10.6035/asparkia", "10.11646/zootaxa.4754.1.6", "10.11646/zootaxa.4784.1.1", "10.6035/millars", "10.11646/zootaxa.4776.1.1", "10.1590/1982-0224-20170094", "10.11646/zootaxa.4773.1.1", "10.11646/zootaxa.4744.1.1", "10.3897/zookeys.38.383", "10.1371/journal.", "10.5281/zenodo.3727017", "10.5252/zoosystema2019v41a15", "10.6035/dossiersf", "10.11646/zootaxa.4754.1.20", "10.6035/recerca", "10.11646/zootaxa.4428.1.1", "10.7179/psri", "10.11646/zootaxa.4785.1.1", "10.2478/aemnp-2018-0014", "10.17979/spudc.9788497497565", "10.2139/ssrn.2721313", "10.17979/spudc.9788497497749", "10.5281/zenodo.3760976", "10.11646/zootaxa.4381.1.1", "10.6035/tiempos", "10.11646/zootaxa.4754.1.10", "10.5281/zenodo.3776452", "10.11646/zootaxa.4754.1.16", "10.5252/zoosystema2019v41a26", "10.11646/zootaxa.4759.2.1", "10.11646/zootaxa.4741.1.1", "10.5252/zoosystema2019v41a4", "10.1145/nnnnnnn.nnnnnnn", "10.17979/spudc.9788497497169", "10.11646/zootaxa.4780.3.1", "10.11646/zootaxa.4663.1.1", "10.5281/zenodo.3748525", "10.5281/zenodo.3746744", "10.3920/978-90-8686-761-5", "10.14198/eurau18alicante", "10.5252/geodiversitas2019v41a8", "10.4126/38m-0000003", "10.5281/zenodo.3648511", "10.6035/clr", "10.4126/38m-0000004", "10.5281/zenodo.3732535", "10.5281/zenodo.3355776", "10.4126/38m-0000002", "10.11646/zootaxa.4763.3.3", "10.11646/zootaxa.4413.3.1", "10.1163/9789004416208_005", "10.4126/38m-0000001", "10.3897/zookeys.30.308", "10.4126/38m-0000000", "10.5281/zenodo.3739808", "10.5281/zenodo.3674873", "10.3161/00034541anz2020.70.1.003", "10.5281/zenodo.3738648", "10.11646/zootaxa.4765.1.1", "10.11646/zootaxa.4754.1.8", "10.3897/zookeys.36.306", "10.4230/lipics", "10.5281/zenodo.3758345", "10.3161/00034541anz2020.70.1.001", "10.3929/ethz-a-005427569", "10.11646/zootaxa.4772.1.1", "10.5281/zenodo.3677235", "10.11646/zootaxa.4766.1.1", "10.17509/jurnal", "10.1145/1235", "10.11646/zootaxa.4754.1.15", "10.2478/aemnp-2018-0018", "10.11646/zootaxa.4538.1.1", "10.11646/zootaxa.4740.1.1", "10.3897/zookeys.32.282", "10.3897/zookeys.2.56", "10.3897/zookeys.39.425", "10.11646/zootaxa.4514.3.3", "10.1007/978-94-007-1966-8", "10.3897/zookeys.26.214", "10.11646/zootaxa.4106.1.1", "10.3897/zookeys.22.219", "10.11646/zootaxa.4748.2.1", "10.5252/zoosystema2019v41a19", "10.3897/zookeys.22.122", "10.1080/00222933.2019.1634225", "10.11646/zootaxa.4632.1.1", "10.1007/s00259-016-3484-4", "10.3897/zookeys.19.221", "10.3897/zookeys.2.7", "10.11646/zootaxa.4777.1.1", "10.14279/depositonce-3753", "10.1111/apha.12712", "10.11646/zootaxa.4759.3.4", "10.11646/zootaxa.4754.1.9", "10.11646/zootaxa.4747.2.8", "10.5281/zenodo.3757451", "10.5281/zenodo.3740269", "10.5252/zoosystema2020v42a4", "10.1140/epje/i2013-13103-3", "10.1177/0301006619863862", "10.5281/zenodo.3726987", "10.12795/hid", "10.24042/jipf", "10.12795/e-rips", "10.1186/s12913-016-1423-5", "10.4126/38m-0000005", "10.3847/2041-8213/aa91c9", "10.1145/1122445.1122456", "10.1103/physrevlett.114.191803", "10.3920/978-90-8686-782-0", "10.11646/zootaxa.4739.1.1", "10.11646/zootaxa.4770.1.1", "10.21009/10.21009/jpd.081", "10.1080/15548627.2015.1100356", "10.12795/ricl", "10.3897/zookeys.34.309", "10.1080/00222933.2019.1692088", "10.4126/frl01-0064002", "10.1371/journal", "10.1175/1520-0485(2002)032", "10.3897/zookeys.22.152", "10.11646/zootaxa.4731.2.1", "10.4126/frl01-0064005", "10.11646/zootaxa.4738.1.1", "10.11646/zootaxa.4780.1.6", "10.4126/frl01-0064004", "10.6018/analesps.31.1.158071", "10.1007/jhep08(2016)045", "10.5281/zenodo.3759519", "10.4126/frl01-0064010", "10.11646/zootaxa.4537.1.1", "10.5281/zenodo.3713533", "10.5281/zenodo.3742020", "10.4126/frl01-0064014", "10.4126/frl01-0064001", "10.1000/isbn", "10.5281/zenodo.3777290", "10.4126/frl01-0064008", "10.1159/000440895", "10.3897/zookeys.31.140", "10.4126/frl01-0064003", "10.1080/00222933.2018.1524032", "10.21686/2500-3925-2014-6", "10.1016/j.bbr.2011.03.031", "10.4126/frl01-0064006", "10.4126/frl01-0064007", "10.4126/frl01-0064020", "10.4126/frl01-0064016", "10.2478/aemnp-2018-0013", "10.4126/frl01-0064021", "10.5281/zenodo.3754300", "10.15330/gal.29-30.", "10.3897/zookeys.2.4", "10.5252/zoosystema2019v41a7", "10.22435/bpk.v17i2", "10.4126/frl01-0063997", "10.3897/zookeys.11.160", "10.11646/zootaxa.4754.1.14", "10.4126/frl01-0064013", "10.1080/20013078.2018.1535750", "10.1016/j.", "10.4126/frl01-0064011", "10.1002/ece3.2579", "10.1088/0264-9381/28/9/094001", "10.3897/zookeys.2.25", "10.4126/frl01-0064019", "10.4126/frl01-0063994", "10.4126/frl01-0064135", "10.4126/frl01-0063998", "10.12795/ppa", "10.4126/frl01-0064009", "10.11646/zootaxa.4769.1.1", "10.11646/zootaxa.4419.1.1", "10.11646/zootaxa.4733.1.1", "10.4126/frl01-0063993", "10.3161/15081109acc2016.18.1.005", "10.11646/zootaxa.4763.1.2", "10.11646/zootaxa.4754.1.19", "10.4126/frl01-0064136", "10.4126/frl01-0064159", "10.4126/frl01-0063999", "10.4126/frl01-0064161", "10.1089/ten.tea.2015.5000.abstracts", "10.1002/(issn)1521-3773", "10.1140/epjc/s10052-015-3325-9", "10.1016/j.physletb.2016.04.050", "10.1007/jhep04(2015)117", "10.1111/gcb.14904", "10.1016/s0140-6736(17)32129-3", "10.11646/zootaxa.4748.1.1", "10.4126/frl01-0064078", "10.1140/epjc/s10052-015-3408-7", "10.1002/(issn)1097-4652", "10.1007/jhep06(2015)121", "10.1007/jhep09(2014)103", "10.1016/j.gca.2007.06.021", "10.1007/jhep09(2015)049", "10.3897/zookeys.4.32", "10.6101/azq/0002", "10.11646/zootaxa.4764.1.1", "10.11646/zootaxa.4772.1.5", "10.4126/frl01-0064000", "10.4126/frl01-0064131", "10.1016/j.physletb.2015.08.061", "10.1007/jhep01(2015)069", "10.1016/j.physletb.2016.06.039", "10.1016/j.physletb.2015.07.011", "10.1007/jhep04(2015)116", "10.3920/978-90-8686-797-4", "10.1016/j.physletb.2015.12.020", "10.1016/j.physletb.2015.04.042", "10.1016/j.physletb.2016.06.004", "10.1140/epjc/s10052-015-3261-8", "10.1016/j.physletb.2015.10.067", "10.1016/j.physletb.2015.07.065", "10.1163/1876312x-00002195", "10.1016/j.physletb.2013.12.010", "10.1016/j.physletb.2013.01.024", "10.1007/jhep11(2014)056", "10.1007/jhep12(2017)142", "10.1002/pds.4864", "10.1140/epjc/s10052-015-3262-7", "10.1016/j.physletb.2014.09.054", "10.1140/epjc/s10052-015-3373-1", "10.1007/jhep03(2015)041", "10.1016/j.physletb.2016.02.047", "10.4126/frl01-0064018", "10.1016/j.physletb.2014.01.042", "10.1007/jhep09(2014)037", "10.1007/978-94-017-7285-3", "10.1007/s00424-013-1401-2", "10.1007/s00259-017-3822-1", "10.1177/0301006616671273", "10.1007/jhep09(2014)112", "10.1007/jhep06(2015)116", "10.1140/epjc/s10052-018-6243-9", "10.1140/epjc/s10052-017-4692-1", "10.1007/jhep10(2015)144", "10.1007/jhep07(2017)107", "10.1007/jhep11(2014)088", "10.1016/j.physletb.2014.01.006", "10.1007/jhep01(2018)055", "10.1016/j.physletb.2016.03.060", "10.1140/epjc/s10052-019-6904-3", "10.11646/zootaxa.4737.1.1", "10.3934/xx.xx.xx.xx", "10.11646/zootaxa.4758.2.1", "10.1016/j.physletb.2015.10.004", "10.1016/j.physletb.2015.07.053", "10.5798/diclemedj.0921.2012.04.0184", "10.1007/jhep04(2014)169", "10.4126/frl01-0064160", "10.3989/aem.2001.v31.i2", "10.1039/x0xx00000x", "10.11646/zootaxa.3856.4.1", "10.4126/frl01-0064133", "10.1007/jhep05(2015)078", "10.1016/j.physletb.2012.08.020", "10.1007/jhep07(2015)032", "10.1159/000090218", "10.1016/j.physletb.2014.03.015", "10.1007/jhep09(2015)108", "10.1007/jhep09(2015)050", "10.1007/jhep01(2014)163", "10.1016/j.physletb.2014.11.026", "10.1140/epjc/s10052-016-4580-0", "10.1140/epjc/s10052-014-3109-7", "10.1140/epjc/s10052-014-3231-6", "10.1007/jhep02(2014)088", "10.1016/j.physletb.2016.01.056", "10.1016/j.physletb.2015.08.047", "10.1016/j.physletb.2015.12.039", "10.1007/jhep11(2015)071", "10.1140/epjc/s10052-015-3853-3", "10.1007/jhep04(2015)124", "10.1016/j.physletb.2015.07.010", "10.5281/zenodo.3413524", "10.1007/jhep04(2014)031", "10.1007/jhep07(2015)157", "10.1103/physrevd.90.052008", "10.1007/jhep11(2014)118", "10.3920/978-90-8686-708-0", "10.5281/zenodo.1136235", "10.1103/physrevd.86.032003", "10.1016/j.physletb.2016.01.032", "10.1007/jhep03(2018)174", "10.1007/jhep10(2017)182", "10.1140/epjst/e2019-900045-4", "10.1016/j.physletb.2015.06.070", "10.1140/epjc/s10052-016-4067-z", "10.1016/j.physletb.2015.11.042", "10.1007/jhep04(2018)033", "10.1007/jhep09(2014)145", "10.1016/j.physletb.2016.08.055", "10.1016/j.physletb.2015.04.002", "10.1007/jhep03(2014)032", "10.1140/epjc/s10052-017-5491-4", "10.1016/j.physletb.2015.09.062", "10.1016/j.physletb.2014.12.003", "10.1016/j.physletb.2015.03.017", "10.1140/epjc/s10052-014-3195-6", "10.1140/epjc/s10052-016-4034-8", "10.1140/epjc/s10052-016-4070-4", "10.1140/epjc/s10052-018-5693-4", "10.4126/frl01-0064017", "10.1007/jhep08(2014)173", "10.1016/j.physletb.2014.06.076", "10.1016/j.physletb.2018.11.064", "10.1140/epjc/s10052-017-4988-1", "10.11646/zootaxa.4258.4.3", "10.11646/zootaxa.4766.1.2", "10.11646/zootaxa.4780.1.1", "10.5281/zenodo.3693943", "10.4126/frl01-0064129", "10.15330/gal.28.", "10.1007/jhep02(2016)145", "10.1007/jhep04(2014)172", "10.1007/jhep04(2016)005", "10.1007/jhep03(2016)125", "10.1016/j.physletb.2018.02.033", "10.1007/jhep08(2017)052", "10.1007/jhep12(2017)085", "10.1007/jhep09(2014)176", "10.1007/jhep12(2017)024", "10.1140/epjc/s10052-018-5686-3", "10.1016/j.physletb.2016.11.035", "10.1016/j.physletb.2015.12.017", "10.1140/epjc/s10052-015-3542-2", "10.1140/epjc/s10052-014-3071-4", "10.1103/physrevd.97.032009", "10.1140/epjc/s10052-015-3306-z", "10.1016/j.physletb.2017.12.043", "10.1140/epjc/s10052-014-3233-4", "10.1016/j.physletb.2018.09.013", "10.1016/j.gca.2007.06.014", "10.1016/j.physletb.2016.05.005", "10.1038/s41586-019-1171-x", "10.1016/j.physletb.2016.05.087", "10.1007/jhep06(2018)022", "10.1016/j.physletb.2016.01.057", "10.1016/j.physletb.2018.03.023", "10.1140/epjc/s10052-015-3351-7", "10.1126/science.aap8757", "10.1007/jhep09(2015)137", "10.1007/jhep01(2015)063", "10.1007/jhep01(2018)126", "10.1016/j.gca.2007.06.020", "10.1140/epjc/s10052-018-5595-5", "10.1016/j.physletb.2015.02.015", "10.1016/j.physletb.2014.06.077", "10.1007/jhep12(2017)059", "10.1007/jhep10(2017)141", "10.1007/jhep02(2014)107", "10.1140/epjc/s10052-014-2965-5", "10.1016/j.physletb.2015.07.079", "10.1007/jhep10(2017)112", "10.1140/epjc/s10052-014-2982-4", "10.1007/jhep05(2016)160", "10.1016/j.physletb.2016.07.030", "10.1140/epjc/s10052-014-3168-9", "10.1140/epjc/s10052-018-5583-9", "10.1140/epjc/s10052-016-4184-8", "10.1007/jhep08(2015)105", "10.1007/jhep05(2015)061", "10.1103/physrevd.97.032003", "10.1140/epjc/s10052-014-3190-y", "10.1016/j.physletb.2012.10.061", "10.1140/epjc/s10052-014-2941-0", "10.1016/j.physletb.2016.02.002", "10.1016/j.physletb.2016.05.033", "10.1007/jhep01(2014)096", "10.1007/jhep09(2015)201", "10.1016/j.physletb.2016.01.010", "10.1016/j.physletb.2015.07.037", "10.1007/jhep07(2015)042", "10.1016/j.physletb.2016.05.044", "10.1016/j.physletb.2016.05.088", "10.3897/zookeys.2.2", "10.1007/jhep11(2015)018", "10.1007/jhep11(2015)189", "10.1016/j.physletb.2016.10.014", "10.1007/jhep06(2015)080", "10.1016/j.physletb.2014.11.042", "10.1140/epjc/s10052-014-3157-z", "10.1140/epjc/s10052-015-3406-9", "10.1016/j.physletb.2016.02.056", "10.1016/j.physletb.2015.03.054", "10.1140/epjc/s10052-016-4574-y", "10.5252/geodiversitas2019v41a15", "10.1007/jhep09(2014)094", "10.1140/epjc/s10052-017-5486-1", "10.1007/jhep03(2018)095", "10.11646/zootaxa.4736.1.1", "10.11646/zootaxa.4766.2.1", "10.5281/zenodo.3762392", "10.5281/zenodo.3761958", "10.11646/zootaxa.4403.3.2", "10.1553/iswimab", "10.11646/zootaxa.3750.5.1", "10.4126/frl01-0064134", "10.1103/physrevd.87.032002", "10.1140/epjc/s10052-013-2676-3", "10.1007/jhep02(2015)153", "10.1007/jhep08(2017)006", "10.1016/j.physletb.2016.11.005", "10.1007/jhep01(2013)029", "10.1007/jhep10(2017)132", "10.1016/j.physletb.2013.01.034", "10.1016/j.physletb.2016.03.046", "10.1140/epjc/s10052-016-3988-x", "10.1016/j.physletb.2016.07.006", "10.1140/epjc/s10052-018-5752-x", "10.1140/epjc/s10052-015-3454-1", "10.1002/ece3.1303", "10.1007/jhep02(2014)013", "10.1007/jhep06(2016)081", "10.1140/epjc/s10052-014-3117-7", "10.1007/jhep09(2017)084", "10.1016/j.physletb.2017.09.078", "10.1007/jhep08(2016)005", "10.1007/jhep01(2015)020", "10.1140/epjc/s10052-017-4852-3", "10.1016/j.physletb.2018.02.045", "10.7818/sibecolandaeetmeeting.2019", "10.1007/jhep11(2014)104", "10.1007/jhep05(2018)077", "10.1016/j.physletb.2016.11.045", "10.1016/j.physletb.2016.10.042", "10.1140/epjc/s10052-016-4203-9", "10.1007/jhep01(2015)068", "10.1007/jhep06(2016)093", "10.1016/j.physletb.2015.09.051", "10.1140/epjc/s10052-015-3534-2", "10.1007/jhep09(2014)087", "10.1016/j.physletb.2014.05.055", "10.1016/j.physletb.2014.02.033", "10.1140/epjc/s10052-017-5225-7", "10.1140/epjc/s10052-017-5442-0", "10.1016/s0140-6736(18)32335-3", "10.1016/j.physletb.2017.11.049", "10.1007/jhep06(2018)166", "10.1016/j.physletb.2016.05.002", "10.1140/epjc/s10052-016-4219-1", "10.1140/epjst/e2019-900087-0", "10.1007/jhep01(2016)166", "10.1007/jhep01(2018)097", "10.1016/j.physletb.2017.11.043", "10.1016/j.physletb.2018.04.036", "10.1140/epjc/s10052-018-5607-5", "10.1007/jhep12(2017)034", "10.1007/jhep11(2016)112", "10.1007/jhep06(2014)008", "10.1140/epjc/s10052-012-2261-1", "10.1016/j.physletb.2014.08.039", "10.1016/s0140-6736(16)31919-5", "10.1140/epjc/s10052-019-7058-z", "10.1016/j.physletb.2014.07.053", "10.1007/jhep01(2015)053", "10.1016/j.physletb.2016.07.042", "10.1007/jhep08(2014)103", "10.1007/jhep06(2015)100", "10.1140/epjc/s10052-015-3363-3", "10.1140/epjc/s10052-017-4915-5", "10.1140/epjc/s10052-014-3023-z", "10.1140/epjc/s10052-017-5315-6", "10.1140/epjc/s10052-016-4050-8", "10.3389/fpsyt.2017.00244", "10.1016/j.physletb.2014.10.002", "10.1007/jhep07(2015)162", "10.1007/jhep08(2014)174", "10.3897/zookeys.2.23", "10.1007/jhep07(2017)014", "10.1007/jhep04(2016)035", "10.1140/epjc/s10052-017-4984-5", "10.1007/jhep02(2016)156", "10.1016/j.physletb.2016.03.039", "10.1007/jhep07(2018)115", "10.3897/zookeys.34.268", "10.1007/jhep02(2016)122", "10.1016/j.physletb.2012.03.022", "10.1016/j.physletb.2018.09.019", "10.1016/j.physletb.2018.09.024", "10.1051/0004-6361/201629272", "10.1103/physrevc.97.024904", "10.1140/epjc/s10052-016-4521-y", "10.1140/epjc/s10052-016-4176-8", "10.1140/epjc/s10052-014-3134-6", "10.1140/epjc/s10052-016-4110-0", "10.1007/jhep07(2017)121", "10.1007/jhep07(2018)153", "10.1007/jhep03(2018)115", "10.1007/jhep04(2018)060", "10.11606/1807-0205/2020.60.06", "10.4126/frl01-0064015", "10.1007/jhep09(2017)020", "10.1016/j.physletb.2014.04.023", "10.1016/j.physletb.2015.02.048", "10.1007/jhep02(2018)032", "10.1016/j.physletb.2018.01.001", "10.1140/epjc/s10052-015-3852-4", "10.1007/jhep10(2014)087", "10.11646/zootaxa.4630.1.1", "10.5281/zenodo.3742118", "10.4126/frl01-0064022", "10.11646/zootaxa.4758.3.1", "10.11646/zootaxa.4772.3.1", "10.11646/zootaxa.4576.3.5", "10.4126/frl01-0064125", "10.1007/jhep12(2017)017", "10.4126/frl01-0064162", "10.4126/frl01-0064138", "10.1007/jhep06(2014)124", "10.1007/jhep06(2016)059", "10.1007/jhep06(2014)035", "10.1103/physrevd.90.052005", "10.1007/jhep11(2017)062", "10.3847/2041-8213/aa9aed", "10.1016/j.physletb.2016.06.080", "10.1007/jhep10(2017)073", "10.1007/jhep03(2018)167", "10.1016/j.physletb.2018.11.065", "10.1140/epjc/s10052-017-5081-5", "10.1140/epjc/s10052-015-3500-z", "10.1140/epjc/s10052-017-5445-x", "10.1016/j.physletb.2014.01.049", "10.1007/jhep03(2018)172", "10.1016/j.physletb.2015.03.048", "10.1016/j.physletb.2018.11.032", "10.1007/jhep05(2018)025", "10.1016/j.physletb.2016.08.052", "10.1016/j.physletb.2014.09.008", "10.1103/physrevlett.120.071802", "10.1016/j.physletb.2018.01.049", "10.1016/j.physletb.2016.06.017", "10.1016/j.physletb.2016.04.005", "10.1007/jhep06(2018)031", "10.1007/jhep01(2016)079", "10.1007/jhep10(2017)006", "10.1140/epjc/s10052-018-5740-1", "10.1016/j.physletb.2015.01.034", "10.1007/jhep10(2017)005", "10.1016/j.physletb.2018.04.007", "10.1007/jhep04(2015)164", "10.1140/epjc/s10052-018-5691-6", "10.1007/jhep05(2018)148", "10.1007/jhep03(2018)003", "10.1140/epjc/s10052-014-3076-z", "10.1016/j.physletb.2016.02.015", "10.1103/physrevd.97.072003", "10.1016/j.physletb.2017.11.054", "10.1140/epjc/s10052-011-1849-1", "10.1007/jhep09(2016)175", "10.1016/j.physletb.2017.12.011", "10.1007/jhep04(2014)103", "10.1007/jhep12(2014)017", "10.1016/j.physletb.2014.09.048", "10.1140/epjc/s10052-019-7202-9", "10.1007/jhep04(2014)191", "10.1007/jhep07(2013)163", "10.1140/epjc/s10052-014-3130-x", "10.1007/jhep04(2016)023", "10.1016/j.physletb.2015.07.023", "10.1140/epjc/s10052-018-6500-y", "10.1016/j.physletb.2015.04.045", "10.1007/jhep09(2017)053", "10.1007/jhep10(2017)180", "10.1140/epjc/s10052-017-4912-8", "10.1007/jhep10(2016)129", "10.3920/978-90-8686-816-2", "10.1007/jhep01(2017)099", "10.1007/jhep01(2018)045", "10.1007/jhep04(2015)025", "10.1016/j.physletb.2018.02.050", "10.1103/physrevlett.116.032301", "10.1007/jhep08(2017)029", "10.1007/jhep08(2017)073", "10.1016/j.physletb.2014.11.059", "10.1007/jhep01(2013)131", "10.1007/jhep06(2014)112", "10.1016/j.physletb.2017.09.066", "10.1140/epjc/s10052-014-2883-6", "10.1094/mpmi", "10.1007/jhep11(2017)195", "10.1007/jhep06(2018)108", "10.1007/jhep09(2018)139", "10.1016/j.physletb.2016.12.005", "10.1140/epjc/s10052-017-5349-9", "10.1016/j.physletb.2012.08.021", "10.1016/j.physletb.2014.10.032", "10.1007/jhep09(2017)088", "10.1140/epjc/s10052-015-3425-6", "10.1007/jhep01(2018)054", "10.1103/physrevlett.110.182302", "10.1140/epjc/s10052-017-5317-4", "10.1007/jhep01(2017)117", "10.1016/j.physletb.2017.12.006", "10.1016/j.physletb.2018.02.004", "10.1016/j.physletb.2018.02.025", "10.1016/j.physletb.2016.02.055", "10.1016/j.physletb.2016.04.061", "10.1140/epjc/s10052-015-3372-2", "10.1016/j.physletb.2015.02.051", "10.1016/j.physletb.2014.11.049", "10.1007/jhep09(2016)001", "10.1016/j.physletb.2016.03.017", "10.1007/jhep06(2016)067", "10.1140/epjc/s10052-015-3543-1", "10.1140/epjc/s10052-017-4911-9", "10.1007/jhep07(2013)122", "10.1140/epjc/s10052-019-6855-8", "10.1140/epjc/s10052-019-6540-y", "10.1007/jhep06(2014)009", "10.1007/jhep05(2019)043", "10.1016/j.physletb.2016.01.028", "10.1103/physrevlett.120.231801", "10.1140/epjc/s10052-016-4325-0", "10.1007/jhep07(2018)127", "10.1016/j.physletb.2016.05.003", "10.1140/epjc/s10052-017-4644-9", "10.1140/epjc/s10052-017-4700-5", "10.1007/jhep06(2018)107", "10.1016/j.physletb.2018.01.042", "10.1140/epjc/s10052-018-5624-4", "10.1007/jhep08(2016)139", "10.1007/jhep05(2018)195", "10.1103/physrevd.97.052012", "10.1140/epjc/s10052-016-3978-z", "10.1007/jhep05(2019)088", "10.1140/epjc/s10052-017-5079-z", "10.1140/epjc/s10052-016-4205-7", "10.1007/jhep01(2016)006", "10.1140/epjc/s10052-016-4286-3", "10.1016/j.physletb.2017.04.071", "10.1103/physrevd.97.012007", "10.1016/j.physletb.2018.01.077", "10.1007/jhep04(2018)073", "10.1016/j.physletb.2015.09.057", "10.1007/jhep07(2018)032", "10.1140/epjc/s10052-015-3435-4", "10.1007/jhep11(2017)010", "10.1093/isd/ixaa002", "10.1016/j.physletb.2018.03.035", "10.1007/jhep10(2018)031", "10.1016/s0140-6736(18)31891-9", "10.1140/epjc/s10052-018-6148-7", "10.1016/j.physletb.2018.03.057", "10.1140/epjc/s10052-019-6632-8", "10.1016/j.physletb.2015.11.071", "10.1140/epjc/s10052-018-5605-7", "10.1016/j.physletb.2018.10.073", "10.1140/epjc/s10052-019-7387-y", "10.1007/jhep06(2019)143", "10.1140/epjc/s10052-018-5567-9", "10.1140/epjc/s10052-019-6909-y", "10.1002/(sici)1521-3978(199901)47:1/3", "10.5281/zenodo.3758372", "10.4126/frl01-0064041", "10.1140/epjc/s10052-014-3129-3", "10.11646/zootaxa.4685.1.1", "10.11646/zootaxa.4756.1.1", "10.6101/azq/0001", "10.14582/duzg", "10.1016/j.physletb.2012.11.039", "10.4126/frl01-0064191", "10.1016/j.physletb.2013.12.029", "10.1007/jhep10(2013)189", "10.1051/0004-6361/201629512", "10.1007/jhep01(2013)116", "10.2312/gfz.lis.2016.001", "10.1016/j.physletb.2013.01.040", "10.1103/physrevd.90.112005", "10.1140/epjc/s10052-015-3726-9", "10.1007/s11682-013-9269-5", "10.1007/jhep02(2017)071", "10.1016/j.physletb.2016.09.040", "10.1007/jhep02(2017)117", "10.1007/jhep08(2016)009", "10.1103/physrevd.97.052010", "10.1007/jhep09(2017)032", "10.1103/physrevd.97.032005", "10.1140/epjc/s10052-017-4965-8", "10.1016/j.physletb.2016.08.042", "10.1016/j.physletb.2017.10.039", "10.1007/jhep03(2016)127", "10.1140/epjc/s10052-014-3034-9", "10.1007/jhep03(2017)113", "10.1007/jhep11(2018)040", "10.1140/epjc/s10052-018-6457-x", "10.1140/epjc/s10052-016-4041-9", "10.1140/epjc/s10052-018-6219-9", "10.1140/epjc/s10052-016-4149-y", "10.1007/jhep10(2017)072", "10.1140/epjc/s10052-016-4083-z", "10.1140/epjc/s10052-016-3956-5", "10.1007/jhep04(2016)073", "10.1007/jhep06(2016)177", "10.1016/j.physletb.2018.03.084", "10.1007/jhep10(2015)128", "10.1007/jhep03(2018)166", "10.1140/epjc/s10052-015-3491-9", "10.1016/j.physletb.2015.04.060", "10.1103/physrevd.92.112001", "10.1140/epjc/s10052-015-3367-z", "10.1007/jhep10(2017)019", "10.1007/jhep10(2017)131", "10.1016/j.physletb.2018.08.057", "10.1007/jhep01(2016)096", "10.1016/j.physletb.2017.09.053", "10.1007/jhep07(2017)013", "10.1007/jhep01(2019)030", "10.1007/jhep11(2016)110", "10.1016/j.physletb.2012.02.044", "10.1140/epjc/s10052-017-5192-z", "10.1007/jhep03(2015)022", "10.1140/epjc/s10052-019-6847-8", "10.1093/database/baz085", "10.1140/epjc/s10052-015-3451-4", "10.1007/jhep11(2017)029", "10.1140/epjc/s10052-015-3517-3", "10.1007/jhep07(2017)001", "10.1007/jhep09(2016)074", "10.1103/physrevd.97.072016", "10.1007/jhep05(2018)006", "10.1103/physrevlett.120.081801", "10.1103/physrevlett.120.161802", "10.1103/physrevlett.120.202005", "10.5281/zenodo.1299123", "10.5281/zenodo.3777294" ], + "pmid" : [], + "pmc" : [] +} \ No newline at end of file diff --git a/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/BlackListProviderTest.java b/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/BlackListProviderTest.java new file mode 100644 index 0000000..203cda0 --- /dev/null +++ b/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/BlackListProviderTest.java @@ -0,0 +1,21 @@ + +package eu.dnetlib.dhp.schema.oaf.utils; + +import java.util.Set; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class BlackListProviderTest { + + @Test + public void blackListTest() { + + Assertions.assertNotNull(PidBlacklistProvider.getBlacklist()); + Assertions.assertNotNull(PidBlacklistProvider.getBlacklist().get("doi")); + Assertions.assertTrue(PidBlacklistProvider.getBlacklist().get("doi").size() > 0); + final Set xxx = PidBlacklistProvider.getBlacklist("xxx"); + Assertions.assertNotNull(xxx); + Assertions.assertEquals(0, xxx.size()); + } +} diff --git a/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java b/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java new file mode 100644 index 0000000..215d834 --- /dev/null +++ b/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java @@ -0,0 +1,75 @@ + +package eu.dnetlib.dhp.schema.oaf.utils; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.io.IOException; + +import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.Test; + +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.schema.oaf.Publication; + +public class IdentifierFactoryTest { + + private static ObjectMapper OBJECT_MAPPER = new ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + + @Test + public void testCreateIdentifierForPublication() throws IOException { + + verifyIdentifier( + "publication_doi1.json", "50|doi_________::79dbc7a2a56dc1532659f9038843256e", true); + + verifyIdentifier( + "publication_doi2.json", "50|doi_________::79dbc7a2a56dc1532659f9038843256e", true); + + verifyIdentifier( + "publication_doi3.json", "50|pmc_________::94e4cb08c93f8733b48e2445d04002ac", true); + + verifyIdentifier( + "publication_doi4.json", "50|od______2852::38861c44e6052a8d49f59a4c39ba5e66", true); + + verifyIdentifier( + "publication_pmc1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", true); + + verifyIdentifier( + "publication_pmc2.json", "50|pmc_________::94e4cb08c93f8733b48e2445d04002ac", true); + + final String defaultID = "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f"; + verifyIdentifier("publication_3.json", defaultID, true); + verifyIdentifier("publication_4.json", defaultID, true); + verifyIdentifier("publication_5.json", defaultID, true); + + } + + @Test + public void testCreateIdentifierForPublicationNoHash() throws IOException { + + verifyIdentifier("publication_doi1.json", "50|doi_________::10.1016/j.cmet.2010.03.013", false); + verifyIdentifier("publication_doi2.json", "50|doi_________::10.1016/j.cmet.2010.03.013", false); + verifyIdentifier("publication_pmc1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", false); + verifyIdentifier( + "publication_urn1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", false); + + final String defaultID = "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f"; + verifyIdentifier("publication_3.json", defaultID, false); + verifyIdentifier("publication_4.json", defaultID, false); + verifyIdentifier("publication_5.json", defaultID, false); + } + + protected void verifyIdentifier(String filename, String expectedID, boolean md5) throws IOException { + final String json = IOUtils.toString(getClass().getResourceAsStream(filename)); + final Publication pub = OBJECT_MAPPER.readValue(json, Publication.class); + + String id = IdentifierFactory.createIdentifier(pub, md5); + + assertNotNull(id); + assertEquals(expectedID, id); + } + +} diff --git a/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java b/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java new file mode 100644 index 0000000..597f7e7 --- /dev/null +++ b/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java @@ -0,0 +1,69 @@ + +package eu.dnetlib.dhp.schema.oaf.utils; + +import static org.junit.jupiter.api.Assertions.*; + +import java.io.IOException; +import java.util.HashSet; +import java.util.List; +import java.util.stream.Collectors; + +import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.Test; + +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.KeyValue; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class OafMapperUtilsTest { + + private static ObjectMapper OBJECT_MAPPER = new ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + + @Test + public void testMergePubs() throws IOException { + Publication p1 = read("publication_1.json", Publication.class); + Publication p2 = read("publication_2.json", Publication.class); + Dataset d1 = read("dataset_1.json", Dataset.class); + Dataset d2 = read("dataset_2.json", Dataset.class); + + assertEquals(p1.getCollectedfrom().size(), 1); + assertEquals(p1.getCollectedfrom().get(0).getKey(), ModelConstants.CROSSREF_ID); + assertEquals(d2.getCollectedfrom().size(), 1); + assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); + + assertTrue( + OafMapperUtils + .mergeResults(p1, d2) + .getResulttype() + .getClassid() + .equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID)); + + assertEquals(p2.getCollectedfrom().size(), 1); + assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); + assertEquals(d1.getCollectedfrom().size(), 1); + assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); + + assertTrue( + OafMapperUtils + .mergeResults(p2, d1) + .getResulttype() + .getClassid() + .equals(ModelConstants.DATASET_RESULTTYPE_CLASSID)); + } + + protected HashSet cfId(List collectedfrom) { + return collectedfrom.stream().map(c -> c.getKey()).collect(Collectors.toCollection(HashSet::new)); + } + + protected T read(String filename, Class clazz) throws IOException { + final String json = IOUtils.toString(getClass().getResourceAsStream(filename)); + return OBJECT_MAPPER.readValue(json, clazz); + } + +} diff --git a/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_1.json b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_1.json new file mode 100644 index 0000000..e38c4d1 --- /dev/null +++ b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_1.json @@ -0,0 +1 @@ +{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resuttype" : { "classid" : "dataset" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value" : "Crossref"} ]} \ No newline at end of file diff --git a/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_2.json b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_2.json new file mode 100644 index 0000000..52e4e12 --- /dev/null +++ b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_2.json @@ -0,0 +1 @@ +{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resuttype" : { "classid" : "dataset" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", "value" : "Repository B"} ]} \ No newline at end of file diff --git a/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_1.json b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_1.json new file mode 100644 index 0000000..704c5ad --- /dev/null +++ b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_1.json @@ -0,0 +1 @@ +{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", "resuttype" : { "classid" : "publication" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value" : "Crossref"} ]} \ No newline at end of file diff --git a/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_2.json b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_2.json new file mode 100644 index 0000000..a1744e8 --- /dev/null +++ b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_2.json @@ -0,0 +1 @@ +{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", "resuttype" : { "classid" : "publication" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", "value" : "Repository A"} ]} \ No newline at end of file diff --git a/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_3.json b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_3.json new file mode 100644 index 0000000..6d33568 --- /dev/null +++ b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_3.json @@ -0,0 +1 @@ +{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[{"qualifier":{"classid":"scp-number"},"value":"79953761260"}]} \ No newline at end of file diff --git a/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_4.json b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_4.json new file mode 100644 index 0000000..6617fe1 --- /dev/null +++ b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_4.json @@ -0,0 +1 @@ +{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[]} \ No newline at end of file diff --git a/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_5.json b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_5.json new file mode 100644 index 0000000..700a100 --- /dev/null +++ b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_5.json @@ -0,0 +1 @@ +{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f"} \ No newline at end of file diff --git a/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi1.json b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi1.json new file mode 100644 index 0000000..83bc0cd --- /dev/null +++ b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi1.json @@ -0,0 +1,33 @@ +{ + "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", + "instance": [ + { + "collectedfrom": { + "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", + "value": "Crossref" + }, + "pid": [ + { + "qualifier": {"classid": "doi"}, + "value": "10.1016/j.cmet.2010.03.013" + } + ] + }, + { + "pid": [ + { + "qualifier": {"classid": "urn"}, + "value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2" + }, + { + "qualifier": {"classid": "scp-number"}, + "value": "79953761260" + }, + { + "qualifier": {"classid": "pmc"}, + "value": "21459329" + } + ] + } + ] +} \ No newline at end of file diff --git a/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi2.json b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi2.json new file mode 100644 index 0000000..5c73fc3 --- /dev/null +++ b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi2.json @@ -0,0 +1,37 @@ +{ + "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", + "instance": [ + { + "collectedfrom": { + "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", + "value": "Crossref" + }, + "pid": [ + { + "qualifier": {"classid": "doi"}, + "value": "10.1016/j.cmet.2010.03.013" + } + ] + }, + { + "collectedfrom": { + "key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c", + "value": "Europe PubMed Central" + }, + "pid": [ + { + "qualifier": {"classid": "urn"}, + "value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2" + }, + { + "qualifier": {"classid": "scp-number"}, + "value": "79953761260" + }, + { + "qualifier": {"classid": "pmc"}, + "value": "21459329" + } + ] + } + ] +} \ No newline at end of file diff --git a/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi3.json b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi3.json new file mode 100644 index 0000000..97c40d4 --- /dev/null +++ b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi3.json @@ -0,0 +1,37 @@ +{ + "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", + "instance": [ + { + "collectedfrom": { + "key": "10|openaire____::1234", + "value": "Zenodo" + }, + "pid": [ + { + "qualifier": {"classid": "doi"}, + "value": "10.1016/j.cmet.2010.03.013" + } + ] + }, + { + "collectedfrom": { + "key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c", + "value": "Europe PubMed Central" + }, + "pid": [ + { + "qualifier": {"classid": "urn"}, + "value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2" + }, + { + "qualifier": {"classid": "scp-number"}, + "value": "79953761260" + }, + { + "qualifier": {"classid": "pmc"}, + "value": "21459329" + } + ] + } + ] +} \ No newline at end of file diff --git a/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi4.json b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi4.json new file mode 100644 index 0000000..ac99ca9 --- /dev/null +++ b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi4.json @@ -0,0 +1,37 @@ +{ + "id": "50|od______2852::38861c44e6052a8d49f59a4c39ba5e66", + "instance": [ + { + "collectedfrom": { + "key": "10|openaire____::1234", + "value": "Zenodo" + }, + "pid": [ + { + "qualifier": {"classid": "doi"}, + "value": "10.1016/j.cmet.2010.03.013" + }, + { + "qualifier": {"classid": "handle"}, + "value": "11012/83840" + } + ] + }, + { + "collectedfrom": { + "key": "10|opendoar____::2852", + "value": "Digital library of Brno University of Technology" + }, + "pid": [ + { + "qualifier": {"classid": "pmc"}, + "value": "21459329" + }, + { + "qualifier": {"classid": "handle"}, + "value": "11012/83840" + } + ] + } + ] +} \ No newline at end of file diff --git a/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_pmc1.json b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_pmc1.json new file mode 100644 index 0000000..3e4ba22 --- /dev/null +++ b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_pmc1.json @@ -0,0 +1 @@ +{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}]} \ No newline at end of file diff --git a/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_pmc2.json b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_pmc2.json new file mode 100644 index 0000000..e7d49ee --- /dev/null +++ b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_pmc2.json @@ -0,0 +1,21 @@ +{ + "id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", + "instance": [ + { + "collectedfrom": { + "key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c", + "value": "Europe PubMed Central" + }, + "pid": [ + { + "qualifier": {"classid": "doi"}, + "value": "10.1016/j.cmet.2010.03.013" + }, + { + "qualifier":{"classid":"pmc"}, + "value":"21459329" + } + ] + } + ] +} \ No newline at end of file diff --git a/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_urn1.json b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_urn1.json new file mode 100644 index 0000000..5323ac8 --- /dev/null +++ b/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_urn1.json @@ -0,0 +1,23 @@ +{ + "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", + "pid": [ + { + "qualifier": { + "classid": "urn" + }, + "value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2" + }, + { + "qualifier": { + "classid": "scp-number" + }, + "value": "79953761260" + }, + { + "qualifier": { + "classid": "pmcid" + }, + "value": "21459329" + } + ] +} \ No newline at end of file