merged from dhp_oaf_model

This commit is contained in:
Claudio Atzori 2021-03-09 11:37:41 +01:00
parent 59532b0919
commit 765f9bdee7
5 changed files with 27 additions and 7 deletions

View File

@@ -13,7 +13,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants;
public class CleaningFunctions { public class CleaningFunctions {
public static final String DOI_PREFIX_REGEX = "^.*10\\."; public static final String DOI_PREFIX_REGEX = "(^10\\.|\\/10.)";
public static final String ORCID_PREFIX_REGEX = "^http(s?):\\/\\/orcid\\.org\\/"; public static final String ORCID_PREFIX_REGEX = "^http(s?):\\/\\/orcid\\.org\\/";
public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)"; public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)";
@@ -277,7 +277,7 @@ public class CleaningFunctions {
// TODO add cleaning for more PID types as needed // TODO add cleaning for more PID types as needed
case "doi": case "doi":
pid.setValue(value.toLowerCase().replaceAll(DOI_URL_PREFIX_REGEX, "")); pid.setValue(value.toLowerCase().replaceAll(DOI_PREFIX_REGEX, "10."));
break; break;
} }
return pid; return pid;

View File

@@ -9,9 +9,9 @@ import java.util.function.Function;
import java.util.function.Predicate; import java.util.function.Predicate;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.common.AccessRightComparator;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.schema.common.LicenseComparator;
import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.DHPUtils;
@@ -327,10 +327,10 @@ public class OafMapperUtils {
protected static Qualifier getBestAccessRights(final List<Instance> instanceList) { protected static Qualifier getBestAccessRights(final List<Instance> instanceList) {
if (instanceList != null) { if (instanceList != null) {
final Optional<Qualifier> min = instanceList final Optional<AccessRight> min = instanceList
.stream() .stream()
.map(i -> i.getAccessright()) .map(i -> i.getAccessright())
.min(new LicenseComparator()); .min(new AccessRightComparator<>());
final Qualifier rights = min.isPresent() ? min.get() : new Qualifier(); final Qualifier rights = min.isPresent() ? min.get() : new Qualifier();

View File

@@ -8,6 +8,11 @@ import java.util.Objects;
import java.util.Optional; import java.util.Optional;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import com.google.common.collect.HashBiMap;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.schema.oaf.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.CleaningFunctions;
@@ -30,6 +35,13 @@ public class IdentifierFactory implements Serializable {
public static final int ID_PREFIX_LEN = 12; public static final int ID_PREFIX_LEN = 12;
// Bidirectional lookup consulted by the collectedfrom check in this class: an entity whose
// collectedfrom entries match neither a key nor a value of this map keeps its original id.
// NOTE(review): keys are presumably datasource identifiers and values the matching
// datasource names - confirm against ModelConstants.
public static final HashBiMap<String, String> PID_AUTHORITY = HashBiMap.create(2);
// Populated once at class initialization with the two entries sized for above.
static {
PID_AUTHORITY.put(ModelConstants.CROSSREF_ID, "Crossref");
PID_AUTHORITY.put(ModelConstants.DATACITE_ID, "Datacite");
}
/** /**
* Creates an identifier from the most relevant PID (if available) in the given entity T. Returns entity.id * Creates an identifier from the most relevant PID (if available) in the given entity T. Returns entity.id
* when no PID is available * when no PID is available
@@ -43,6 +55,14 @@ public class IdentifierFactory implements Serializable {
return entity.getId(); return entity.getId();
} }
if (Optional.ofNullable(
entity.getCollectedfrom())
.map(c -> c.stream()
.noneMatch(cf -> PID_AUTHORITY.containsKey(cf.getKey()) || PID_AUTHORITY.containsValue(cf.getValue())))
.orElse(true)) {
return entity.getId();
}
Map<String, List<StructuredProperty>> pids = entity Map<String, List<StructuredProperty>> pids = entity
.getPid() .getPid()
.stream() .stream()

View File

@@ -23,7 +23,7 @@ public class IdentifierFactoryTest {
public void testCreateIdentifierForPublication() throws IOException { public void testCreateIdentifierForPublication() throws IOException {
verifyIdentifier( verifyIdentifier(
"publication_doi1.json", "50|doi_________::" + DHPUtils.md5("10.1016/j.cmet.2011.03.013"), true); "publication_doi1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", false);
verifyIdentifier( verifyIdentifier(
"publication_doi2.json", "50|doi_________::" + DHPUtils.md5("10.1016/j.cmet.2010.03.013"), true); "publication_doi2.json", "50|doi_________::" + DHPUtils.md5("10.1016/j.cmet.2010.03.013"), true);
verifyIdentifier("publication_pmc1.json", "50|pmc_________::" + DHPUtils.md5("21459329"), true); verifyIdentifier("publication_pmc1.json", "50|pmc_________::" + DHPUtils.md5("21459329"), true);

View File

@@ -1 +1 @@
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2010.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}]} {"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2010.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}]}