forked from D-Net/dnet-hadoop
merged from dhp_oaf_model
This commit is contained in:
parent
59532b0919
commit
765f9bdee7
|
@ -13,7 +13,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
|
||||||
public class CleaningFunctions {
|
public class CleaningFunctions {
|
||||||
|
|
||||||
public static final String DOI_PREFIX_REGEX = "^.*10\\.";
|
public static final String DOI_PREFIX_REGEX = "(^10\\.|\\/10.)";
|
||||||
public static final String ORCID_PREFIX_REGEX = "^http(s?):\\/\\/orcid\\.org\\/";
|
public static final String ORCID_PREFIX_REGEX = "^http(s?):\\/\\/orcid\\.org\\/";
|
||||||
public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)";
|
public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)";
|
||||||
|
|
||||||
|
@ -277,7 +277,7 @@ public class CleaningFunctions {
|
||||||
|
|
||||||
// TODO add cleaning for more PID types as needed
|
// TODO add cleaning for more PID types as needed
|
||||||
case "doi":
|
case "doi":
|
||||||
pid.setValue(value.toLowerCase().replaceAll(DOI_URL_PREFIX_REGEX, ""));
|
pid.setValue(value.toLowerCase().replaceAll(DOI_PREFIX_REGEX, "10."));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
return pid;
|
return pid;
|
||||||
|
|
|
@ -9,9 +9,9 @@ import java.util.function.Function;
|
||||||
import java.util.function.Predicate;
|
import java.util.function.Predicate;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.common.AccessRightComparator;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.common.LicenseComparator;
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
|
|
||||||
|
@ -327,10 +327,10 @@ public class OafMapperUtils {
|
||||||
|
|
||||||
protected static Qualifier getBestAccessRights(final List<Instance> instanceList) {
|
protected static Qualifier getBestAccessRights(final List<Instance> instanceList) {
|
||||||
if (instanceList != null) {
|
if (instanceList != null) {
|
||||||
final Optional<Qualifier> min = instanceList
|
final Optional<AccessRight> min = instanceList
|
||||||
.stream()
|
.stream()
|
||||||
.map(i -> i.getAccessright())
|
.map(i -> i.getAccessright())
|
||||||
.min(new LicenseComparator());
|
.min(new AccessRightComparator<>());
|
||||||
|
|
||||||
final Qualifier rights = min.isPresent() ? min.get() : new Qualifier();
|
final Qualifier rights = min.isPresent() ? min.get() : new Qualifier();
|
||||||
|
|
||||||
|
|
|
@ -8,6 +8,11 @@ import java.util.Objects;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import com.google.common.collect.HashBiMap;
|
||||||
|
import com.google.common.collect.Maps;
|
||||||
|
import com.google.common.collect.Sets;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.CleaningFunctions;
|
import eu.dnetlib.dhp.schema.oaf.CleaningFunctions;
|
||||||
|
@ -30,6 +35,13 @@ public class IdentifierFactory implements Serializable {
|
||||||
|
|
||||||
public static final int ID_PREFIX_LEN = 12;
|
public static final int ID_PREFIX_LEN = 12;
|
||||||
|
|
||||||
|
public static final HashBiMap<String, String> PID_AUTHORITY = HashBiMap.create(2);
|
||||||
|
|
||||||
|
static {
|
||||||
|
PID_AUTHORITY.put(ModelConstants.CROSSREF_ID, "Crossref");
|
||||||
|
PID_AUTHORITY.put(ModelConstants.DATACITE_ID, "Datacite");
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates an identifier from the most relevant PID (if available) in the given entity T. Returns entity.id
|
* Creates an identifier from the most relevant PID (if available) in the given entity T. Returns entity.id
|
||||||
* when no PID is available
|
* when no PID is available
|
||||||
|
@ -43,6 +55,14 @@ public class IdentifierFactory implements Serializable {
|
||||||
return entity.getId();
|
return entity.getId();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (Optional.ofNullable(
|
||||||
|
entity.getCollectedfrom())
|
||||||
|
.map(c -> c.stream()
|
||||||
|
.noneMatch(cf -> PID_AUTHORITY.containsKey(cf.getKey()) || PID_AUTHORITY.containsValue(cf.getValue())))
|
||||||
|
.orElse(true)) {
|
||||||
|
return entity.getId();
|
||||||
|
}
|
||||||
|
|
||||||
Map<String, List<StructuredProperty>> pids = entity
|
Map<String, List<StructuredProperty>> pids = entity
|
||||||
.getPid()
|
.getPid()
|
||||||
.stream()
|
.stream()
|
||||||
|
|
|
@ -23,7 +23,7 @@ public class IdentifierFactoryTest {
|
||||||
public void testCreateIdentifierForPublication() throws IOException {
|
public void testCreateIdentifierForPublication() throws IOException {
|
||||||
|
|
||||||
verifyIdentifier(
|
verifyIdentifier(
|
||||||
"publication_doi1.json", "50|doi_________::" + DHPUtils.md5("10.1016/j.cmet.2011.03.013"), true);
|
"publication_doi1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", false);
|
||||||
verifyIdentifier(
|
verifyIdentifier(
|
||||||
"publication_doi2.json", "50|doi_________::" + DHPUtils.md5("10.1016/j.cmet.2010.03.013"), true);
|
"publication_doi2.json", "50|doi_________::" + DHPUtils.md5("10.1016/j.cmet.2010.03.013"), true);
|
||||||
verifyIdentifier("publication_pmc1.json", "50|pmc_________::" + DHPUtils.md5("21459329"), true);
|
verifyIdentifier("publication_pmc1.json", "50|pmc_________::" + DHPUtils.md5("21459329"), true);
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2010.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}]}
|
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2010.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}]}
|
||||||
|
|
Loading…
Reference in New Issue