From 943b961cf63ae55e60fea2d626af557c5456545f Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 2 Dec 2020 09:30:34 +0100 Subject: [PATCH] introduced PidBlacklist --- .../schema/oaf/utils/IdentifierFactory.java | 25 +++++++------ .../dhp/schema/oaf/utils/PidBlacklist.java | 8 +++- .../oaf/utils/PidBlacklistProvider.java | 37 ++++++++++++++++++- .../dhp/schema/oaf/utils/pid_blacklist.json | 5 +++ .../oaf/utils/BlackListProviderTest.java | 17 ++++++++- .../schema/oaf/utils/publication_doi1.json | 2 +- 6 files changed, 79 insertions(+), 15 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java index 5c3a4c37c..a6b2ce29b 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java @@ -1,13 +1,12 @@ package eu.dnetlib.dhp.schema.oaf.utils; +import java.io.IOException; import java.io.Serializable; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Optional; +import java.util.*; import java.util.stream.Collectors; +import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import eu.dnetlib.dhp.schema.oaf.CleaningFunctions; @@ -48,7 +47,8 @@ public class IdentifierFactory implements Serializable { Map> pids = entity .getPid() .stream() - .filter(s -> pidFilter(s)) + .map(CleaningFunctions::normalizePidValue) + .filter(IdentifierFactory::pidFilter) .collect( Collectors .groupingBy( @@ -83,17 +83,21 @@ public class IdentifierFactory implements Serializable { } protected static boolean pidFilter(StructuredProperty s) { + final String pidValue = s.getValue(); if (Objects.isNull(s.getQualifier()) || - StringUtils.isBlank(s.getValue()) || - StringUtils.isBlank(s.getValue().replaceAll("(?:\\n|\\r|\\t|\\s)", ""))) { + StringUtils.isBlank(pidValue) || + 
StringUtils.isBlank(pidValue.replaceAll("(?:\\n|\\r|\\t|\\s)", ""))) { return false; } - if (CleaningFunctions.PID_BLACKLIST.contains(StringUtils.trim(s.getValue().toLowerCase()))) { + if (CleaningFunctions.PID_BLACKLIST.contains(pidValue)) { + return false; + } + if (PidBlacklistProvider.getBlacklist(s.getQualifier().getClassid()).contains(pidValue)) { return false; } switch (PidType.tryValueOf(s.getQualifier().getClassid())) { case doi: - final String doi = StringUtils.trim(StringUtils.lowerCase(s.getValue())); + final String doi = StringUtils.trim(StringUtils.lowerCase(pidValue)); return doi.matches(DOI_REGEX); case original: return false; @@ -103,13 +107,12 @@ public class IdentifierFactory implements Serializable { } private static String idFromPid(T entity, StructuredProperty s, boolean md5) { - final String value = CleaningFunctions.normalizePidValue(s).getValue(); return new StringBuilder() .append(StringUtils.substringBefore(entity.getId(), ID_PREFIX_SEPARATOR)) .append(ID_PREFIX_SEPARATOR) .append(createPrefix(s.getQualifier().getClassid())) .append(ID_SEPARATOR) - .append(md5 ? DHPUtils.md5(value) : value) + .append(md5 ? 
DHPUtils.md5(s.getValue()) : s.getValue()) .toString(); } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidBlacklist.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidBlacklist.java index 85abbe350..0b8e5e3f1 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidBlacklist.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidBlacklist.java @@ -1,2 +1,8 @@ -package eu.dnetlib.dhp.schema.oaf.utils;public class PidBlacklist { + +package eu.dnetlib.dhp.schema.oaf.utils; + +import java.util.HashMap; +import java.util.HashSet; + +public class PidBlacklist extends HashMap<String, HashSet<String>> { } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidBlacklistProvider.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidBlacklistProvider.java index e044bedea..1c1c21f92 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidBlacklistProvider.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidBlacklistProvider.java @@ -1,2 +1,37 @@ -package eu.dnetlib.dhp.schema.oaf.utils;public class PidBlacklistProvider { + +package eu.dnetlib.dhp.schema.oaf.utils; + +import java.io.IOException; +import java.util.HashSet; +import java.util.Optional; +import java.util.Set; + +import org.apache.commons.io.IOUtils; + +import com.fasterxml.jackson.databind.ObjectMapper; + +public class PidBlacklistProvider { + + private static final PidBlacklist blacklist; + + static { /* parses the bundled pid_blacklist.json when the class is initialized */ + try { + String json = IOUtils.toString(PidBlacklistProvider.class.getResourceAsStream("pid_blacklist.json"), java.nio.charset.StandardCharsets.UTF_8); /* explicit charset: the one-argument overload decodes with the platform default */ + blacklist = new ObjectMapper().readValue(json, PidBlacklist.class); + + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + public static PidBlacklist getBlacklist() { + return blacklist; + } + + public static Set<String> getBlacklist(String pidType) { /* never null: a PID type missing from the blacklist yields an empty set */ + return Optional + .ofNullable(getBlacklist().get(pidType)) + .orElse(new HashSet<>()); + } + } diff --git
a/dhp-common/src/main/resources/eu/dnetlib/dhp/schema/oaf/utils/pid_blacklist.json b/dhp-common/src/main/resources/eu/dnetlib/dhp/schema/oaf/utils/pid_blacklist.json index e69de29bb..05e8cde72 100644 --- a/dhp-common/src/main/resources/eu/dnetlib/dhp/schema/oaf/utils/pid_blacklist.json +++ b/dhp-common/src/main/resources/eu/dnetlib/dhp/schema/oaf/utils/pid_blacklist.json @@ -0,0 +1,5 @@ +{ + "doi" : [ "10.12739/10.12739", "10.11646/zootaxa.4404.1.1", "10.5281/zenodo.3678492", "10.11646/zootaxa.4757.1.1", "10.17176/20170811-142447", "10.6035/asparkia", "10.11646/zootaxa.4754.1.6", "10.11646/zootaxa.4784.1.1", "10.6035/millars", "10.11646/zootaxa.4776.1.1", "10.1590/1982-0224-20170094", "10.11646/zootaxa.4773.1.1", "10.11646/zootaxa.4744.1.1", "10.3897/zookeys.38.383", "10.1371/journal.", "10.5281/zenodo.3727017", "10.5252/zoosystema2019v41a15", "10.6035/dossiersf", "10.11646/zootaxa.4754.1.20", "10.6035/recerca", "10.11646/zootaxa.4428.1.1", "10.7179/psri", "10.11646/zootaxa.4785.1.1", "10.2478/aemnp-2018-0014", "10.17979/spudc.9788497497565", "10.2139/ssrn.2721313", "10.17979/spudc.9788497497749", "10.5281/zenodo.3760976", "10.11646/zootaxa.4381.1.1", "10.6035/tiempos", "10.11646/zootaxa.4754.1.10", "10.5281/zenodo.3776452", "10.11646/zootaxa.4754.1.16", "10.5252/zoosystema2019v41a26", "10.11646/zootaxa.4759.2.1", "10.11646/zootaxa.4741.1.1", "10.5252/zoosystema2019v41a4", "10.1145/nnnnnnn.nnnnnnn", "10.17979/spudc.9788497497169", "10.11646/zootaxa.4780.3.1", "10.11646/zootaxa.4663.1.1", "10.5281/zenodo.3748525", "10.5281/zenodo.3746744", "10.3920/978-90-8686-761-5", "10.14198/eurau18alicante", "10.5252/geodiversitas2019v41a8", "10.4126/38m-0000003", "10.5281/zenodo.3648511", "10.6035/clr", "10.4126/38m-0000004", "10.5281/zenodo.3732535", "10.5281/zenodo.3355776", "10.4126/38m-0000002", "10.11646/zootaxa.4763.3.3", "10.11646/zootaxa.4413.3.1", "10.1163/9789004416208_005", "10.4126/38m-0000001", "10.3897/zookeys.30.308", "10.4126/38m-0000000", 
"10.5281/zenodo.3739808", "10.5281/zenodo.3674873", "10.3161/00034541anz2020.70.1.003", "10.5281/zenodo.3738648", "10.11646/zootaxa.4765.1.1", "10.11646/zootaxa.4754.1.8", "10.3897/zookeys.36.306", "10.4230/lipics", "10.5281/zenodo.3758345", "10.3161/00034541anz2020.70.1.001", "10.3929/ethz-a-005427569", "10.11646/zootaxa.4772.1.1", "10.5281/zenodo.3677235", "10.11646/zootaxa.4766.1.1", "10.17509/jurnal", "10.1145/1235", "10.11646/zootaxa.4754.1.15", "10.2478/aemnp-2018-0018", "10.11646/zootaxa.4538.1.1", "10.11646/zootaxa.4740.1.1", "10.3897/zookeys.32.282", "10.3897/zookeys.2.56", "10.3897/zookeys.39.425", "10.11646/zootaxa.4514.3.3", "10.1007/978-94-007-1966-8", "10.3897/zookeys.26.214", "10.11646/zootaxa.4106.1.1", "10.3897/zookeys.22.219", "10.11646/zootaxa.4748.2.1", "10.5252/zoosystema2019v41a19", "10.3897/zookeys.22.122", "10.1080/00222933.2019.1634225", "10.11646/zootaxa.4632.1.1", "10.1007/s00259-016-3484-4", "10.3897/zookeys.19.221", "10.3897/zookeys.2.7", "10.11646/zootaxa.4777.1.1", "10.14279/depositonce-3753", "10.1111/apha.12712", "10.11646/zootaxa.4759.3.4", "10.11646/zootaxa.4754.1.9", "10.11646/zootaxa.4747.2.8", "10.5281/zenodo.3757451", "10.5281/zenodo.3740269", "10.5252/zoosystema2020v42a4", "10.1140/epje/i2013-13103-3", "10.1177/0301006619863862", "10.5281/zenodo.3726987", "10.12795/hid", "10.24042/jipf", "10.12795/e-rips", "10.1186/s12913-016-1423-5", "10.4126/38m-0000005", "10.3847/2041-8213/aa91c9", "10.1145/1122445.1122456", "10.1103/physrevlett.114.191803", "10.3920/978-90-8686-782-0", "10.11646/zootaxa.4739.1.1", "10.11646/zootaxa.4770.1.1", "10.21009/10.21009/jpd.081", "10.1080/15548627.2015.1100356", "10.12795/ricl", "10.3897/zookeys.34.309", "10.1080/00222933.2019.1692088", "10.4126/frl01-0064002", "10.1371/journal", "10.1175/1520-0485(2002)032", "10.3897/zookeys.22.152", "10.11646/zootaxa.4731.2.1", "10.4126/frl01-0064005", "10.11646/zootaxa.4738.1.1", "10.11646/zootaxa.4780.1.6", "10.4126/frl01-0064004", 
"10.6018/analesps.31.1.158071", "10.1007/jhep08(2016)045", "10.5281/zenodo.3759519", "10.4126/frl01-0064010", "10.11646/zootaxa.4537.1.1", "10.5281/zenodo.3713533", "10.5281/zenodo.3742020", "10.4126/frl01-0064014", "10.4126/frl01-0064001", "10.1000/isbn", "10.5281/zenodo.3777290", "10.4126/frl01-0064008", "10.1159/000440895", "10.3897/zookeys.31.140", "10.4126/frl01-0064003", "10.1080/00222933.2018.1524032", "10.21686/2500-3925-2014-6", "10.1016/j.bbr.2011.03.031", "10.4126/frl01-0064006", "10.4126/frl01-0064007", "10.4126/frl01-0064020", "10.4126/frl01-0064016", "10.2478/aemnp-2018-0013", "10.4126/frl01-0064021", "10.5281/zenodo.3754300", "10.15330/gal.29-30.", "10.3897/zookeys.2.4", "10.5252/zoosystema2019v41a7", "10.22435/bpk.v17i2", "10.4126/frl01-0063997", "10.3897/zookeys.11.160", "10.11646/zootaxa.4754.1.14", "10.4126/frl01-0064013", "10.1080/20013078.2018.1535750", "10.1016/j.", "10.4126/frl01-0064011", "10.1002/ece3.2579", "10.1088/0264-9381/28/9/094001", "10.3897/zookeys.2.25", "10.4126/frl01-0064019", "10.4126/frl01-0063994", "10.4126/frl01-0064135", "10.4126/frl01-0063998", "10.12795/ppa", "10.4126/frl01-0064009", "10.11646/zootaxa.4769.1.1", "10.11646/zootaxa.4419.1.1", "10.11646/zootaxa.4733.1.1", "10.4126/frl01-0063993", "10.3161/15081109acc2016.18.1.005", "10.11646/zootaxa.4763.1.2", "10.11646/zootaxa.4754.1.19", "10.4126/frl01-0064136", "10.4126/frl01-0064159", "10.4126/frl01-0063999", "10.4126/frl01-0064161", "10.1089/ten.tea.2015.5000.abstracts", "10.1002/(issn)1521-3773", "10.1140/epjc/s10052-015-3325-9", "10.1016/j.physletb.2016.04.050", "10.1007/jhep04(2015)117", "10.1111/gcb.14904", "10.1016/s0140-6736(17)32129-3", "10.11646/zootaxa.4748.1.1", "10.4126/frl01-0064078", "10.1140/epjc/s10052-015-3408-7", "10.1002/(issn)1097-4652", "10.1007/jhep06(2015)121", "10.1007/jhep09(2014)103", "10.1016/j.gca.2007.06.021", "10.1007/jhep09(2015)049", "10.3897/zookeys.4.32", "10.6101/azq/0002", "10.11646/zootaxa.4764.1.1", "10.11646/zootaxa.4772.1.5", 
"10.4126/frl01-0064000", "10.4126/frl01-0064131", "10.1016/j.physletb.2015.08.061", "10.1007/jhep01(2015)069", "10.1016/j.physletb.2016.06.039", "10.1016/j.physletb.2015.07.011", "10.1007/jhep04(2015)116", "10.3920/978-90-8686-797-4", "10.1016/j.physletb.2015.12.020", "10.1016/j.physletb.2015.04.042", "10.1016/j.physletb.2016.06.004", "10.1140/epjc/s10052-015-3261-8", "10.1016/j.physletb.2015.10.067", "10.1016/j.physletb.2015.07.065", "10.1163/1876312x-00002195", "10.1016/j.physletb.2013.12.010", "10.1016/j.physletb.2013.01.024", "10.1007/jhep11(2014)056", "10.1007/jhep12(2017)142", "10.1002/pds.4864", "10.1140/epjc/s10052-015-3262-7", "10.1016/j.physletb.2014.09.054", "10.1140/epjc/s10052-015-3373-1", "10.1007/jhep03(2015)041", "10.1016/j.physletb.2016.02.047", "10.4126/frl01-0064018", "10.1016/j.physletb.2014.01.042", "10.1007/jhep09(2014)037", "10.1007/978-94-017-7285-3", "10.1007/s00424-013-1401-2", "10.1007/s00259-017-3822-1", "10.1177/0301006616671273", "10.1007/jhep09(2014)112", "10.1007/jhep06(2015)116", "10.1140/epjc/s10052-018-6243-9", "10.1140/epjc/s10052-017-4692-1", "10.1007/jhep10(2015)144", "10.1007/jhep07(2017)107", "10.1007/jhep11(2014)088", "10.1016/j.physletb.2014.01.006", "10.1007/jhep01(2018)055", "10.1016/j.physletb.2016.03.060", "10.1140/epjc/s10052-019-6904-3", "10.11646/zootaxa.4737.1.1", "10.3934/xx.xx.xx.xx", "10.11646/zootaxa.4758.2.1", "10.1016/j.physletb.2015.10.004", "10.1016/j.physletb.2015.07.053", "10.5798/diclemedj.0921.2012.04.0184", "10.1007/jhep04(2014)169", "10.4126/frl01-0064160", "10.3989/aem.2001.v31.i2", "10.1039/x0xx00000x", "10.11646/zootaxa.3856.4.1", "10.4126/frl01-0064133", "10.1007/jhep05(2015)078", "10.1016/j.physletb.2012.08.020", "10.1007/jhep07(2015)032", "10.1159/000090218", "10.1016/j.physletb.2014.03.015", "10.1007/jhep09(2015)108", "10.1007/jhep09(2015)050", "10.1007/jhep01(2014)163", "10.1016/j.physletb.2014.11.026", "10.1140/epjc/s10052-016-4580-0", "10.1140/epjc/s10052-014-3109-7", 
"10.1140/epjc/s10052-014-3231-6", "10.1007/jhep02(2014)088", "10.1016/j.physletb.2016.01.056", "10.1016/j.physletb.2015.08.047", "10.1016/j.physletb.2015.12.039", "10.1007/jhep11(2015)071", "10.1140/epjc/s10052-015-3853-3", "10.1007/jhep04(2015)124", "10.1016/j.physletb.2015.07.010", "10.5281/zenodo.3413524", "10.1007/jhep04(2014)031", "10.1007/jhep07(2015)157", "10.1103/physrevd.90.052008", "10.1007/jhep11(2014)118", "10.3920/978-90-8686-708-0", "10.5281/zenodo.1136235", "10.1103/physrevd.86.032003", "10.1016/j.physletb.2016.01.032", "10.1007/jhep03(2018)174", "10.1007/jhep10(2017)182", "10.1140/epjst/e2019-900045-4", "10.1016/j.physletb.2015.06.070", "10.1140/epjc/s10052-016-4067-z", "10.1016/j.physletb.2015.11.042", "10.1007/jhep04(2018)033", "10.1007/jhep09(2014)145", "10.1016/j.physletb.2016.08.055", "10.1016/j.physletb.2015.04.002", "10.1007/jhep03(2014)032", "10.1140/epjc/s10052-017-5491-4", "10.1016/j.physletb.2015.09.062", "10.1016/j.physletb.2014.12.003", "10.1016/j.physletb.2015.03.017", "10.1140/epjc/s10052-014-3195-6", "10.1140/epjc/s10052-016-4034-8", "10.1140/epjc/s10052-016-4070-4", "10.1140/epjc/s10052-018-5693-4", "10.4126/frl01-0064017", "10.1007/jhep08(2014)173", "10.1016/j.physletb.2014.06.076", "10.1016/j.physletb.2018.11.064", "10.1140/epjc/s10052-017-4988-1", "10.11646/zootaxa.4258.4.3", "10.11646/zootaxa.4766.1.2", "10.11646/zootaxa.4780.1.1", "10.5281/zenodo.3693943", "10.4126/frl01-0064129", "10.15330/gal.28.", "10.1007/jhep02(2016)145", "10.1007/jhep04(2014)172", "10.1007/jhep04(2016)005", "10.1007/jhep03(2016)125", "10.1016/j.physletb.2018.02.033", "10.1007/jhep08(2017)052", "10.1007/jhep12(2017)085", "10.1007/jhep09(2014)176", "10.1007/jhep12(2017)024", "10.1140/epjc/s10052-018-5686-3", "10.1016/j.physletb.2016.11.035", "10.1016/j.physletb.2015.12.017", "10.1140/epjc/s10052-015-3542-2", "10.1140/epjc/s10052-014-3071-4", "10.1103/physrevd.97.032009", "10.1140/epjc/s10052-015-3306-z", "10.1016/j.physletb.2017.12.043", 
"10.1140/epjc/s10052-014-3233-4", "10.1016/j.physletb.2018.09.013", "10.1016/j.gca.2007.06.014", "10.1016/j.physletb.2016.05.005", "10.1038/s41586-019-1171-x", "10.1016/j.physletb.2016.05.087", "10.1007/jhep06(2018)022", "10.1016/j.physletb.2016.01.057", "10.1016/j.physletb.2018.03.023", "10.1140/epjc/s10052-015-3351-7", "10.1126/science.aap8757", "10.1007/jhep09(2015)137", "10.1007/jhep01(2015)063", "10.1007/jhep01(2018)126", "10.1016/j.gca.2007.06.020", "10.1140/epjc/s10052-018-5595-5", "10.1016/j.physletb.2015.02.015", "10.1016/j.physletb.2014.06.077", "10.1007/jhep12(2017)059", "10.1007/jhep10(2017)141", "10.1007/jhep02(2014)107", "10.1140/epjc/s10052-014-2965-5", "10.1016/j.physletb.2015.07.079", "10.1007/jhep10(2017)112", "10.1140/epjc/s10052-014-2982-4", "10.1007/jhep05(2016)160", "10.1016/j.physletb.2016.07.030", "10.1140/epjc/s10052-014-3168-9", "10.1140/epjc/s10052-018-5583-9", "10.1140/epjc/s10052-016-4184-8", "10.1007/jhep08(2015)105", "10.1007/jhep05(2015)061", "10.1103/physrevd.97.032003", "10.1140/epjc/s10052-014-3190-y", "10.1016/j.physletb.2012.10.061", "10.1140/epjc/s10052-014-2941-0", "10.1016/j.physletb.2016.02.002", "10.1016/j.physletb.2016.05.033", "10.1007/jhep01(2014)096", "10.1007/jhep09(2015)201", "10.1016/j.physletb.2016.01.010", "10.1016/j.physletb.2015.07.037", "10.1007/jhep07(2015)042", "10.1016/j.physletb.2016.05.044", "10.1016/j.physletb.2016.05.088", "10.3897/zookeys.2.2", "10.1007/jhep11(2015)018", "10.1007/jhep11(2015)189", "10.1016/j.physletb.2016.10.014", "10.1007/jhep06(2015)080", "10.1016/j.physletb.2014.11.042", "10.1140/epjc/s10052-014-3157-z", "10.1140/epjc/s10052-015-3406-9", "10.1016/j.physletb.2016.02.056", "10.1016/j.physletb.2015.03.054", "10.1140/epjc/s10052-016-4574-y", "10.5252/geodiversitas2019v41a15", "10.1007/jhep09(2014)094", "10.1140/epjc/s10052-017-5486-1", "10.1007/jhep03(2018)095", "10.11646/zootaxa.4736.1.1", "10.11646/zootaxa.4766.2.1", "10.5281/zenodo.3762392", "10.5281/zenodo.3761958", 
"10.11646/zootaxa.4403.3.2", "10.1553/iswimab", "10.11646/zootaxa.3750.5.1", "10.4126/frl01-0064134", "10.1103/physrevd.87.032002", "10.1140/epjc/s10052-013-2676-3", "10.1007/jhep02(2015)153", "10.1007/jhep08(2017)006", "10.1016/j.physletb.2016.11.005", "10.1007/jhep01(2013)029", "10.1007/jhep10(2017)132", "10.1016/j.physletb.2013.01.034", "10.1016/j.physletb.2016.03.046", "10.1140/epjc/s10052-016-3988-x", "10.1016/j.physletb.2016.07.006", "10.1140/epjc/s10052-018-5752-x", "10.1140/epjc/s10052-015-3454-1", "10.1002/ece3.1303", "10.1007/jhep02(2014)013", "10.1007/jhep06(2016)081", "10.1140/epjc/s10052-014-3117-7", "10.1007/jhep09(2017)084", "10.1016/j.physletb.2017.09.078", "10.1007/jhep08(2016)005", "10.1007/jhep01(2015)020", "10.1140/epjc/s10052-017-4852-3", "10.1016/j.physletb.2018.02.045", "10.7818/sibecolandaeetmeeting.2019", "10.1007/jhep11(2014)104", "10.1007/jhep05(2018)077", "10.1016/j.physletb.2016.11.045", "10.1016/j.physletb.2016.10.042", "10.1140/epjc/s10052-016-4203-9", "10.1007/jhep01(2015)068", "10.1007/jhep06(2016)093", "10.1016/j.physletb.2015.09.051", "10.1140/epjc/s10052-015-3534-2", "10.1007/jhep09(2014)087", "10.1016/j.physletb.2014.05.055", "10.1016/j.physletb.2014.02.033", "10.1140/epjc/s10052-017-5225-7", "10.1140/epjc/s10052-017-5442-0", "10.1016/s0140-6736(18)32335-3", "10.1016/j.physletb.2017.11.049", "10.1007/jhep06(2018)166", "10.1016/j.physletb.2016.05.002", "10.1140/epjc/s10052-016-4219-1", "10.1140/epjst/e2019-900087-0", "10.1007/jhep01(2016)166", "10.1007/jhep01(2018)097", "10.1016/j.physletb.2017.11.043", "10.1016/j.physletb.2018.04.036", "10.1140/epjc/s10052-018-5607-5", "10.1007/jhep12(2017)034", "10.1007/jhep11(2016)112", "10.1007/jhep06(2014)008", "10.1140/epjc/s10052-012-2261-1", "10.1016/j.physletb.2014.08.039", "10.1016/s0140-6736(16)31919-5", "10.1140/epjc/s10052-019-7058-z", "10.1016/j.physletb.2014.07.053", "10.1007/jhep01(2015)053", "10.1016/j.physletb.2016.07.042", "10.1007/jhep08(2014)103", "10.1007/jhep06(2015)100", 
"10.1140/epjc/s10052-015-3363-3", "10.1140/epjc/s10052-017-4915-5", "10.1140/epjc/s10052-014-3023-z", "10.1140/epjc/s10052-017-5315-6", "10.1140/epjc/s10052-016-4050-8", "10.3389/fpsyt.2017.00244", "10.1016/j.physletb.2014.10.002", "10.1007/jhep07(2015)162", "10.1007/jhep08(2014)174", "10.3897/zookeys.2.23", "10.1007/jhep07(2017)014", "10.1007/jhep04(2016)035", "10.1140/epjc/s10052-017-4984-5", "10.1007/jhep02(2016)156", "10.1016/j.physletb.2016.03.039", "10.1007/jhep07(2018)115", "10.3897/zookeys.34.268", "10.1007/jhep02(2016)122", "10.1016/j.physletb.2012.03.022", "10.1016/j.physletb.2018.09.019", "10.1016/j.physletb.2018.09.024", "10.1051/0004-6361/201629272", "10.1103/physrevc.97.024904", "10.1140/epjc/s10052-016-4521-y", "10.1140/epjc/s10052-016-4176-8", "10.1140/epjc/s10052-014-3134-6", "10.1140/epjc/s10052-016-4110-0", "10.1007/jhep07(2017)121", "10.1007/jhep07(2018)153", "10.1007/jhep03(2018)115", "10.1007/jhep04(2018)060", "10.11606/1807-0205/2020.60.06", "10.4126/frl01-0064015", "10.1007/jhep09(2017)020", "10.1016/j.physletb.2014.04.023", "10.1016/j.physletb.2015.02.048", "10.1007/jhep02(2018)032", "10.1016/j.physletb.2018.01.001", "10.1140/epjc/s10052-015-3852-4", "10.1007/jhep10(2014)087", "10.11646/zootaxa.4630.1.1", "10.5281/zenodo.3742118", "10.4126/frl01-0064022", "10.11646/zootaxa.4758.3.1", "10.11646/zootaxa.4772.3.1", "10.11646/zootaxa.4576.3.5", "10.4126/frl01-0064125", "10.1007/jhep12(2017)017", "10.4126/frl01-0064162", "10.4126/frl01-0064138", "10.1007/jhep06(2014)124", "10.1007/jhep06(2016)059", "10.1007/jhep06(2014)035", "10.1103/physrevd.90.052005", "10.1007/jhep11(2017)062", "10.3847/2041-8213/aa9aed", "10.1016/j.physletb.2016.06.080", "10.1007/jhep10(2017)073", "10.1007/jhep03(2018)167", "10.1016/j.physletb.2018.11.065", "10.1140/epjc/s10052-017-5081-5", "10.1140/epjc/s10052-015-3500-z", "10.1140/epjc/s10052-017-5445-x", "10.1016/j.physletb.2014.01.049", "10.1007/jhep03(2018)172", "10.1016/j.physletb.2015.03.048", 
"10.1016/j.physletb.2018.11.032", "10.1007/jhep05(2018)025", "10.1016/j.physletb.2016.08.052", "10.1016/j.physletb.2014.09.008", "10.1103/physrevlett.120.071802", "10.1016/j.physletb.2018.01.049", "10.1016/j.physletb.2016.06.017", "10.1016/j.physletb.2016.04.005", "10.1007/jhep06(2018)031", "10.1007/jhep01(2016)079", "10.1007/jhep10(2017)006", "10.1140/epjc/s10052-018-5740-1", "10.1016/j.physletb.2015.01.034", "10.1007/jhep10(2017)005", "10.1016/j.physletb.2018.04.007", "10.1007/jhep04(2015)164", "10.1140/epjc/s10052-018-5691-6", "10.1007/jhep05(2018)148", "10.1007/jhep03(2018)003", "10.1140/epjc/s10052-014-3076-z", "10.1016/j.physletb.2016.02.015", "10.1103/physrevd.97.072003", "10.1016/j.physletb.2017.11.054", "10.1140/epjc/s10052-011-1849-1", "10.1007/jhep09(2016)175", "10.1016/j.physletb.2017.12.011", "10.1007/jhep04(2014)103", "10.1007/jhep12(2014)017", "10.1016/j.physletb.2014.09.048", "10.1140/epjc/s10052-019-7202-9", "10.1007/jhep04(2014)191", "10.1007/jhep07(2013)163", "10.1140/epjc/s10052-014-3130-x", "10.1007/jhep04(2016)023", "10.1016/j.physletb.2015.07.023", "10.1140/epjc/s10052-018-6500-y", "10.1016/j.physletb.2015.04.045", "10.1007/jhep09(2017)053", "10.1007/jhep10(2017)180", "10.1140/epjc/s10052-017-4912-8", "10.1007/jhep10(2016)129", "10.3920/978-90-8686-816-2", "10.1007/jhep01(2017)099", "10.1007/jhep01(2018)045", "10.1007/jhep04(2015)025", "10.1016/j.physletb.2018.02.050", "10.1103/physrevlett.116.032301", "10.1007/jhep08(2017)029", "10.1007/jhep08(2017)073", "10.1016/j.physletb.2014.11.059", "10.1007/jhep01(2013)131", "10.1007/jhep06(2014)112", "10.1016/j.physletb.2017.09.066", "10.1140/epjc/s10052-014-2883-6", "10.1094/mpmi", "10.1007/jhep11(2017)195", "10.1007/jhep06(2018)108", "10.1007/jhep09(2018)139", "10.1016/j.physletb.2016.12.005", "10.1140/epjc/s10052-017-5349-9", "10.1016/j.physletb.2012.08.021", "10.1016/j.physletb.2014.10.032", "10.1007/jhep09(2017)088", "10.1140/epjc/s10052-015-3425-6", "10.1007/jhep01(2018)054", 
"10.1103/physrevlett.110.182302", "10.1140/epjc/s10052-017-5317-4", "10.1007/jhep01(2017)117", "10.1016/j.physletb.2017.12.006", "10.1016/j.physletb.2018.02.004", "10.1016/j.physletb.2018.02.025", "10.1016/j.physletb.2016.02.055", "10.1016/j.physletb.2016.04.061", "10.1140/epjc/s10052-015-3372-2", "10.1016/j.physletb.2015.02.051", "10.1016/j.physletb.2014.11.049", "10.1007/jhep09(2016)001", "10.1016/j.physletb.2016.03.017", "10.1007/jhep06(2016)067", "10.1140/epjc/s10052-015-3543-1", "10.1140/epjc/s10052-017-4911-9", "10.1007/jhep07(2013)122", "10.1140/epjc/s10052-019-6855-8", "10.1140/epjc/s10052-019-6540-y", "10.1007/jhep06(2014)009", "10.1007/jhep05(2019)043", "10.1016/j.physletb.2016.01.028", "10.1103/physrevlett.120.231801", "10.1140/epjc/s10052-016-4325-0", "10.1007/jhep07(2018)127", "10.1016/j.physletb.2016.05.003", "10.1140/epjc/s10052-017-4644-9", "10.1140/epjc/s10052-017-4700-5", "10.1007/jhep06(2018)107", "10.1016/j.physletb.2018.01.042", "10.1140/epjc/s10052-018-5624-4", "10.1007/jhep08(2016)139", "10.1007/jhep05(2018)195", "10.1103/physrevd.97.052012", "10.1140/epjc/s10052-016-3978-z", "10.1007/jhep05(2019)088", "10.1140/epjc/s10052-017-5079-z", "10.1140/epjc/s10052-016-4205-7", "10.1007/jhep01(2016)006", "10.1140/epjc/s10052-016-4286-3", "10.1016/j.physletb.2017.04.071", "10.1103/physrevd.97.012007", "10.1016/j.physletb.2018.01.077", "10.1007/jhep04(2018)073", "10.1016/j.physletb.2015.09.057", "10.1007/jhep07(2018)032", "10.1140/epjc/s10052-015-3435-4", "10.1007/jhep11(2017)010", "10.1093/isd/ixaa002", "10.1016/j.physletb.2018.03.035", "10.1007/jhep10(2018)031", "10.1016/s0140-6736(18)31891-9", "10.1140/epjc/s10052-018-6148-7", "10.1016/j.physletb.2018.03.057", "10.1140/epjc/s10052-019-6632-8", "10.1016/j.physletb.2015.11.071", "10.1140/epjc/s10052-018-5605-7", "10.1016/j.physletb.2018.10.073", "10.1140/epjc/s10052-019-7387-y", "10.1007/jhep06(2019)143", "10.1140/epjc/s10052-018-5567-9", "10.1140/epjc/s10052-019-6909-y", 
"10.1002/(sici)1521-3978(199901)47:1/3", "10.5281/zenodo.3758372", "10.4126/frl01-0064041", "10.1140/epjc/s10052-014-3129-3", "10.11646/zootaxa.4685.1.1", "10.11646/zootaxa.4756.1.1", "10.6101/azq/0001", "10.14582/duzg", "10.1016/j.physletb.2012.11.039", "10.4126/frl01-0064191", "10.1016/j.physletb.2013.12.029", "10.1007/jhep10(2013)189", "10.1051/0004-6361/201629512", "10.1007/jhep01(2013)116", "10.2312/gfz.lis.2016.001", "10.1016/j.physletb.2013.01.040", "10.1103/physrevd.90.112005", "10.1140/epjc/s10052-015-3726-9", "10.1007/s11682-013-9269-5", "10.1007/jhep02(2017)071", "10.1016/j.physletb.2016.09.040", "10.1007/jhep02(2017)117", "10.1007/jhep08(2016)009", "10.1103/physrevd.97.052010", "10.1007/jhep09(2017)032", "10.1103/physrevd.97.032005", "10.1140/epjc/s10052-017-4965-8", "10.1016/j.physletb.2016.08.042", "10.1016/j.physletb.2017.10.039", "10.1007/jhep03(2016)127", "10.1140/epjc/s10052-014-3034-9", "10.1007/jhep03(2017)113", "10.1007/jhep11(2018)040", "10.1140/epjc/s10052-018-6457-x", "10.1140/epjc/s10052-016-4041-9", "10.1140/epjc/s10052-018-6219-9", "10.1140/epjc/s10052-016-4149-y", "10.1007/jhep10(2017)072", "10.1140/epjc/s10052-016-4083-z", "10.1140/epjc/s10052-016-3956-5", "10.1007/jhep04(2016)073", "10.1007/jhep06(2016)177", "10.1016/j.physletb.2018.03.084", "10.1007/jhep10(2015)128", "10.1007/jhep03(2018)166", "10.1140/epjc/s10052-015-3491-9", "10.1016/j.physletb.2015.04.060", "10.1103/physrevd.92.112001", "10.1140/epjc/s10052-015-3367-z", "10.1007/jhep10(2017)019", "10.1007/jhep10(2017)131", "10.1016/j.physletb.2018.08.057", "10.1007/jhep01(2016)096", "10.1016/j.physletb.2017.09.053", "10.1007/jhep07(2017)013", "10.1007/jhep01(2019)030", "10.1007/jhep11(2016)110", "10.1016/j.physletb.2012.02.044", "10.1140/epjc/s10052-017-5192-z", "10.1007/jhep03(2015)022", "10.1140/epjc/s10052-019-6847-8", "10.1093/database/baz085", "10.1140/epjc/s10052-015-3451-4", "10.1007/jhep11(2017)029", "10.1140/epjc/s10052-015-3517-3", "10.1007/jhep07(2017)001", 
"10.1007/jhep09(2016)074", "10.1103/physrevd.97.072016", "10.1007/jhep05(2018)006", "10.1103/physrevlett.120.081801", "10.1103/physrevlett.120.161802", "10.1103/physrevlett.120.202005", "10.5281/zenodo.1299123", "10.5281/zenodo.3777294" ], + "pmid" : [], + "pmc" : [] +} \ No newline at end of file diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/BlackListProviderTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/BlackListProviderTest.java index bd2eb4ec3..7cab5ed9c 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/BlackListProviderTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/BlackListProviderTest.java @@ -1,2 +1,17 @@ -package eu.dnetlib.dhp.schema.oaf.utils;public class BlackListProviderTest { + +package eu.dnetlib.dhp.schema.oaf.utils; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class BlackListProviderTest { + + @Test + public void blackListTest() { + + Assertions.assertNotNull(PidBlacklistProvider.getBlacklist()); + Assertions.assertNotNull(PidBlacklistProvider.getBlacklist().get("doi")); + Assertions.assertTrue(PidBlacklistProvider.getBlacklist().get("doi").size() > 0); + Assertions.assertTrue(PidBlacklistProvider.getBlacklist("xxx").isEmpty()); /* getBlacklist(String) falls back to an empty set for unknown PID types, so it is never null */ + } } diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi1.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi1.json index 6865d0ff6..da90a64d0 100644 --- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi1.json +++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi1.json @@ -1 +1 @@
-{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}]} \ No newline at end of file +{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[ {"qualifier":{"classid":"doi"},"value":"10.12739/10.12739"},{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}]} \ No newline at end of file