package eu.dnetlib.validator2.validation.utils;

import java.util.regex.Pattern;

/**
 * Catalogue of the regular expressions used for validation, each exposed both as its raw
 * {@code String} source and as a pre-compiled {@link Pattern}.
 *
 * <p>Most expressions carry no {@code ^}/{@code $} anchors, so they only act as whole-value checks
 * when applied with {@code Matcher.matches()} (presumably how callers use them; verify at the call
 * sites). {@link Pattern} instances are immutable and safe to share.
 */
public class SupportedRegExs {

    /**
     * Project identifier in the {@code info:eu-repo/grantAgreement} namespace: two free-form path
     * segments followed by a numeric segment, optionally followed by three more free-form segments.
     *
     * <p>The two forms are joined with a single {@code |}. A doubled bar would add an empty
     * alternative and make the empty string a valid full match.
     */
    public static final String PROJECT_IDENTIFIER_REG_EX =
            "(info:eu-repo/grantAgreement/.*/.*/[0123456789]+)"
                    + "|(info:eu-repo/grantAgreement/.*/.*/[0123456789]+/.*/.*/.*)";

    /** Compiled form of {@link #PROJECT_IDENTIFIER_REG_EX}. */
    public static final Pattern COMPILED_PROJECT_IDENTIFIER_REGEX = Pattern.compile(PROJECT_IDENTIFIER_REG_EX);

    /**
     * License condition, in one of three shapes: {@code cc-by-sa} or {@code cc-by-nc-sa} followed by
     * a comma, a space and free text; {@code (c) <text>, <4 digits>}; or an {@code http}/{@code https} URL.
     *
     * <p>Alternatives are separated by a single {@code |} so that the empty string, a missing license
     * code or a missing URL scheme are not accepted.
     */
    public static final String LICENSE_CONDITION_REG_EX =
            "((((cc-by-sa)|(cc-by-nc-sa)), .*)|(\\(c\\) .*, [0-9]{4})|((http|https)://.*))";

    /** Compiled form of {@link #LICENSE_CONDITION_REG_EX}. */
    public static final Pattern COMPILED_LICENSE_CONDITION_REG_EX = Pattern.compile(LICENSE_CONDITION_REG_EX);

    /**
     * Embargo end date: the prefix {@code info:eu-repo/date/embargoEnd/} followed by a
     * {@code YYYY-MM-DD} date whose year starts with 18, 19, 20 or 21. Day 01-31 is accepted for
     * every month (no per-month length check).
     */
    public static final String EMBARGOED_END_DATE_REG_EX =
            "info:eu-repo/date/embargoEnd/((18|19|20|21)\\d\\d-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01]))";

    /** Compiled form of {@link #EMBARGOED_END_DATE_REG_EX}. */
    public static final Pattern COMPILED_EMBARGOED_END_DATE_REG_EX = Pattern.compile(EMBARGOED_END_DATE_REG_EX);

    /**
     * Alternative identifier: {@code info:eu-repo/semantics/altIdentifier/<scheme>/<value>} where the
     * scheme is one of ark, arxiv, doi, hdl, isbn, pissn, eissn, pmid, purl, urn or wos.
     */
    public static final String ALT_IDENTIFIER_REG_EX =
            "info:eu-repo/semantics/altIdentifier/(ark|arxiv|doi|hdl|isbn|pissn|eissn|pmid|purl|urn|wos)/.*";

    /** Compiled form of {@link #ALT_IDENTIFIER_REG_EX}. */
    public static final Pattern COMPILED_ALT_IDENTIFIER_REG_EX = Pattern.compile(ALT_IDENTIFIER_REG_EX);

    /**
     * Publication reference: {@code info:eu-repo/semantics/reference/<scheme>/<value>} where the
     * scheme is one of ark, arxiv, doi, hdl, isbn, issn, pmid, purl, url, urn or wos.
     */
    public static final String PUBLICATION_REFERENCE_REG_EX =
            "info:eu-repo/semantics/reference/(ark|arxiv|doi|hdl|isbn|issn|pmid|purl|url|urn|wos)/.*";

    /** Compiled form of {@link #PUBLICATION_REFERENCE_REG_EX}. */
    public static final Pattern COMPILED_PUBLICATION_REFERENCE_REG_EX = Pattern.compile(PUBLICATION_REFERENCE_REG_EX);

    /**
     * Dataset reference: {@code info:eu-repo/semantics/dataset/<scheme>/<value>} where the scheme is
     * one of ark, doi, hdl, purl, url or urn.
     */
    public static final String DATASET_REFERENCE_REG_EX =
            "info:eu-repo/semantics/dataset/(ark|doi|hdl|purl|url|urn)/.*";

    /** Compiled form of {@link #DATASET_REFERENCE_REG_EX}. */
    public static final Pattern COMPILED_DATASET_REFERENCE_REG_EX = Pattern.compile(DATASET_REFERENCE_REG_EX);

    /**
     * Publication date: either {@code YYYY}, {@code YYYY-MM} or {@code YYYY-MM-DD}, or a full
     * date-time {@code YYYY-MM-DDThh:mm[:ss[.fraction]]} followed by {@code Z} or a {@code +hh:mm} /
     * {@code -hh:mm} offset. Accepted year prefixes are 14, 15, 16, 17, 19 and 20.
     *
     * <p>NOTE(review): the century list skips 18 (years 1800-1899 are rejected) although the embargo
     * pattern in this class accepts it; this looks accidental - confirm before changing.
     */
    public static final String PUBLICATION_DATE_REG_EX =
            "((14|15|16|17|19|20)\\d\\d(-(0[1-9]|1[012])(-(0[1-9]|[12][0-9]|3[01]))?)?$)"
                    + "|((14|15|16|17|19|20)\\d\\d-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])"
                    + "T([01][0-9]|2[0-3]):([0-5][0-9])(:[0-5][0-9](\\.\\d+)?)?"
                    + "(((\\+|-)([01][0-9]|2[0-3]):[0-5][0-9])|Z)$)";

    /** Compiled form of {@link #PUBLICATION_DATE_REG_EX}. */
    public static final Pattern COMPILED_PUBLICATION_DATE_REG_EX = Pattern.compile(PUBLICATION_DATE_REG_EX);

    /**
     * Anchored date / date-time expression with calendar ({@code YYYY-MM-DD}), week ({@code Www-D})
     * and ordinal-day branches, plus an optional time part and zone designator. The back-references
     * force the date and time separators to be used consistently within one value.
     */
    public static final String ISO_8601_DATE_REG_EX = "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$";

    /** Compiled form of {@link #ISO_8601_DATE_REG_EX}. */
    public static final Pattern COMPILED_ISO_8601_DATE_REG_EX = Pattern.compile(ISO_8601_DATE_REG_EX);

    /** Exactly four digits. */
    public static final String YEAR_YYYY_REG_EX = "\\d{4}";

    /** Compiled form of {@link #YEAR_YYYY_REG_EX}. */
    public static final Pattern COMPILED_YEAR_YYYY_REG_EX = Pattern.compile(YEAR_YYYY_REG_EX);

    /**
     * Calendar date {@code YYYY-MM-DD} for years 1900-2999 with per-month day limits.
     *
     * <p>February 29 is accepted only for 2000, 2400, 2800, or a year whose last two digits are a
     * non-zero multiple of four. The century prefix {@code (19|2[0-9])} is grouped on its own so that
     * it always combines with the two-digit leap suffix; without that grouping the bare text
     * {@code 19} would be a complete alternative and leap years 1904-1996 would be rejected.
     */
    public static final String YYYY_MM_DD_REGEX =
            "((2000|2400|2800|((19|2[0-9])(0[48]|[2468][048]|[13579][26])))-02-29)"
                    + "|(((19|2[0-9])[0-9]{2})-02-(0[1-9]|1[0-9]|2[0-8]))"
                    + "|(((19|2[0-9])[0-9]{2})-(0[13578]|10|12)-(0[1-9]|[12][0-9]|3[01]))"
                    + "|(((19|2[0-9])[0-9]{2})-(0[469]|11)-(0[1-9]|[12][0-9]|30))";

    /** Compiled form of {@link #YYYY_MM_DD_REGEX}. */
    public static final Pattern COMPILED_YYYY_MM_DD_REGEX = Pattern.compile(YYYY_MM_DD_REGEX);

    /**
     * Two {@link #YYYY_MM_DD_REGEX} dates separated by {@code " - "} (space, hyphen, space).
     * Purely syntactic: it does not check that the end date is after the start date.
     */
    public static final String YYYY_MM_DD_RANGE_REGEX = "(" + YYYY_MM_DD_REGEX + ") - (" + YYYY_MM_DD_REGEX + ")";

    /** Compiled form of {@link #YYYY_MM_DD_RANGE_REGEX}. */
    public static final Pattern COMPILED_YYYY_MM_DD_RANGE_REGEX = Pattern.compile(YYYY_MM_DD_RANGE_REGEX);

    /**
     * Anchored language-tag expression with named groups {@code grandfathered}, {@code language},
     * {@code extlang}, {@code script}, {@code region}, {@code variant}, {@code extension} and
     * {@code privateUse}.
     */
    public static final String BCP47_LANG_TAGS_REG_EX = "^(?<grandfathered>(?:en-GB-oed|i-(?:ami|bnn|default|enochian|hak|klingon|lux|mingo|navajo|pwn|t(?:a[oy]|su))|sgn-(?:BE-(?:FR|NL)|CH-DE))|(?:art-lojban|cel-gaulish|no-(?:bok|nyn)|zh-(?:guoyu|hakka|min(?:-nan)?|xiang)))|(?:(?<language>(?:[A-Za-z]{2,3}(?:-(?<extlang>[A-Za-z]{3}(?:-[A-Za-z]{3}){0,2}))?)|[A-Za-z]{4}|[A-Za-z]{5,8})(?:-(?<script>[A-Za-z]{4}))?(?:-(?<region>[A-Za-z]{2}|[0-9]{3}))?(?:-(?<variant>[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-(?<extension>[0-9A-WY-Za-wy-z](?:-[A-Za-z0-9]{2,8})+))*)(?:-(?<privateUse>x(?:-[A-Za-z0-9]{1,8})+))?$";

    /** Compiled form of {@link #BCP47_LANG_TAGS_REG_EX}. */
    public static final Pattern COMPILED_BCP47_LANG_TAGS_REG_EX = Pattern.compile(BCP47_LANG_TAGS_REG_EX);

    /**
     * DOI: {@code 10.} plus at least four digits, optional further dot-separated digit groups, a
     * slash, and a non-empty suffix without whitespace.
     */
    public static final String DOI_REG_EX = "10\\.\\d{4,}(\\.\\d+)*/[^\\s]+";

    /** Compiled form of {@link #DOI_REG_EX}. */
    public static final Pattern COMPILED_DOI_REG_EX = Pattern.compile(DOI_REG_EX);

    /** ISSN: four digits, an optional hyphen, three digits, then a digit or {@code X}. */
    public static final String ISSN_REG_EX = "\\d{4}-?\\d{3}[\\dX]";

    /** Compiled form of {@link #ISSN_REG_EX}. */
    public static final Pattern COMPILED_ISSN_REG_EX = Pattern.compile(ISSN_REG_EX);

    /**
     * ISBN in 13- or 10-character form. The 13-character forms start with 978 or 979 and are written
     * either as 17 characters with four hyphens, as 17 characters with four spaces, or as 13 plain
     * digits. The 10-character forms are 13 characters with three hyphens or spaces, or 10 plain
     * characters, and may end in {@code X}. The look-aheads pin the total length of the separated forms.
     */
    public static final String ISBN_REG_EX =
            "((?=.{17}$)978-\\d+-\\d+-\\d+-\\d)|"
                    + "((?=.{17}$)978 \\d+ \\d+ \\d+ \\d)|"
                    + "((?=.{17}$)979-[1-9]\\d*-\\d+-\\d+-\\d)|"
                    + "((?=.{17}$)979 [1-9]\\d* \\d+ \\d+ \\d)|"
                    + "(978\\d{10})|"
                    + "(979[1-9]\\d{9})|"
                    + "((?=.{13}$)\\d+-\\d+-\\d+-[\\dX])|"
                    + "((?=.{13}$)\\d+ \\d+ \\d+ [\\dX])|"
                    + "(\\d{9}[\\dX])";

    /** Compiled form of {@link #ISBN_REG_EX}. */
    public static final Pattern COMPILED_ISBN_REG_EX = Pattern.compile(ISBN_REG_EX);

    /**
     * ORCID iD written as an {@code https://orcid.org/} URL, limited to identifiers from
     * {@code 0000-0001-5xxx-xxxx} up to {@code 0000-0003-4xxx-xxxx}; the last character may be {@code X}.
     */
    public static final String ORCID_REG_EX =
            "https://orcid\\.org/0000-000(1-[5-9]|2-[0-9]|3-[0-4])[0-9]{3}-[0-9]{3}[0-9X]";

    /** Compiled form of {@link #ORCID_REG_EX}. */
    public static final Pattern COMPILED_ORCID_REG_EX = Pattern.compile(ORCID_REG_EX);

    /**
     * Researcher ID: one upper-case letter, a hyphen, four digits, a hyphen, and a four-digit number
     * starting with 19 or 20.
     */
    public static final String RESEARCHER_ID_REG_EX = "[A-Z]-[0-9]{4}-(19|20)[0-9][0-9]";

    /** Compiled form of {@link #RESEARCHER_ID_REG_EX}. */
    public static final Pattern COMPILED_RESEARCHER_ID_REG_EX = Pattern.compile(RESEARCHER_ID_REG_EX);

    /** Scopus author ID: ten or eleven digits. */
    public static final String SCOPUS_AUTHOR_ID_REG_EX = "[0-9]{10,11}";

    /** Compiled form of {@link #SCOPUS_AUTHOR_ID_REG_EX}. */
    public static final Pattern COMPILED_SCOPUS_AUTHOR_ID_REG_EX = Pattern.compile(SCOPUS_AUTHOR_ID_REG_EX);

    /**
     * ISNI: four space-separated groups of four characters, all digits except that the very last
     * character may be {@code X}.
     */
    public static final String ISNI_REG_EX = "[0-9]{4} [0-9]{4} [0-9]{4} [0-9]{3}[0-9X]";

    /** Compiled form of {@link #ISNI_REG_EX}. */
    public static final Pattern COMPILED_ISNI_REG_EX = Pattern.compile(ISNI_REG_EX);

    /**
     * DAI: the prefix {@code info:eu-repo/dai/nl/} followed by eight digits and a final digit,
     * {@code x} or {@code X}.
     */
    public static final String DAI_REG_EX = "info\\:eu\\-repo/dai/nl/\\d{8}[\\dxX]";

    /** Compiled form of {@link #DAI_REG_EX}. */
    public static final Pattern COMPILED_DAI_REG_EX = Pattern.compile(DAI_REG_EX);

}
|