uoa-validator-engine2/src/main/java/eu/dnetlib/validator2/validation/utils/SupportedRegExs.java

101 lines
5.9 KiB
Java

package eu.dnetlib.validator2.validation.utils;
import java.util.regex.Pattern;
/**
 * Central catalogue of the regular expressions used for validation, each exposed both as
 * its raw {@code String} form and as a pre-compiled, reusable {@link Pattern}.
 *
 * <p>Most expressions carry no {@code ^}/{@code $} anchors, so they are presumably meant to be
 * applied with {@code Matcher.matches()} (whole-input match) - verify against the callers.
 */
public class SupportedRegExs {

    /**
     * Project identifier: {@code info:eu-repo/grantAgreement/<a>/<b>/<digits>}, optionally
     * followed by three further {@code /}-separated segments.
     * Alternatives are joined with a single {@code |}: a doubled {@code ||} would add an empty
     * alternative and make the empty string a valid identifier.
     */
    public static final String PROJECT_IDENTIFIER_REG_EX =
            "(info:eu-repo/grantAgreement/.*/.*/[0123456789]+)"
            + "|(info:eu-repo/grantAgreement/.*/.*/[0123456789]+/.*/.*/.*)";
    public static final Pattern COMPILED_PROJECT_IDENTIFIER_REGEX = Pattern.compile(PROJECT_IDENTIFIER_REG_EX);

    /**
     * License condition: {@code "cc-by-sa, ..."} / {@code "cc-by-nc-sa, ..."},
     * {@code "(c) <holder>, <4-digit year>"}, or an {@code http://} / {@code https://} URL.
     * Uses single {@code |} everywhere so that neither the license prefix nor the URL scheme
     * can be empty.
     */
    public static final String LICENSE_CONDITION_REG_EX =
            "((((cc-by-sa)|(cc-by-nc-sa)), .*)|(\\(c\\) .*, [0-9]{4})|((http|https)://.*))";
    public static final Pattern COMPILED_LICENSE_CONDITION_REG_EX = Pattern.compile(LICENSE_CONDITION_REG_EX);

    /** Embargo end date: {@code info:eu-repo/date/embargoEnd/YYYY-MM-DD}, years 1800-2199. */
    public static final String EMBARGOED_END_DATE_REG_EX = "info:eu-repo/date/embargoEnd/((18|19|20|21)\\d\\d-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01]))";
    public static final Pattern COMPILED_EMBARGOED_END_DATE_REG_EX = Pattern.compile(EMBARGOED_END_DATE_REG_EX);

    /** Alternative identifier: {@code info:eu-repo/semantics/altIdentifier/<scheme>/<value>}. */
    public static final String ALT_IDENTIFIER_REG_EX = "info:eu-repo/semantics/altIdentifier/(ark|arxiv|doi|hdl|isbn|pissn|eissn|pmid|purl|urn|wos)/.*";
    public static final Pattern COMPILED_ALT_IDENTIFIER_REG_EX = Pattern.compile(ALT_IDENTIFIER_REG_EX);

    /** Publication reference: {@code info:eu-repo/semantics/reference/<scheme>/<value>}. */
    public static final String PUBLICATION_REFERENCE_REG_EX = "info:eu-repo/semantics/reference/(ark|arxiv|doi|hdl|isbn|issn|pmid|purl|url|urn|wos)/.*";
    public static final Pattern COMPILED_PUBLICATION_REFERENCE_REG_EX = Pattern.compile(PUBLICATION_REFERENCE_REG_EX);

    /** Dataset reference: {@code info:eu-repo/semantics/dataset/<scheme>/<value>}. */
    public static final String DATASET_REFERENCE_REG_EX = "info:eu-repo/semantics/dataset/(ark|doi|hdl|purl|url|urn)/.*";
    public static final Pattern COMPILED_DATASET_REFERENCE_REG_EX = Pattern.compile(DATASET_REFERENCE_REG_EX);

    /**
     * Publication date: {@code YYYY}, {@code YYYY-MM}, {@code YYYY-MM-DD}, or a full date-time
     * {@code YYYY-MM-DDThh:mm[:ss[.fff]](Z|+hh:mm|-hh:mm)}. Accepted years are 1400-2099; the
     * 18xx century is included so that the accepted range has no gap.
     */
    public static final String PUBLICATION_DATE_REG_EX =
            "((14|15|16|17|18|19|20)\\d\\d(-(0[1-9]|1[012])(-(0[1-9]|[12][0-9]|3[01]))?)?$)"
            + "|((14|15|16|17|18|19|20)\\d\\d-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])T([01][0-9]|2[0-3]):([0-5][0-9])(:[0-5][0-9](\\.\\d+)?)?(((\\+|-)([01][0-9]|2[0-3]):[0-5][0-9])|Z)$)";
    public static final Pattern COMPILED_PUBLICATION_DATE_REG_EX = Pattern.compile(PUBLICATION_DATE_REG_EX);

    /**
     * ISO 8601 date / date-time covering calendar ({@code MM-DD}), week ({@code Www-D}) and
     * ordinal ({@code DDD}) forms, with optional time and zone parts. The back-references
     * ({@code \3}, {@code \17}) force the date and time separators to be used consistently.
     */
    public static final String ISO_8601_DATE_REG_EX = "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$";
    public static final Pattern COMPILED_ISO_8601_DATE_REG_EX = Pattern.compile(ISO_8601_DATE_REG_EX);

    /** Four decimal digits. */
    public static final String YEAR_YYYY_REG_EX = "\\d{4}";
    public static final Pattern COMPILED_YEAR_YYYY_REG_EX = Pattern.compile(YEAR_YYYY_REG_EX);

    /**
     * Calendar-valid {@code YYYY-MM-DD} date in the 1900-2999 range, including leap-year
     * handling for February 29th.
     * The century prefix of the leap-year branch is wrapped in a non-capturing group: without
     * it the alternation splits as {@code 19 | 2[0-9](..)}, which accepts the bogus
     * {@code "19-02-29"} and rejects every 20th-century leap day. Non-capturing keeps the
     * capture-group numbering of the expression unchanged.
     */
    public static final String YYYY_MM_DD_REGEX = "((2000|2400|2800|((?:19|2[0-9])(0[48]|[2468][048]|[13579][26])))-02-29)"
            + "|(((19|2[0-9])[0-9]{2})-02-(0[1-9]|1[0-9]|2[0-8]))"
            + "|(((19|2[0-9])[0-9]{2})-(0[13578]|10|12)-(0[1-9]|[12][0-9]|3[01]))"
            + "|(((19|2[0-9])[0-9]{2})-(0[469]|11)-(0[1-9]|[12][0-9]|30))";
    public static final Pattern COMPILED_YYYY_MM_DD_REGEX = Pattern.compile(YYYY_MM_DD_REGEX);

    /**
     * Date range written as {@code "<YYYY-MM-DD> - <YYYY-MM-DD>"} (space, hyphen, space).
     * Purely syntactic: there is no check that the end date falls after the start date.
     */
    public static final String YYYY_MM_DD_RANGE_REGEX = "(" + YYYY_MM_DD_REGEX + ") - (" + YYYY_MM_DD_REGEX + ")";
    public static final Pattern COMPILED_YYYY_MM_DD_RANGE_REGEX = Pattern.compile(YYYY_MM_DD_RANGE_REGEX);

    /**
     * BCP 47 language tag, with named groups {@code grandfathered}, {@code language},
     * {@code extlang}, {@code script}, {@code region}, {@code variant}, {@code extension}
     * and {@code privateUse}.
     */
    public static final String BCP47_LANG_TAGS_REG_EX = "^(?<grandfathered>(?:en-GB-oed|i-(?:ami|bnn|default|enochian|hak|klingon|lux|mingo|navajo|pwn|t(?:a[oy]|su))|sgn-(?:BE-(?:FR|NL)|CH-DE))|(?:art-lojban|cel-gaulish|no-(?:bok|nyn)|zh-(?:guoyu|hakka|min(?:-nan)?|xiang)))|(?:(?<language>(?:[A-Za-z]{2,3}(?:-(?<extlang>[A-Za-z]{3}(?:-[A-Za-z]{3}){0,2}))?)|[A-Za-z]{4}|[A-Za-z]{5,8})(?:-(?<script>[A-Za-z]{4}))?(?:-(?<region>[A-Za-z]{2}|[0-9]{3}))?(?:-(?<variant>[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-(?<extension>[0-9A-WY-Za-wy-z](?:-[A-Za-z0-9]{2,8})+))*)(?:-(?<privateUse>x(?:-[A-Za-z0-9]{1,8})+))?$";
    public static final Pattern COMPILED_BCP47_LANG_TAGS_REG_EX = Pattern.compile(BCP47_LANG_TAGS_REG_EX);

    /** DOI: {@code 10.<4+ digits>[.<digits>...]/<non-whitespace suffix>}. */
    public static final String DOI_REG_EX = "10\\.\\d{4,}(\\.\\d+)*/[^\\s]+";
    public static final Pattern COMPILED_DOI_REG_EX = Pattern.compile(DOI_REG_EX);

    /** ISSN: eight characters with an optional hyphen in the middle; the last may be {@code X}. */
    public static final String ISSN_REG_EX = "\\d{4}-?\\d{3}[\\dX]";
    public static final Pattern COMPILED_ISSN_REG_EX = Pattern.compile(ISSN_REG_EX);

    /**
     * ISBN-13 (prefix 978 or 979) and ISBN-10, each either undelimited or delimited
     * consistently by hyphens or by spaces. The look-aheads pin the total length of the
     * delimited forms (17 characters for ISBN-13, 13 for ISBN-10). Checksums are not verified.
     */
    public static final String ISBN_REG_EX = "((?=.{17}$)978-\\d+-\\d+-\\d+-\\d)|" +
            "((?=.{17}$)978 \\d+ \\d+ \\d+ \\d)|" +
            "((?=.{17}$)979-[1-9]\\d*-\\d+-\\d+-\\d)|" +
            "((?=.{17}$)979 [1-9]\\d* \\d+ \\d+ \\d)|" +
            "(978\\d{10})|" +
            "(979[1-9]\\d{9})|" +
            "((?=.{13}$)\\d+-\\d+-\\d+-[\\dX])|" +
            "((?=.{13}$)\\d+ \\d+ \\d+ [\\dX])|" +
            "(\\d{9}[\\dX])";
    public static final Pattern COMPILED_ISBN_REG_EX = Pattern.compile(ISBN_REG_EX);

    /**
     * ORCID iD in its {@code https://orcid.org/} URI form, restricted to identifiers from
     * {@code 0000-0001-5xxx-xxxx} through {@code 0000-0003-4xxx-xxxx}.
     * NOTE(review): iDs starting with {@code 0009-} are not accepted by this expression -
     * confirm against the current ORCID identifier specification whether they should be.
     */
    public static final String ORCID_REG_EX = "https://orcid\\.org/0000-000(1-[5-9]|2-[0-9]|3-[0-4])[0-9]{3}-[0-9]{3}[0-9X]";
    public static final Pattern COMPILED_ORCID_REG_EX = Pattern.compile(ORCID_REG_EX);

    /** ResearcherID: uppercase letter, four digits, and a 19xx/20xx year, hyphen-separated. */
    public static final String RESEARCHER_ID_REG_EX = "[A-Z]-[0-9]{4}-(19|20)[0-9][0-9]";
    public static final Pattern COMPILED_RESEARCHER_ID_REG_EX = Pattern.compile(RESEARCHER_ID_REG_EX);

    /** Scopus author id: 10 or 11 decimal digits. */
    public static final String SCOPUS_AUTHOR_ID_REG_EX = "[0-9]{10,11}";
    public static final Pattern COMPILED_SCOPUS_AUTHOR_ID_REG_EX = Pattern.compile(SCOPUS_AUTHOR_ID_REG_EX);

    /** ISNI: four space-separated groups of four characters; the very last may be {@code X}. */
    public static final String ISNI_REG_EX = "[0-9]{4} [0-9]{4} [0-9]{4} [0-9]{3}[0-9X]";
    public static final Pattern COMPILED_ISNI_REG_EX = Pattern.compile(ISNI_REG_EX);

    /** DAI: {@code info:eu-repo/dai/nl/} followed by eight digits and a digit or {@code x}/{@code X}. */
    public static final String DAI_REG_EX = "info\\:eu\\-repo/dai/nl/\\d{8}[\\dxX]";
    public static final Pattern COMPILED_DAI_REG_EX = Pattern.compile(DAI_REG_EX);
}