// concept should not appear with empty attribute id, i.e. when there is no community - ugly, but seems to work (oaf:datasourceprefix = just any field available in all records)
// Full-text link: assigns to oaf:fulltext the dc:identifier values that start with 'http'
// and end with 'document' or 'pdf'.
// NOTE(review): starts-with/ends-with compare case-sensitively, so values ending in e.g. '.PDF'
// are not selected - confirm that this is intended.
oaf:fulltext = xpath:"//dc:identifier[starts-with(., 'http') and (ends-with(., 'document') or ends-with(., 'pdf'))]";
//if xpath:"//dc:identifier[starts-with(., 'http') and (ends-with(., 'document') or ends-with(., 'pdf'))] or //dc:relation[starts-with(lower-case(normalize-space(.)), 'info:eu-repo/grantagreement')] or //dc:rights[starts-with(lower-case(normalize-space(.)), 'open') or contains(lower-case(normalize-space(.)), 'openaccess')] or //dc:accessRights[contains(lower-case(normalize-space(.)), 'openaccess')]" $var0 = "''"; else dc:coverage = skipRecord();
// Record filter on the presence of a dc:identifier starting with 'http'.
//  - condition true : only a throw-away variable ($var0) is set to the literal '' (no-op branch).
//  - condition false: skipRecord() is invoked through a dummy assignment to dc:coverage.
// NOTE(review): presumably skipRecord() excludes the record from the output - confirm against the rule engine.
if xpath:"//dc:identifier[starts-with(., 'http')]" $var0 = "''"; else dc:coverage = skipRecord();
// DOI candidates. identifierExtract receives (1) a JSON-style list of XPath expressions,
// (2) the record element as context node and (3) a regular expression.
//  - XPath 1: dc:identifier starting with 'info:', 'urn:', 'doi'/'DOI'/'Doi' followed by ':' or ' ',
//             or '10.', or starting with 'http' and containing 'doi.org/10.'.
//  - XPath 2: dc:relation starting with info:eu-repo/semantics/altIdentifier/(doi|DOI|Doi)/10.,
//             or with .../(url|purl|urn|doi|DOI|Doi)/http and containing 'doi.org/10.'.
//  - Regex  : '10.' + at least four digits + optional suffix + '/' + a non-empty run that contains
//             no whitespace, double quote or angle bracket.
// NOTE(review): presumably the regex is applied to every selected value and the captured group
// becomes the extracted identifier - verify against the identifierExtract implementation.
$varIdDoi = identifierExtract('["//dc:identifier[starts-with(., \"info:\") or starts-with(., \"urn:\") or starts-with(., \"doi:\") or starts-with(., \"DOI:\") or starts-with(., \"Doi:\") or starts-with(., \"doi \") or starts-with(., \"DOI \") or starts-with(., \"Doi \") or starts-with(., \"10.\") or ((starts-with(., \"http\")) and contains(., \"doi.org/10.\"))]", "//dc:relation[starts-with(., \"info:eu-repo/semantics/altIdentifier/doi/10.\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/DOI/10.\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/Doi/10.\") or ((starts-with(., \"info:eu-repo/semantics/altIdentifier/url/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/purl/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/urn/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/doi/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/DOI/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/Doi/http\")) and contains(., \"doi.org/10.\"))]"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)');
// Handle candidates, same three-argument identifierExtract call shape (XPath list, record node, regex).
//  - XPath 1: dc:identifier starting with 'HDL:' but not with 'HDL: http', and not containing '123456789'.
//             NOTE(review): '123456789' looks like a placeholder/default handle prefix being excluded - confirm.
//  - XPath 2: dc:relation whose normalized text starts with info:eu-repo/semantics/altIdentifier/hdl/,
//             or with .../altIdentifier/url/ while containing '://hdl.handle.net/'.
//  - Regex  : after a negative lookahead for the known prefixes, captures from a digit to the end of the value.
$varIdHdl = identifierExtract('["//dc:identifier[starts-with(., \"HDL:\") and not(starts-with(., \"HDL: http\"))][not(contains(., \"123456789\"))]", "//dc:relation[starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/hdl/\") or (starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/url/\") and contains(., \"://hdl.handle.net/\"))]"]' , xpath:"./*[local-name()='record']" , '(?!(info:hdl:|://hdl.handle.net/|info:eu-repo/semantics/altIdentifier/hdl/))(\d.*)');
// ISBN-13 candidates, computed by a single XPath expression that joins two sequences:
//  1) dc:identifier and dc:source values starting with 'isbn' (any case), '978' or '979' that end with either
//     a hyphenated 97x-n-n-n-n form whose text from the first '97' onwards is exactly 17 characters long
//     (13 digits + 4 hyphens), or a plain 13-digit 97x number; the 'isbn' label plus following ':'/whitespace is removed.
//  2) dc:relation values starting with info:eu-repo/semantics/altidentifier/isbn/ (any case) that end with either
//     the hyphenated form with a total length of 59 (42-character prefix + 17) or the plain 13-digit form;
//     the result is the lower-cased text after that prefix.
$varIdIsbn = xpath:"(//dc:identifier, //dc:source)[starts-with(lower-case(.), 'isbn') or starts-with(., '978') or starts-with(., '979')][(matches(., '(isbn[:\s]*)?97[89]-\d+-\d+-\d+-\d+$', 'i') and string-length(concat('97', substring-after(., '97'))) = 17) or matches(., '(isbn[:\s]*)?97[89]\d{10}$', 'i')]/replace(., 'isbn[:\s]*', '', 'i'), //dc:relation[starts-with(lower-case(.), 'info:eu-repo/semantics/altidentifier/isbn/')][(matches(., 'info:eu-repo/semantics/altIdentifier/isbn/97[89]-\d+-\d+-\d+-\d+$', 'i') and string-length(.) = 59) or matches(., 'info:eu-repo/semantics/altidentifier/isbn/97[89]\d{10}$', 'i')]/substring-after(lower-case(.), 'info:eu-repo/semantics/altidentifier/isbn/')";
// Bibcode candidates, same identifierExtract call shape (XPath list, record node, regex).
//  - XPath: dc:identifier starting with 'BibCode:' or 'BIBCODE:', or starting with 'http:' and containing 'bibcode='.
//  - Regex: captures the whole value when it starts with 'BibCode:', 'BIBCODE:' or 'http'.
// NOTE(review): the XPath tests for 'http:' only, so 'https:' URLs carrying 'bibcode=' are not selected - confirm intended.
$varIdBibc = identifierExtract('["//dc:identifier[starts-with(., \"BibCode:\") or starts-with(., \"BIBCODE:\") or (starts-with(., \"http:\") and contains(., \"bibcode=\"))]"]' , xpath:"./*[local-name()='record']" , '(^(BibCode:|BIBCODE:|http).*$)');
// PubMed Central candidates, same identifierExtract call shape (XPath list, record node, regex).
//  - XPath 1: dc:identifier starting with 'PUBMEDCENTRAL:', or starting with 'http' and containing
//             '://www.ncbi.nlm.nih.gov/pmc/articles/PMC'.
//  - XPath 2: dc:relation starting with info:eu-repo/semantics/altIdentifier/pmid/PMC, or with
//             .../(url|purl|urn|pmid)/http and containing '://www.ncbi.nlm.nih.gov/pmc/articles/PMC'.
//  - Regex  : captures 'PMC' followed by one or more digits.
$varIdPmc = identifierExtract('["//dc:identifier[starts-with(., \"PUBMEDCENTRAL:\") or (starts-with(., \"http\") and contains(., \"://www.ncbi.nlm.nih.gov/pmc/articles/PMC\"))]", "//dc:relation[starts-with(., \"info:eu-repo/semantics/altIdentifier/pmid/PMC\") or ((starts-with(., \"info:eu-repo/semantics/altIdentifier/url/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/purl/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/urn/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/pmid/http\")) and contains(., \"://www.ncbi.nlm.nih.gov/pmc/articles/PMC\"))]"]' , xpath:"./*[local-name()='record']" , '(PMC\d+)');
// Disabled HAL identifier rule (XPath list over dc:identifier / dc:relation, record node, regex on HAL-style prefixes).
// The statement spans two physical lines, so BOTH lines carry the comment marker; without it the regex
// argument on the second line is left behind as a dangling, unparsable fragment. Remove both markers to re-enable.
//$varIdHal = identifierExtract('["//dc:identifier[starts-with(., \"ads-\") or starts-with(., \"anses-\") or starts-with(., \"artxibo-\") or starts-with(., \"bioemco-\") or starts-with(., \"cea-\") or starts-with(., \"cel-\") or starts-with(., \"cirad-\") or starts-with(., \"edutice-\") or starts-with(., \"emse-\") or starts-with(., \"EMSE-\") or starts-with(., \"ensl-\") or starts-with(., \"hal-\") or starts-with(., \"HAL-\") or starts-with(., \"halsde-\") or starts-with(., \"halshs-\") or starts-with(., \"hprints-\") or starts-with(., \"in2p3-\") or starts-with(., \"ineris-\") or starts-with(., \"inria-\") or starts-with(., \"Inria-\") or starts-with(., \"inserm-\") or starts-with(., \"insu-\") or starts-with(., \"INSU-\") or starts-with(., \"ird-\") or starts-with(., \"irsn-\") or starts-with(., \"jpa-\") or starts-with(., \"lirmm-\") or starts-with(., \"medihal-\") or starts-with(., \"meteo-\") or starts-with(., \"mnhn-\") or starts-with(., \"obspm-\") or starts-with(., \"pastel-\") or starts-with(., \"pasteur-\") or starts-with(., \"Pasteur-\") or starts-with(., \"peer-\") or starts-with(., \"ssa-\") or starts-with(., \"tel-\") or starts-with(., \"ujm-\") or starts-with(., \"ijn_\") or starts-with(., \"sic_\") or (starts-with(., \"http\") and (contains(., \"://hal.archives-ouvertes.fr/hal\") or contains(., \"://halshs.archives-ouvertes.fr/hal\") or contains(., \"://halsde.archives-ouvertes.fr/hal\") or contains(., \"://medihal.archives-ouvertes.fr/hal\")))]", "//dc:relation[((starts-with(., \"info:eu-repo/semantics/altIdentifier/url/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/purl/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/urn/http\")) and (contains(., \"://hal.archives-ouvertes.fr/hal\") or contains(., \"://halshs.archives-ouvertes.fr/hal\") or contains(., \"://halsde.archives-ouvertes.fr/hal\")))]"]' , xpath:"./*[local-name()='record']" , 
//'((ads|anses|artxibo|bioemco|cea|cel|cirad|edutice|emse|EMSE|ensl|hal|HAL|halsde|halshs|hprints|in2p3|ineris|inria|Inria|inserm|insu|INSU|ird|irsn|jpa|lirmm|medihal|meteo|mnhn|obspm|pastel|pasteur|Pasteur|peer|ssa|tel|ujm)-|(ijn|sic)_).*');
// arXiv candidates, same identifierExtract call shape (XPath list, record node, regex).
//  - XPath 1: dc:identifier starting with 'http' or 'ArXiv: http' and containing '://arxiv.org/abs/' or
//             '://arxiv.org/pdf/', or starting with 'arXiv:' or 'ARXIV:'.
//  - XPath 2: dc:relation whose normalized text starts with info:eu-repo/semantics/altIdentifier/arxiv/
//             (without '/arxiv/http'), or starting with .../(url|purl|urn|arxiv)/http and containing
//             '://arxiv.org/abs/' or '://arxiv.org/pdf/'.
//  - Regex  : after a negative lookahead, captures from an ASCII letter to the end of the value.
// NOTE(review): the lookahead lists '://arxiv.org/abs/' but not '://arxiv.org/pdf/', although the XPaths
// select both URL forms - confirm whether the pdf form needs the same treatment.
$varIdArxv = identifierExtract('["//dc:identifier[((starts-with(., \"http\") or starts-with(., \"ArXiv: http\")) and (contains(., \"://arxiv.org/abs/\") or contains(., \"://arxiv.org/pdf/\"))) or starts-with(., \"arXiv:\") or starts-with(., \"ARXIV:\")]", "//dc:relation[(starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/arxiv/\") and not(contains(., \"/arxiv/http\"))) or ((starts-with(., \"info:eu-repo/semantics/altIdentifier/url/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/purl/http\") or starts-with(., \"info:eu-repo/semantics/altIdentifier/urn/http\") or starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/arxiv/http\")) and (contains(., \"://arxiv.org/abs/\") or contains(., \"://arxiv.org/pdf/\")))]"]' , xpath:"./*[local-name()='record']" , '(?!(://arxiv.org/abs/|:eu-repo/semantics/altIdentifier/arxiv/))([a-zA-Z].*)');
// avoiding regular expressions, while a) correcting ISSNs with no - or other letters instead of - and b) ignoring any stuff after the ISSN (as e.g. print/online/...)