new xslts and test records #110

Closed
andreas.czerniak wants to merge 16 commits from stable_ids into stable_ids
13 changed files with 1578 additions and 2 deletions

View File

@ -59,7 +59,7 @@ public class XSLTTransformationFunction implements MapFunction<MetadataRecord, M
final XsltCompiler comp = processor.newXsltCompiler();
QName datasourceIDParam = new QName(DATASOURCE_ID_PARAM);
comp.setParameter(datasourceIDParam, new XdmAtomicValue(value.getProvenance().getDatasourceId()));
comp.setParameter(datasourceIDParam, new XdmAtomicValue(value.getProvenance().getNsPrefix()));
QName datasourceNameParam = new QName(DATASOURCE_NAME_PARAM);
comp.setParameter(datasourceNameParam, new XdmAtomicValue(value.getProvenance().getDatasourceName()));
XsltExecutable xslt = comp

View File

@ -117,6 +117,36 @@ public class TransformationJobTest extends AbstractVocabularyTest {
// TODO Create significant Assert
}
/**
 * Runs the xslt_cleaning_datarepo_datacite stylesheet against the B2FIND sample record
 * loaded from the classpath and prints the transformed body.
 * No expectation is asserted yet; the test only fails if the transformation throws.
 */
@Test
@DisplayName("Test Transform record XML with xslt_cleaning_datarepo_datacite (B2FIND)")
public void testTransformMostlyB2FindScript() throws Exception {
	// Provenance of the B2FIND datasource used for this run
	final String datasourceId = "re3data_____::r3d100012377";
	final String datasourceName = "B2Find";

	// Classpath locations of the input record and of the stylesheet under test.
	// Alternative pair that was tried previously:
	//   /eu/dnetlib/dhp/transform/input_itgv4.xml
	//   /eu/dnetlib/dhp/transform/scripts/xslt_cleaning_oaiOpenaire_datacite_ExchangeLandingpagePid.xsl
	final String recordPath = "/eu/dnetlib/dhp/transform/b2find_record.xml";
	final String scriptPath = "/eu/dnetlib/dhp/transform/scripts/xslt_cleaning_datarepo_datacite.xsl";

	// Build the input record: provenance first, then the XML body read from the classpath
	final MetadataRecord inputRecord = new MetadataRecord();
	final Provenance provenance = new Provenance(datasourceId, datasourceName, "PREFIX");
	inputRecord.setProvenance(provenance);
	final String recordXml = IOUtils.toString(getClass().getResourceAsStream(recordPath));
	inputRecord.setBody(recordXml);

	// Load the XSLT transformation rule from the classpath and apply it
	final XSLTTransformationFunction transformation = loadTransformationRule(scriptPath);
	final MetadataRecord transformed = transformation.call(inputRecord);

	// Print the transformed record
	System.out.println(transformed.getBody());
	// TODO Create significant Assert
}
@Test
@DisplayName("Test Transform record XML with xslt_cleaning_REST_OmicsDI")
public void testTransformRestScript() throws Exception {

View File

@ -0,0 +1,76 @@
package eu.dnetlib.dhp.transformation.xslt;
import java.util.List;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.junit.jupiter.MockitoExtension;
/**
 * Smoke tests for {@link PersonCleaner}.
 *
 * Each test calls a single method on an instance created with the no-argument constructor.
 * No result is asserted yet, so a test passes as long as the call does not throw.
 */
@ExtendWith(MockitoExtension.class)
public class PersonCleanerTest {

	/** Instance under test; recreated before every test method. */
	private PersonCleaner cleaner;

	@BeforeEach
	public void setup() {
		cleaner = new PersonCleaner();
	}

	/** Invokes {@code getFirstname()} on a fresh instance. */
	@Test
	public void shouldGetFirstname() {
		final List<String> firstnames = cleaner.getFirstname();

		// TODO: assert scenario
	}

	/** Invokes {@code getSurname()} on a fresh instance. */
	@Test
	public void shouldGetSurname() {
		final List<String> surnames = cleaner.getSurname();

		// TODO: assert scenario
	}

	/** Invokes {@code getFullname()} on a fresh instance. */
	@Test
	public void shouldGetFullname() {
		final List<String> fullnames = cleaner.getFullname();

		// TODO: assert scenario
	}

	/** Invokes {@code hash()} on a fresh instance. */
	@Test
	public void shouldHash() {
		final String hashValue = cleaner.hash();

		// TODO: assert scenario
	}

	/** Invokes {@code getNormalisedFullname()} on a fresh instance. */
	@Test
	public void shouldGetNormalisedFullname() {
		final String normalisedFullname = cleaner.getNormalisedFullname();

		// TODO: assert scenario
	}

	/** Invokes {@code getCapitalSurname()} on a fresh instance. */
	@Test
	public void shouldGetCapitalSurname() {
		final List<String> capitalSurnames = cleaner.getCapitalSurname();

		// TODO: assert scenario
	}

	/** Invokes {@code getNameWithAbbreviations()} on a fresh instance. */
	@Test
	public void shouldGetNameWithAbbreviations() {
		final List<String> abbreviatedNames = cleaner.getNameWithAbbreviations();

		// TODO: assert scenario
	}

	/** Invokes {@code isAccurate()} on a fresh instance. */
	@Test
	public void shouldIsAccurate() {
		final boolean accurate = cleaner.isAccurate();

		// TODO: assert scenario
	}
}

View File

@ -0,0 +1,50 @@
<?xml version="1.0" encoding="UTF-8"?>
<record xmlns="http://www.openarchives.org/OAI/2.0/">
<header>
<identifier>cb22585e-98fe-4661-973c-0e2c66647b2e</identifier>
<datestamp>2021-03-15T15:55:39Z</datestamp>
<setSpec>eudat-b2find</setSpec>
</header>
<metadata>
<oai_datacite xmlns="http://schema.datacite.org/oai/oai-1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schema.datacite.org/oai/oai-1.0/ http://schema.datacite.org/oai/oai-1.0/oai.xsd">
<isReferenceQuality>false</isReferenceQuality>
<schemaVersion>4.3</schemaVersion>
<datacentreSymbol>EUDAT B2FIND</datacentreSymbol>
<payload>
<resource xmlns="http://datacite.org/schema/kernel-4" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.3/metadata.xsd">
<creators>
<creator>
<creatorName>Viken fylkeskommune</creatorName>
</creator>
</creators>
<titles>
<title>Asgjerrud søndre</title>
</titles>
<descriptions>
<description descriptionType="Abstract">Kleberforekomst. Lite markert. I stien og i en forsenkning i terrenget på S-sida er det en hel del kleberavfall. Ved besøket fantes ikke fast fjell med kleber i, men når en roter i jorda, finner en ubearbeidede klebersteinsstykker. Noen av dem har hakkespor. Det er mulig at hele forsenkningen i terrenget skyldes at man har hogd fram kleber. Det ser ikke ut som det kan ha vært særlig stor drift. Herfra skal det ha vært tatt kleber til Eidsberg middelalderkirke. Fortalt av Helge Enger. Ingen vet hvor lenge man har tatt kleber her. Forsenkningen er orientert NV-SØ ca 12m, br 7m, dybde 2,5m.</description>
</descriptions>
<geoLocations>
<geoLocation>
<geoLocationPlace>(11.702 LON, 59.494 LAT)</geoLocationPlace>
</geoLocation>
</geoLocations>
<subjects>
<subject>Arkeologisk minne</subject>
<subject>Steinbrudd</subject>
<subject>Archaeology</subject>
</subjects>
<alternateIdentifiers>
<alternateIdentifier alternateIdentifierType="URL">http://kulturminnesok.no/ra/lokalitet/19916</alternateIdentifier>
</alternateIdentifiers>
<version>20180301</version>
<publicationYear>2004</publicationYear>
<publisher>Askeladden</publisher>
<language>Norwegian</language>
<rightsList>
<rights>NLOD (https://data.norge.no/nlod/en/2.0/)</rights>
</rightsList>
</resource>
</payload>
</oai_datacite>
</metadata>
</record>

View File

@ -0,0 +1,400 @@
// dc_cleaning_OPENAIREplus_compliant ; 2021-05-03
declare_script "dc_cleaning_OpenAIREplus_compliant";
declare_ns oaf = "http://namespace.openaire.eu/oaf";
declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
declare_ns dc = "http://purl.org/dc/elements/1.1/";
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";
declare_ns oai = "http://www.openarchives.org/OAI/2.0/";
declare_ns xs = "http://www.w3.org/2001/XMLSchema";
$var0 = "''";
$varFP7 = "'corda_______::'";
$varH2020 = "'corda__h2020::'";
$varAKA = "'aka_________::'"; // tbd, no statements yet
$varAFF = "'aff_________::'";
$varARC = "'arc_________::'";
$varCONICYT = "'conicytf____::'";
$varDFG = "'dfgf________::'";
$varFCT = "'fct_________::'";
$varFWF = "'fwf_________::'";
$varGSRT = "'gsrt________::'";
$varHRZZ = "'irb_hr______::'";
$varINNOVIRIS = "'innoviris___::'";
$varMESTD = "'mestd_______::'";
$varMIUR = "'miur________::'"; // tbd, no statements yet
$varMZOS = "'irb_hr______::'";
$varNHMRC = "'nhmrc_______::'";
$varNIH = "'nih_________::'";
$varNSF = "'nsf_________::'";
$varNWO = "'nwo_________::'";
$varRCUK = "'rcuk________::'";
$varRIF = "'rif_________::'";
$varRSF = "'rsf_________::'";
$varSFI ="'sfi_________::'";
$varSGOV = "'sgov________::'";
$varSNSF = "'snsf________::'";
$varTARA = "'taraexp_____::'";
$varTUBITAK = "'tubitakf____::'";
$varWT = "'wt__________::'";
$varDummy = "''";
// variables from calling context (can't request on the fly @ Hadoop)
// should varOfficialName, varDataSourceId
static $varDatasourceid = getValue( PROFILEFIELD, [xpath:"concat('collection(&amp;apos;/db/DRIVER/RepositoryServiceResources&amp;apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&amp;quot;NamespacePrefix&amp;quot;][value=&amp;quot;', //oaf:datasourceprefix, '&amp;quot;]]')", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"concat('collection(&amp;apos;/db/DRIVER/RepositoryServiceResources&amp;apos;)//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=&amp;quot;NamespacePrefix&amp;quot;][value=&amp;quot;', //oaf:datasourceprefix, '&amp;quot;]]')", xpath:"//CONFIGURATION/OFFICIAL_NAME"]);
static $varRepoid = xpath:"//dri:repositoryId";
dri:objIdentifier = xpath:"//dri:objIdentifier";
dri:repositoryId = $varRepoid;
dri:recordIdentifier = xpath:"//dri:recordIdentifier";
// skip/terminate test records
if xpath:"//oaf:datasourceprefix[.='od______2659'] and //dc:title[lower-case(.) = 'popper test archive'] and //dc:creator[lower-case(.) = 'author, test'] and //dc:description[starts-with(lower-case(.), 'a short description of the article')]" dc:title = skipRecord(); else $varDummy= "''";
if xpath:"//oaf:datasourceprefix[.='od______2659'] and //dc:title[lower-case(.) = ('test doc', 'test_publish', 'test html', 'final_test')] and //dc:description = //dc:title" dc:title = skipRecord(); else $varDummy= "''";
if xpath:"count(//*[matches(., '^test(test|[\s\d,])*$', 'i')]) &gt;= 2" dc:title = skipRecord(); else $varDummy= "''";
if xpath:"//oai:setSpec[.='col_data_1694'] or //dc:creator[starts-with(., 'test')]" dc:coverage = skipRecord(); else $varDummy = "''";
apply xpath:"//dc:creator[not(//oaf:datasourceprefix = 'od______4037')]" if xpath:"string-length(.) &gt; 0 and not(contains(., 'US National Cancer Institute')) and normalize-space(.) != ','" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''";
apply xpath:"//dc:creator[//oaf:datasourceprefix = 'od______4037']" if xpath:"string-length(.) &gt; 0 and normalize-space(.) != ',' and normalize-space(.)!='()'" dc:creator = xpath:"normalize-space(replace(., '^((.|\s)*)\((.|\s)*\)\s*', '$1'))"; else $varDummy = "''";
if xpath:"//dc:title[string-length(.)&gt; 0] and not(//dc:creator[.='Test'])" $varDummy = "''"; else dc:coverage = skipRecord();
apply xpath:"//dc:title" if xpath:"string-length(.) &gt; 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''";
//
//apply xpath:"//dc:subject" if xpath:"string-length(.) &gt; 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
//
// subject
// gather subjects: from fields setSpec, subject, classification, keywords
// assign context: if field value or @xsi:type refers to an approved vocabulary/classification/thesaurus, assign its normed code
// normalise form: in case of approved vocabulary/classification/thesaurus: 'context:subject', otherwise: 'subject [additional information]'
// remove duplicates: identical pairs of value/term and context/vocabulary
$subjVocHarv = xpath:"'agrovoc','acm','bicssc','bk','ddc','gok','jel classification','jel codes','jelelement','jel','lcsh','mesh','msc','pacs','rvk','udc'"; // subject contexts/vocabularies as harvested
$subjVocCode = xpath:"'agrovoc','ccs','bicssc','bk','ddc','gok','jel', 'jel', 'jel', 'jel','lcsh','mesh','msc','pacs','rvk','udc'"; // subject contexts/vocabularies as normed within OpenAIRE
$subjVoc = xpath:"concat('(',string-join($subjVocHarv,'|'),')')"; // regular expression for subject contexts
//$subjVocVal = xpath:"concat('^\s*','((info:eu-repo/classification/)?',$subjVoc,'[:/].*)')"; // regular expression for subject contexts in field values
$subjVocVal = xpath:"concat('^\s*','((info:eu-repo/classification/|http://www.ncbi.nlm.nih.gov/|http://aims.fao.org/aos/)?',$subjVoc,'[:/].*)')"; // regular expression for subject contexts in field values
$subjVocPar = xpath:"concat('^\s*','(dcterms:\s*)?',$subjVoc,'\s*$')"; // regular expression for subject contexts in field parameters
// subject context: approved vocabulary/classification/thesaurus in field value
//dc:subject = set(xpath:"(//dc:subject|//*[local-name()='setSpec'])[string-length(.) &gt; 0 and matches(., $subjVocVal,'i')]", @classid=xpath:"subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(.),$subjVocVal,'$3','i')),1)";, @classname=xpath:"subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(.),$subjVocVal,'$3','i')),1)";, @schemeid=xpath:"'dnet:subject_classification_typologies'";, @schemename=xpath:"'dnet:subject_classification_typologies'";);
//$subjListInVal = xpath:"(//dc:subject|//*[local-name()='setSpec'])[string-length(.) &gt; 0 and matches(., $subjVocVal,'i')]/concat(./subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(.),$subjVocVal,'$3','i')),1),':',normalize-space(replace(.,'(info:eu-repo/classification/)?([^/:]*)[:/](.*)','$3')))";
$subjListInVal = xpath:"(//dc:subject[not(//oaf:datasourceprefix = 'od______3694')]|//*[local-name()='setSpec'])[string-length(.) &gt; 0 and matches(., $subjVocVal,'i')]/concat(./subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(.),$subjVocVal,'$3','i')),1),':',normalize-space(replace(.,'(info:eu-repo/classification/|http://www.ncbi.nlm.nih.gov/|http://aims.fao.org/aos/)?([^/:]*)[:/](.*)','$3')))";
// subject context: approved vocabulary/classification/thesaurus in field parameter
//dc:subject = set(xpath:"(//dc:subject|//*[local-name()='classification'])[string-length(.) &gt; 0 and matches(./@xsi:type, $subjVocPar,'i')]", @classid=xpath:"subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(./@xsi:type),$subjVocPar,'$2','i')),1)";, @classname=xpath:"subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(./@xsi:type),$subjVocPar,'$2','i')),1)";, @schemeid=xpath:"'dnet:subject_classification_typologies'";, @schemename=xpath:"'dnet:subject_classification_typologies'";);
$subjListInPar = xpath:"(//dc:subject|//*[local-name()='classification'])[string-length(.) &gt; 0 and matches(./@xsi:type, $subjVocPar,'i')]/concat(./subsequence($subjVocCode,index-of($subjVocHarv,replace(lower-case(./@xsi:type),$subjVocPar,'$2','i')),1),':',normalize-space(.))";
// subject context: approved vocabulary/classification/thesaurus in field value or parameter
$subjListInParAndVal = xpath:"distinct-values(insert-before($subjListInVal,0,$subjListInPar))";
dc:subject = set(xpath:"$subjListInParAndVal", @classid=xpath:"substring-before(.,':')";, @classname=xpath:"substring-before(.,':')";, @schemeid=xpath:"'dnet:subject_classification_typologies'";, @schemename=xpath:"'dnet:subject_classification_typologies'";);
// subject context: no (approved) vocabulary/classification/thesaurus
//dc:subject = set(xpath:"(//dc:subject|//*[local-name()='classification']|//*[local-name()='keywords'])[string-length(.) &gt; 0 and not(matches(., $subjVocVal,'i')) and not(matches(./@xsi:type,$subjVocPar,'i'))]", @classid=xpath:"'keyword'";, @classname=xpath:"'keyword'";, @schemeid=xpath:"'dnet:result_subject'";, @schemename=xpath:"'dnet:result_subject'";);
//$subListKeywords = xpath:"distinct-values((//dc:subject|//*[local-name()='classification']|//*[local-name()='keywords'])[string-length(.) &gt; 0 and not(matches(., $subjVocVal,'i')) and not(matches(./@xsi:type,$subjVocPar,'i'))]/replace(concat(normalize-space(replace(.,'((info:eu-repo/classification/[^/]*/)|([^:]*:))(.*)','$4')),' [',normalize-space(concat(replace(./@xsi:type,'dcterms:',''),' ',substring-before(replace(.,'(info:eu-repo/classification/)?([^/:]*)[/:](.*)','$2:$3'),':'))),']'),' \[\]',''))";
$subjListKeywordsInfo = xpath:"(//dc:subject|//*[local-name()='classification']|//*[local-name()='keywords'])[string-length(.) &gt; 0 and not(matches(., $subjVocVal,'i')) and not(matches(./@xsi:type,$subjVocPar,'i')) and starts-with(.,'info:eu-repo/classification/')]
/replace(concat(normalize-space(replace(.,'info:eu-repo/classification/[^/]*/(.*)','$1')),' [',normalize-space(concat(replace(./@xsi:type,'dcterms:',''),' ',replace(.,'info:eu-repo/classification/([^/]*)/.*','$1'))),']'),' \[\]','')";
$subjListKeywordsColon = xpath:"(//dc:subject|//*[local-name()='classification']|//*[local-name()='keywords'])[string-length(.) &gt; 0 and not(matches(., $subjVocVal,'i')) and not(matches(./@xsi:type,$subjVocPar,'i')) and not(starts-with(.,'info:eu-repo/classification/'))]/replace(concat(normalize-space(replace(.,'[^:]*:(.*)','$1')),' [',normalize-space(concat(replace(./@xsi:type,'dcterms:',''),' ',substring-before(.,':'))),']'),' \[\]','')";
$subjListKeywordsInfoAndColon = xpath:"distinct-values(insert-before($subjListKeywordsInfo,0,$subjListKeywordsColon))";
dc:subject = set(xpath:"$subjListKeywordsInfoAndColon", @classid=xpath:"'keyword'";, @classname=xpath:"'keyword'";, @schemeid=xpath:"'dnet:result_subject'";, @schemename=xpath:"'dnet:result_subject'";);
// Fachrepositorium Lebenswissenschaften's odd subject declarations don't fit into the subject handling above, i.e. skip the vocabulary transformation for them and only normalise whitespace
dc:subject = xpath:"//dc:subject[//oaf:datasourceprefix = 'od______3694'][string-length(.) &gt; 0 and matches(., $subjVocVal,'i')]/normalize-space(replace(., '\n', ' '))";
//
apply xpath:"//dc:publisher" if xpath:"string-length(.) &gt; 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''";
apply xpath:"//dc:source" if xpath:"string-length(.) &gt; 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''";
dc:contributor = xpath:"//dc:contributor";
dc:description = xpath:"string-join(//dc:description[concat(normalize-space(.), '')], codepoints-to-string(10))";
$varHttpTest = "''";
//if xpath:"//dc:identifier[starts-with(normalize-space(.), 'http')][not(starts-with(., 'http://hdl.handle.net/123456789') or starts-with(., 'https://hdl.handle.net/123456789'))]" $varHttpTest = "true"; else dc:identifier = skipRecord();
//apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
dr:dateOfCollection = xpath:"//dri:dateOfCollection";
static dr:dateOfTransformation = xpath:"current-dateTime()";
dc:type = xpath:"//dc:type";
dc:format = xpath:"//dc:format";
dc:date = xpath:"distinct-values(//dc:date)";
// - Language
dc:language = Convert(xpath:"//dc:language", Languages);
// dc:language = "eng";
// if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord();
$varDateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
if xpath:"//oaf:datasourceprefix[.='od_______883']" oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date[3]", DateISO8601); else $varDummy= "''";
if xpath:"//oaf:datasourceprefix[.='od______3063']" oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601); else $varDummy= "''";
if xpath:"(//oaf:datasourceprefix[.='od______2658'] or //oaf:datasourceprefix[.='od______1318']) and starts-with($varDateAccepted, '1000')" oaf:dateAccepted = $varDummy; else $varDummy= "''";
if xpath:"not(//oaf:datasourceprefix[.='od_______883']) and not(//oaf:datasourceprefix[.='od______3063']) and not(starts-with($varDateAccepted, '10') or starts-with($varDateAccepted, '00'))" oaf:dateAccepted = $varDateAccepted; else $varDummy= "''";
// - EmbargoEndDate
// apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''";
$varEmbargoEnd = xpath:"distinct-values(//dc:date[matches(normalize-space(.), '(.*)(info:eu-repo/date/embargoEnd/)(\d\d\d\d-\d\d-\d\d)', 'i')][contains(lower-case(.), 'info:eu-repo')]/replace(normalize-space(.), '(.*)(info:eu-repo/date/embargoEnd/)(\d\d\d\d-\d\d-\d\d)', '$3', 'i'))";
oaf:embargoenddate = $varEmbargoEnd;
// --- projects ---
// FP7
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/fp7/)(\d\d\d\d\d\d)(.*)', 'i')][year-from-date(xs:date(max(($varDateAccepted, '0001-01-01')))) gt 2006][contains(lower-case(.), 'info:eu-repo')]/concat($varFP7, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/fp7/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))";
// ERC (provided by OAPEN)
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement/european research council.*/-/)(\d\d\d\d\d\d)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varFP7, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement/european research council.*/-/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))";
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement/european union.* seventh framework programme.*/-/)(\d\d\d\d\d\d)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varFP7, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement/european union.* seventh framework programme.*/-/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))";
// H2020
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/h2020/)(\d\d\d\d\d\d)(.*)', 'i')][year-from-date(xs:date(max(($varDateAccepted, '0001-01-01')))) gt 2013][contains(lower-case(.), 'info:eu-repo')]/concat($varH2020, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement[/]+ec/h2020/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))";
// AFF
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/aff)/(.*)/(\d{3}\.?\d{2,3})', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varAFF, replace(normalize-space(.), '(info:eu-repo/grantagreement/aff)/(.*)/(\d{3}\.?\d{2,3}).*', '$3', 'i'))";
// AKA \d*
oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/aka/[^/]*/(\d+)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varAKA, replace(normalize-space(.), 'info:eu-repo/grantagreement/aka/[^/]*/(\d+)(/.*)?', '$1', 'i')))";
// ARC ([A-Z]+[\d/]*|\d+)
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/arc)/(.*)/([A-Z]+[\d/]*|\d+)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varARC, replace(normalize-space(.), '(info:eu-repo/grantagreement/arc)/(.*?)/([A-Z]+[\d/]*|\d+)', '$3', 'i'))";
// CONICYT \d{7,8}
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/conicyt/.*/.*?(\d{7,8})', 'i')]/concat($varCONICYT, replace(normalize-space(.), 'info:eu-repo/grantagreement/conicyt/.*/.*?(\d{7,8})', '$1', 'i'))";
// DFG \d{7,9}
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/dfg)/(.*)/(.*?)(\d{7,9})', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varDFG, replace(normalize-space(.), '(info:eu-repo/grantagreement/dfg)/(.*?)/.*?(\d{7,9})', '$3', 'i'))";
// FCT (SFRH/BD/)(\d+)(/\d+) ... ((SFRH|PRAXIS XXI|PD|FMRH)/[A-Z]*/)?\d*(/\d*)? ...
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/fct/[^/]*/[^/]+', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varFCT, replace(normalize-space(.), 'info:eu-repo/grantagreement/fct/[^/]*/([^/]+)(/.*)?', '$1', 'i'))";
// FWF [A-Z]{1,3} \d*
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/fwf/[^/]*/.*?([A-Z]{1,3} \d*).*', 'i')]/concat($varFWF, replace(normalize-space(.), 'info:eu-repo/grantagreement/fwf/[^/]*/.*?([A-Z]{1,3} \d*).*', '$1', 'i'))";
// GSRT
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/gsrt/[^/]*/[^/]+', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varGSRT, replace(normalize-space(.), 'info:eu-repo/grantagreement/gsrt/[^/]*/([^/]+)(/.*)?', '$1', 'i'))";
// HRZZ info:eu-repo/grantagreement/HRZZ/[^/]*/([^/]*|[^/]*/\d*)(/.*)?
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/hrzz/[^/]*/([^/]*|[^/]*/\d*)(/[^\d].*)?', 'i')]/concat($varHRZZ, replace(normalize-space(.), 'info:eu-repo/grantagreement/hrzz/[^/]*/([^/]*|[^/]*/\d*)(/[^\d].*)?', '$1', 'i'))";
// INNOVIRIS
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/innoviris/[^/]*/[^/]+', 'i')]/concat($varINNOVIRIS, replace(normalize-space(.), 'info:eu-repo/grantagreement/innoviris/[^/]*/([^/]+)(/.*)?', '$1', 'i'))";
// MESTD \d*
// Extracts the numeric grant id from info:eu-repo/grantagreement/mestd/<programme>/<id>[/...] relations and prefixes it with $varMESTD.
// The replace() pattern has to stay on one line: a line break inside the regex literal becomes part of the pattern and stops it from matching.
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/mestd/[^/]*/\d+', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varMESTD, replace(normalize-space(.), 'info:eu-repo/grantagreement/mestd/[^/]*/(\d+)(/.*)?', '$1', 'i'))";
// MESTD
//oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(info:eu-repo/grantagreement/mestd)/(.+)/(\d+)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varMESTD, replace(normalize-space(.), '(info:eu-repo/grantagreement/mestd)/(.+)/(\d+)(.*)', '$3', 'i'))";
// MIUR [A-Z0-9]*
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/miur/[^/]*/.*?[A-Z0-9]*', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varMIUR, replace(normalize-space(.), 'info:eu-repo/grantagreement/miur/[^/]*/.*?([A-Z0-9]*).*?', '$1', 'i'))";
// MZOS \d{3}-\d{7}-\d{4}
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/mzos/[^/]*/.*?(\d{3}-\d{7}-\d{4})', 'i')]/concat($varMZOS, replace(normalize-space(.), 'info:eu-repo/grantagreement/mzos/[^/]*/.*?(\d{3}-\d{7}-\d{4}).*', '$1', 'i'))";
// NHMRC \d{3,6}
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/nhmrc/.*/.*?(\d{3,6})', 'i')]/concat($varNHMRC, replace(normalize-space(.), 'info:eu-repo/grantagreement/nhmrc/.*/.*?(\d{3,6})', '$1', 'i'))";
// NIH ([A-Z\d]*-[A-Z\d]*|ALM 1200300-300-0-1|CIT S&amp;?SF-0-0-1|[A-Z\d]{10}\*[5-7]-0-0-1) ... hm
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/nih/[^/]*/[^/]+', 'i')]/concat($varNIH, replace(normalize-space(.), 'info:eu-repo/grantagreement/nih/[^/]*/([^/]+)(/.*)?', '$1', 'i'))";
// NSF (\d{7}|\d{2}[A-Z]\d{4})
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/nsf/[^/]*/[^/]+', 'i')]/concat($varNSF, replace(normalize-space(.), 'info:eu-repo/grantagreement/nsf/[^/]*/([^/]+)(/.*)?', '$1', 'i'))";
// NWO
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/nwo/[^/]*/[^/]+', 'i')]/concat($varNWO, replace(normalize-space(.), 'info:eu-repo/grantagreement/nwo/[^/]*/([^/]+)(/.*)?', '$1', 'i'))";
// RCUK
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/rcuk/[^/]*/[^/]+', 'i')]/concat($varRCUK, replace(normalize-space(.), 'info:eu-repo/grantagreement/rcuk/[^/]*/([^/]+)(/.*)?', '$1', 'i'))";
// RIF
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/rif/[^/]*/[^/]+', 'i')]/concat($varRIF, replace(normalize-space(.), 'info:eu-repo/grantagreement/rif/[^/]*/([^/]+)(/.*)?', '$1', 'i'))";
// RSF
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/rsf/[^/]*/[^/]+', 'i')]/concat($varRSF, replace(normalize-space(.), 'info:eu-repo/grantagreement/rsf/[^/]*/([^/]+)(/.*)?$', '$1', 'i'))";
// SFI
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/sfi/[^/]*/[^/]+', 'i')]/concat($varSFI, replace(normalize-space(.), 'info:eu-repo/grantagreement/sfi/[^/]*/([^/]+)(/.*)?', '$1', 'i'))";
// SGOV
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/sgov/[^/]*/[^/]+', 'i')]/concat($varSGOV, replace(normalize-space(.), 'info:eu-repo/grantagreement/sgov/[^/]*/([^/]+)(/.*)?', '$1', 'i'))";
// SNSF
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/snsf/[^/]*/[^/]+', 'i')]/concat($varSNSF, replace(normalize-space(.), 'info:eu-repo/grantagreement/snsf/[^/]*/([^/]+)(/.*)?', '$1', 'i'))";
// TARA
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/tara/[^/]*/[^/]+', 'i')]/concat($varTARA, replace(normalize-space(.), 'info:eu-repo/grantagreement/tara/[^/]*/([^/]+)(/.*)?', '$1', 'i'))";
// TUBITAK
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), 'info:eu-repo/grantagreement/tubitak/[^/]*/\d{3}[A-Z]\d{2,3}', 'i')]/concat($varTUBITAK, replace(normalize-space(.), 'info:eu-repo/grantagreement/tubitak/[^/]*/(\d{3}[A-Z]\d{2,3})(/.*)?', '$1', 'i'))";
// WT
//oaf:projectid = xpath:"distinct-values(//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement/wellcome trust/-/)(\d\d\d\d\d\d)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varWT, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement/wellcome trust/-/)(\d\d\d\d\d\d)(.*)', '$3', 'i')))";
// WT
oaf:projectid = xpath:"//dc:relation[matches(normalize-space(.), '(.*)(info:eu-repo/grantagreement/wellcome trust/-/)(\d\d\d\d\d\d)(.*)', 'i')][contains(lower-case(.), 'info:eu-repo')]/concat($varWT, replace(normalize-space(.), '(.*)(info:eu-repo/grantagreement/wellcome trust/-/)(\d\d\d\d\d\d)(.*)', '$3', 'i'))";
//dc:relation = xpath:"//dc:relation";
dc:relation = xpath:"//dc:relation[not(contains(., 'info:eu-repo/semantics/altIdentifier/') and substring-before(substring-after(., 'info:eu-repo/semantics/altIdentifier/'), '/') = ('ark', 'arxiv', 'doi', 'hdl', 'isbn', 'urn', 'pmid', 'wos', 'issn', 'eissn', 'url'))][not(starts-with(., 'http') and (contains(., '://localhost/') or contains(., '://localhost:')))][count(index-of(($varIdDoi//value, $varIdDoiNonStd//value), replace(., '(info:doi:|doi:|info:doi/|https?://(dx.)?doi.org/)', ''))) = 0]";
dc:relation = xpath:"subsequence(distinct-values(($varIdDoi//value, $varIdDoiNonStd//value)), 2)";
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
//
//
oaf:collectedDatasourceid = xpath:"$varDatasourceid";
//
// dr:CobjCategory = Convert(xpath:"//dc:type", TextTypologies);
// if xpath:"//dc:type[1]/lower-case(.) = 'text'" dr:CobjCategory = Convert(xpath:"reverse(//dc:type)", TextTypologies); else dr:CobjCategory = Convert(xpath:"//dc:type", TextTypologies);
// if xpath:"//dc:type[1]/lower-case(.) = ('text', 'info:eu-repo/semantics/other', 'other') or //oaf:datasourceprefix/lower-case(.) = 'openedition_'" dr:CobjCategory = Convert(xpath:"reverse(//dc:type | //oai:setSpec)", TextTypologies); else dr:CobjCategory = Convert(xpath:"//dc:type | //oai:setSpec", TextTypologies);
$varCobjCategoryReverse = Convert(xpath:"insert-before(reverse(//dc:type) , 0, reverse(//oai:setSpec))", TextTypologies);
$varSuperTypeReverse = Convert(xpath:"normalize-space($varCobjCategoryReverse)", SuperTypes);
dr:CobjCategory = set(xpath:"//oaf:datasourceprefix[//dc:type[1]/lower-case(.) = ('text', 'info:eu-repo/semantics/other', 'other') or //oaf:datasourceprefix/lower-case(.) = 'openedition_']/$varCobjCategoryReverse", @type = $varSuperTypeReverse;);
$varCobjCategoryStraight = Convert(xpath:"insert-before(//dc:type , 100, //oai:setSpec)", TextTypologies);
$varSuperTypeStraight = Convert(xpath:"normalize-space($varCobjCategoryStraight)", SuperTypes);
dr:CobjCategory = set(xpath:"//oaf:datasourceprefix[not(//dc:type[1]/lower-case(.) = ('text', 'info:eu-repo/semantics/other', 'other') or //oaf:datasourceprefix/lower-case(.) = 'openedition_') and (not(//oaf:datasourceprefix/lower-case(.) = 'od________65'))]/$varCobjCategoryStraight", @type = $varSuperTypeStraight;);
// CERN CDS when dc:type or setSpec explicitly states resource type
// (currently :CONF not covered as not included in vocabulary, and as landing in literature already; other sets might also be addressed, depending on marked resource types)
// Input is the first item of the sequence: all dc:type values, followed by the second ':'-separated token of every setSpec containing ':BOOK' or ':REPORT'.
$varCobjCategoryCernExplicit = Convert(xpath:"normalize-space((//dc:type, //*[local-name() = 'setSpec'][contains(., ':BOOK') or contains(., ':REPORT')]/tokenize(., ':')[2])[1])", TextTypologies);
$varSuperTypeCernExplicit = Convert(xpath:"normalize-space($varCobjCategoryCernExplicit)", SuperTypes);
// Guard: datasource prefix is 'od________65' and the record has a dc:type or a setSpec containing ':BOOK' or ':REPORT'.
dr:CobjCategory = set(xpath:"//oaf:datasourceprefix[//oaf:datasourceprefix = 'od________65' and (//dc:type or //*[local-name() = 'setSpec'][contains(., ':BOOK') or contains(., ':REPORT')])]/$varCobjCategoryCernExplicit", @type = $varSuperTypeCernExplicit;);
// CERN CDS when set vaguely hints on literature
// Yields the literal code '0038' when no setSpec contains ':BOOK' or ':REPORT' and some setSpec ends with ':FULLTEXT'.
$varCobjCategoryCernVague = xpath:"//oaf:datasourceprefix[not(//*[local-name() = 'setSpec'][contains(., ':BOOK') or contains(., ':REPORT')]) and //*[local-name() = 'setSpec'][ends-with(., ':FULLTEXT')]]/'0038'";
$varSuperTypeCernVague = Convert(xpath:"normalize-space($varCobjCategoryCernVague)", SuperTypes);
// Guard: datasource prefix is 'od________65', no dc:type, and no setSpec containing ':BOOK' or ':REPORT'.
dr:CobjCategory = set(xpath:"//oaf:datasourceprefix[//oaf:datasourceprefix = 'od________65' and not(//dc:type) and not(//*[local-name() = 'setSpec'][contains(., ':BOOK') or contains(., ':REPORT')])]/$varCobjCategoryCernVague", @type = $varSuperTypeCernVague;);
// CERN CDS when no hint
// Yields the literal code '0000' when there is neither a dc:type nor a setSpec that contains ':BOOK' or ':REPORT' or ends with ':FULLTEXT'.
$varCobjCategoryCernUnknown = xpath:"//oaf:datasourceprefix[not(//dc:type) and not(//*[local-name() = 'setSpec'][contains(., ':BOOK') or contains(., ':REPORT') or ends-with(., ':FULLTEXT')])]/'0000'";
$varSuperTypeCernUnknown = Convert(xpath:"normalize-space($varCobjCategoryCernUnknown)", SuperTypes);
// Same guard as the 'vague' rule; under that guard the two variables are defined under mutually exclusive conditions (a setSpec ending with ':FULLTEXT' present vs. absent).
dr:CobjCategory = set(xpath:"//oaf:datasourceprefix[//oaf:datasourceprefix = 'od________65' and not(//dc:type) and not(//*[local-name() = 'setSpec'][contains(., ':BOOK') or contains(., ':REPORT')])]/$varCobjCategoryCernUnknown", @type = $varSuperTypeCernUnknown;);
//
// review status
// Each $varRefereed* variable holds candidate review-level codes found by one heuristic; the literal codes produced by the XPath heuristics are '0001' and '0002'.
// Candidates obtained by passing dc:type, oai:setSpec and dc:description through Convert(..., ReviewLevels).
$varRefereedConvt = Convert(xpath:"(//dc:type, //oai:setSpec, //dc:description)", ReviewLevels);
// '0001' for every dc:relation whose lower-cased, space-normalised text matches a DOI with prefix 10.24072.
// NOTE(review): the '.' in '10.24072' is not escaped in the regex and therefore matches any character - confirm whether that is intended.
$varRefereedReltn = xpath:"//*[string(node-name(.)) = 'dc:relation' and matches(lower-case(normalize-space(.)), 'doi\s*:?\s*10.24072/.+')]/'0001'";
//$varRefereedProse = xpath:"//*[string(node-name(.)) = 'dc:description' and matches(lower-case(.), '(this|a)\s*(article|preprint)\s*(has\s*been\s*)?(peer[\-\s]*)?reviewed\s*and\s*recommended\s*by\s*peer[\-\s]*community') and contains(., '10.24072/')]/'0001'";
// dc:description heuristics: '0001' when the lower-cased text matches one of three 'refereed' / 'peer review' phrasings; '0002' when the text (not lower-cased) starts with 'version' followed by a preliminary marker or a single '0'.
$varRefereedDesct = xpath:"(//dc:description[matches(lower-case(.), '.*(this\s*book|this\s*volume|it)\s*constitutes\s*the\s*(thoroughly\s*)?refereed') or matches(lower-case(.), '.*peer[\.\-_/\s\(\)]?review\s*under\s*responsibility\s*of.*') or matches(lower-case(.), '(this|a)\s*(article|preprint)\s*(has\s*been\s*)?(peer[\-\s]*)?reviewed\s*and\s*recommended\s*by\s*peer[\-\s]*community')]/'0001', //dc:description[matches(., '^version\s*(préliminaire.*|preliminary.*|0$)')]/'0002')";
// dc:title heuristic: '0001' when the lower-cased title ends with a bracketed remark containing 'peer review :'.
$varRefereedTitle = xpath:"//dc:title[matches(lower-case(.), '.*\[.*peer[\s\-\._]*review\s*:.*\]\s*$')]/'0001'";
// dc:source / dc:publisher heuristic: '0002' when the lower-cased value contains 'preprint(s)' as a separate word.
$varRefereedSourc = xpath:"//*[string(node-name(.)) = ('dc:source', 'dc:publisher') and matches(lower-case(.), '^(.*\s)?pre[\s\-_]*prints?([\s\.,].*)?$')]/'0002'";
//$varRefereedIdntf = xpath:"(//*[string(node-name(.)) = 'dc:relation' and matches(lower-case(.), '^info:eu-repo/semantics/altidentifier/doi/10\..*[\.\-_/\s\(\)]pre[\.\-_/\s\(\)]?prints?([\.\-_/\s\(\)].*)?$')], //*[string(node-name(.)) = 'dc:identifier' and matches(lower-case(.), '(^|.*[\.\-_/\s\(\)])pre[\.\-_/\s\(\)]?prints?([\.\-_/\s\(\)].*)?$')])/'0002'";
// Identifier heuristics: '0002' for a 'preprint' marker in dc:identifier (only when the record has exactly one dc:identifier); '0001' for a 'refereed' or '-peer-reviewed-...article-' marker in dc:identifier, and for DOIs with prefix 10.12688 of the listed platforms in dc:identifier or dc:relation.
$varRefereedIdntf = xpath:"(//*[string(node-name(.)) = 'dc:identifier' and matches(lower-case(.), '(^|.*[\.\-_/\s\(\)%\d#])pre[\.\-_/\s\(\)%\d#]?prints?([\.\-_/\s\(\)%\d#].*)?$')][count(//dc:identifier) = 1]/'0002', //*[string(node-name(.)) = 'dc:identifier' and matches(lower-case(.), '(^|.*[\.\-_/\s\(\)%\d#])refereed([\.\-_/\s\(\)\d%\d#].*)?$')]/'0001', //*[string(node-name(.)) = 'dc:identifier' and matches(lower-case(.), '.*-peer-reviewed-(fulltext-)?article-.*')]/'0001', //*[string(node-name(.)) = 'dc:identifier' and matches(., '^(https?://(dx\.)?doi.org/)?10\.12688/(f1000research|wellcomeopenres|hrbopenres|aasopenres|gatesopenres)\.\d*(\.\d*|-\d*\.v\d*)$')]/'0001', //*[string(node-name(.)) = 'dc:relation' and matches(., '^info:eu-repo/semantics/altIdentifier/doi/10\.12688/(f1000research|wellcomeopenres|aasopenres|gatesopenres|hrbopenres)\.\d*(\.\d*|-\d*\.v\d*)$', 'i')]/'0001')";
// All candidate codes in one sequence.
$varRefereed = xpath:"($varRefereedConvt, $varRefereedReltn, $varRefereedDesct, $varRefereedTitle, $varRefereedSourc, $varRefereedIdntf)";
// '0001' wins whenever it occurs at least once; '0002' is emitted only when it occurs and '0001' does not. The else branches only assign a dummy variable.
if xpath:"count(index-of($varRefereed, '0001')) &gt;0" oaf:refereed = xpath:"'0001'"; else $varDummy= "''";
if xpath:"count(index-of($varRefereed, '0002')) &gt;0 and count(index-of($varRefereed, '0001')) = 0" oaf:refereed = xpath:"'0002'"; else $varDummy= "''";
//
// Access rights. $varEmbargoEnd is defined outside this section; max(($varEmbargoEnd, '0001-01-01')) falls back to the date 0001-01-01 when it is empty.
// EMBARGO: the embargo end date lies after the current date and the datasource prefix is not 'od_______151'.
if xpath:"(xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date()) and not(//oaf:datasourceprefix = 'od_______151')" oaf:accessrights = "EMBARGO"; else $var0 = "''";
// Convert the info:eu-repo/semantics dc:rights values with AccessRights when a non-embargo info:eu-repo/semantics value exists and the embargo end date is not in the future, or when the datasource prefix is 'od_______151'.
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics') and not(starts-with(normalize-space(.), 'info:eu-repo/semantics/embargo')) and not((xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date()))] or (//oaf:datasourceprefix = 'od_______151')" oaf:accessrights = Convert(xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]", AccessRights); else $var0 = "''";
// OPEN: an embargo statement is present but the embargo end date is not in the future (not applied to prefix 'od_______151').
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics/embargo') and not((xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date()))] and not(//oaf:datasourceprefix = 'od_______151')" oaf:accessrights = "OPEN"; else $var0 = "''";
// OPEN by default: no dc:rights value starts with 'info:eu-repo/semantics/' and the datasource id is not one of the three listed.
if xpath:"count(//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics/')]) eq 0 and not($varDatasourceid = ('opendoar____::3532', 'opendoar____::109', 'opendoar____::151'))" oaf:accessrights = "OPEN"; else $var0 = "''";
//
// apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics') and (xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) gt current-date())" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
// apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics') " oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
// if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics/restrictedAccess') ]" oaf:accessrights = "RESTRICTED"; else $var0 = "''";
// if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics') and not(xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) lt current-date())]" $var0 = "''"; else oaf:accessrights = "OPEN";
// oaf:accessrights = xpath:"//dc:rights[ not(starts-with(normalize-space(.), 'info:eu-repo/semantics')) and xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) lt current-date()]/concat('OPEN')";
// oaf:accessrights = xpath:"//dc:rights[not(contains(normalize-space(.), 'info:eu-repo/semantics'))]/normalize-space('OPEN')";
// oaf:accessrights = xpath:"not(xs:date( max( ($varEmbargoEnd, '0001-01-01') ) ) lt current-date())";
//
// Datasource-specific rules; $varDatasourceid and $varOfficialname are defined outside this section.
// opendoar____::3532: access rights are derived from dc:format ('fulltext' gives OPEN, 'abstractOnly' gives CLOSED, anything else gives UNKNOWN).
if xpath:"$varDatasourceid = 'opendoar____::3532' and //dc:format = 'fulltext'" oaf:accessrights = "OPEN"; else $var0 = "''";
if xpath:"$varDatasourceid = 'opendoar____::3532' and //dc:format = 'abstractOnly'" oaf:accessrights = "CLOSED"; else $var0 = "''";
if xpath:"$varDatasourceid = 'opendoar____::3532' and not(//dc:format = ('fulltext', 'abstractOnly'))" oaf:accessrights = "UNKNOWN"; else $var0 = "''";
// opendoar____::109: access rights come from the first dc:rights value starting with 'http', converted with AccessRights.
if xpath:"$varDatasourceid = 'opendoar____::109'" oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights[starts-with(., 'http')][1])", AccessRights); else $var0 = "''";
// openaire____::paho_covid19: dc:identifier values ending with 'pdf' are copied to oaf:fulltext.
if xpath:"$varDatasourceid = 'openaire____::paho_covid19' and //dc:identifier[ends-with(., 'pdf')]" oaf:fulltext = xpath:"//dc:identifier[ends-with(., 'pdf')]"; else $var0 = "''";
// Licence: dc:rights values containing a creativecommons.org or opensource.org licence URL (http scheme only).
oaf:license = xpath:"//dc:rights[contains (., 'http://creativecommons.org/licenses/') or contains(., 'http://opensource.org/licenses/')]";
// collectedFrom and hostedBy carry the same datasource name and id; the element content is the empty-string literal.
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
//
//$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/&lt;&gt;]*/[^\s"&lt;&gt;]+)');
//$varId = identifierExtract('["//dc:identifier", "//dc:relation[not(//*[local-name() = \"datasourceprefix\" and .=\"od______1859\"])]"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/&lt;&gt;]*/[^\s"&lt;&gt;]+)');
// DOI candidates: dc:identifier values with an info:, urn:, doi:, DOI: or 10. prefix or a doi.org URL, plus dc:relation altIdentifier entries of type doi (or url/urn pointing to doi.org).
$varIdDoi = identifierExtract('["//dc:identifier[starts-with(normalize-space(.), \"info:\") or starts-with(normalize-space(.), \"urn:\") or starts-with(normalize-space(.), \"doi:\") or starts-with(normalize-space(.), \"DOI:\") or starts-with(normalize-space(.), \"10.\") or ((starts-with(normalize-space(.), \"http\") or starts-with(normalize-space(.), \"PURE LINK: http\")) and contains(., \"doi.org/10.\"))]", "//dc:relation[contains(., \"info:eu-repo/semantics/altIdentifier/doi/\") or ((contains(., \"info:eu-repo/semantics/altIdentifier/url/http\") or contains(., \"info:eu-repo/semantics/altIdentifier/urn/http\")) and contains(., \"doi.org/\"))]"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/&lt;&gt;]*/[^\s"&lt;&gt;]+)');
//$varIdDoiNonStd = identifierExtract('["//dc:relation[not(contains(., \"info:eu-repo/semantics/altIdentifier/doi/\"))]"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/&lt;&gt;]*/[^\s"&lt;&gt;]+)');
// DOI candidates from dc:relation values that do not use the info:eu-repo/semantics/altIdentifier/ scheme.
$varIdDoiNonStd = identifierExtract('["//dc:relation[not(contains(., \"info:eu-repo/semantics/altIdentifier/\"))][starts-with(normalize-space(.), \"info:\") or starts-with(normalize-space(.), \"urn:\") or starts-with(normalize-space(.), \"doi:\") or starts-with(normalize-space(.), \"DOI:\") or starts-with(normalize-space(.), \"10.\") or ((starts-with(normalize-space(.), \"http\") or starts-with(normalize-space(.), \"PURE LINK: http\")) and contains(., \"doi.org/10.\"))]"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/&lt;&gt;]*/[^\s"&lt;&gt;]+)');
// 1st param: list of xpath expressions to be applied on the metadata in json syntax; 2nd param: xpath expression for the metadata record; 3rd param reg expr that matches with a negative lookahead for the first group and extracts digits of the second group
//$varHandle = xpath:"//dc:identifier[//oaf:datasourceprefix[.='od______2097'] and starts-with(., 'http://hdl.handle.net/')]/substring-after(., 'http://hdl.handle.net/')";
// Handle candidates; dc:identifier values containing '123456789' are excluded.
$varIdHdl = identifierExtract('["//dc:identifier[starts-with(., \"info:hdl:\") or ((starts-with(., \"http\") or starts-with(., \"PURE LINK: http\") or starts-with(., \"URI:http\")) and contains(., \"://hdl.handle.net/\"))][not(contains(., \"123456789\"))]", "//dc:relation[starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/hdl/\") or (starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/url/\") and contains(., \"://hdl.handle.net/\"))]"]' , xpath:"./*[local-name()='record']" , '(?!(info:hdl:|://hdl.handle.net/|info:eu-repo/semantics/altIdentifier/hdl/))(\d.*)');
// $varUrn = xpath:"substring-after(//dc:relation[starts-with(normalize-space(.), 'info:eu-repo/semantics/altIdentifier/urn/')], 'info:eu-repo/semantics/altIdentifier/urn/')";
//$varUrn = identifierExtract('["//dc:relation[starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/urn/\")]"]' , xpath:"./*[local-name()='record']" , '(?!info:eu-repo/semantics/altIdentifier/urn/)(urn:nbn:.*)');
// URN:NBN candidates, given directly or through one of the listed resolver URLs.
$varIdUrn = identifierExtract('["//dc:identifier[starts-with(., \"urn:nbn:\") or starts-with(., \"URN:NBN:\") or ((starts-with(., \"http\") or starts-with(., \"PURE LINK: http\")) and (contains(., \"://nbn-resolving.org/urn:nbn:\") or contains(., \"://nbn-resolving.de/urn/resolver.pl?urn:nbn:\") or contains(., \"://resolver.obvsg.at/urn:nbn:\") or contains(., \"://urn.fi/URN:NBN:\")))]", "//dc:relation[starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/urn/\")]"]' , xpath:"./*[local-name()='record']" , '(?!(://nbn-resolving.org/|info:eu-repo/semantics/altIdentifier/urn/))((urn:nbn:|URN:NBN:).*)');
//$varIsbn = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2097'] and starts-with(., '978') or starts-with(., '979')]";
// ISBN candidates from dc:identifier, dc:relation and dc:source.
// NOTE(review): in the dc:source predicate 'and' binds tighter than 'or', so values starting with 979 are selected for every datasource, not only for od______2097 - confirm whether that is intended.
$varIdIsbn = identifierExtract('["//dc:identifier[starts-with(normalize-space(.), \"urn:isbn:\") or starts-with(normalize-space(.), \"urn:ISBN:\") or starts-with(normalize-space(.), \"isbn:\") or starts-with(normalize-space(.), \"ISBN:\") or (starts-with(., \"978-\") and (string-length(.) = 17 or string-length(normalize-space(substring-before(., \"(\"))) = 17))]", "//*[name() = \"dc:relation\" or name() = \"dc:identifier\"][starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/isbn/\")]","//dc:source[//*[local-name() = \"datasourceprefix\" and .=\"od______2097\"] and starts-with(., \"978\") or starts-with(., \"979\")]"]' , xpath:"./*[local-name()='record']" , '(?!(urn:isbn:|info:eu-repo/semantics/altIdentifier/isbn/))(97.*)');
// ISRN, EAN13 and ARK candidates are extracted together with their prefix; the prefix is stripped later, where oaf:identifier is set.
$varIdIsrn = identifierExtract('["//dc:identifier[starts-with(., \"ISRN:\")]"]' , xpath:"./*[local-name()='record']" , '(ISRN:.+)');
$varIdEan = identifierExtract('["//dc:identifier[starts-with(., \"EAN13:\")]"]' , xpath:"./*[local-name()='record']" , '(EAN13:.+)');
$varIdArk = identifierExtract('["//dc:relation[starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/ark/\")]"]' , xpath:"./*[local-name()='record']" , '(info.*)');
//$varPmId = identifierExtract('["//dc:relation[starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/pmid/\")]"]' , xpath:"./*[local-name()='record']" , '(?!info:eu-repo/semantics/altIdentifier/pmid/)(\d+)');
// PubMed id candidates; URLs under www.ncbi.nlm.nih.gov/pmc/articles/ are also selected here unless they contain '/pdf/'.
$varIdPmId = identifierExtract('["//dc:identifier[((starts-with(., \"http\") or starts-with(., \"PURE LINK: http\")) and contains(., \"://www.ncbi.nlm.nih.gov/pmc/articles/\") and not(contains(., \"/pdf/\"))) or starts-with(., \"info:pmid/\") or starts-with(., \"PMID:\") or starts-with(., \"PubMed:\")]", "//dc:relation[starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/pmid/\") or ((contains(., \"info:eu-repo/semantics/altIdentifier/url/http\") or contains(., \"info:eu-repo/semantics/altIdentifier/urn/http\")) and contains(., \"://www.ncbi.nlm.nih.gov/pmc/articles/\") and not(contains(., \"/pdf/\")))]"]' , xpath:"./*[local-name()='record']" , '(?!(://www.ncbi.nlm.nih.gov/pmc/articles/|info:eu-repo/semantics/altIdentifier/pmid/))(\d+)');
// PMC, HAL and BibCode candidates from prefixed dc:identifier values.
$varIdPmc = identifierExtract('["//dc:identifier[starts-with(., \"PMCID:\") or starts-with(., \"pmc:\")]"]' , xpath:"./*[local-name()='record']" , '(PMC\d+)');
$varIdHal = identifierExtract('["//dc:identifier[starts-with(., \"hal-\") or starts-with(., \"halshs-\") or starts-with(., \"halsde-\")]"]' , xpath:"./*[local-name()='record']" , '(hal.*)');
$varIdBibc = identifierExtract('["//dc:identifier[starts-with(., \"BibCode:\")]"]' , xpath:"./*[local-name()='record']" , '(([\d\.]).*)');
// arXiv candidates: dc:identifier values that are arxiv.org/abs URLs or start with 'arXiv:', plus dc:relation altIdentifier entries of type arxiv (or url/urn pointing to arxiv.org/abs).
// The statement is kept on one line: a line break inside the quoted XPath string literals would become part of the compared text (e.g. inside 'info:eu-repo/semantics/altIdentifier/urn/http') and the test could never match.
$varIdArxv = identifierExtract('["//dc:identifier[((starts-with(., \"http\") or starts-with(., \"PURE LINK: http\") or starts-with(., \"ArXiv: http\")) and contains(., \"://arxiv.org/abs/\")) or starts-with(., \"arXiv:\")]", "//dc:relation[starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/arxiv/\") or ((contains(., \"info:eu-repo/semantics/altIdentifier/url/http\") or contains(., \"info:eu-repo/semantics/altIdentifier/urn/http\")) and contains(., \"://arxiv.org/abs/\"))]"]' , xpath:"./*[local-name()='record']" , '(?!(://arxiv.org/abs/|:eu-repo/semantics/altIdentifier/arxiv/))([a-zA-Z].*)');
// Web of Science and Scopus candidates; values are extracted together with their prefix or URL and cleaned later, where oaf:identifier is set.
$varIdWos = identifierExtract('["//dc:identifier[starts-with(., \"WOS:\")]", "//dc:relation[starts-with(normalize-space(.), \"info:eu-repo/semantics/altIdentifier/wos/\")]"]' , xpath:"./*[local-name()='record']" , '(info.*|WOS:.+)');
$varIdScp = identifierExtract('["//dc:identifier[starts-with(normalize-space(.), \"SCOPUS_ID:\") or starts-with(normalize-space(.), \"Scopus:\") or ((starts-with(normalize-space(.), \"http\") or starts-with(normalize-space(.), \"PURE LINK: http\")) and contains(., \"://www.scopus.com/inward/record.ur\"))]"]' , xpath:"./*[local-name()='record']" , '(.+)');
// Landing page candidates: dc:identifier URLs whose host part (text between '://' and the next '/' or ':') is contained in the baseURL element; https URLs containing '.qucosa.de/api/qucosa' are excluded.
$varIdLdpg = identifierExtract('["//dc:identifier[(contains(substring-after(., \"://\"), \"/\") and contains(//*[local-name() = \"baseURL\"], substring-before(substring-after(., \"://\"), \"/\"))) or (contains(substring-after(., \"://\"), \":\") and contains(//*[local-name() = \"baseURL\"], substring-before(substring-after(., \"://\"), \":\")))][not(starts-with(., \"https://\") and contains(., \".qucosa.de/api/qucosa\"))]"]', xpath:"./*[local-name()='record']" , '(http.*)');
//$varUrl = xpath:"//dc:identifier[//oaf:datasourceprefix[.='od______1859'] and starts-with(., 'http') and contains(., '/handle/')]";
//$varIdUrl = identifierExtract('["//dc:identifier[starts-with(normalize-space(.), \"http\")][not(contains(., \"doi.org/\")) and not(contains(., \"hdl.handle.net/\")) and not(contains(., \"://nbn-resolving.org/\")) and not(contains(., \".fr/hal-\")) and not(contains(., \".fr/halsde-\")) and not(contains(., \".fr/halshs-\")) and not(contains(., \"://arxiv.org/abs/\")) and not(contains(., \"://www.ncbi.nlm.nih.gov/pmc/articles/\"))]"]' , xpath:"./*[local-name()='record']" , '(http.*)');
// Generic URL candidates from dc:identifier and dc:relation: URLs of the resolvers and services handled by the dedicated extractions (doi.org, hdl.handle.net, nbn resolvers, HAL, arXiv, PMC) and localhost URLs are excluded.
$varIdUrl = identifierExtract('["//dc:identifier[starts-with(normalize-space(.), \"http\") or starts-with(normalize-space(.), \"url:http\") or starts-with(normalize-space(.), \"PURE ITEMURL: http\") or starts-with(normalize-space(.), \"PURE FILEURL: http\")][not(contains(., \"doi.org/\")) and not(contains(., \"hdl.handle.net/\")) and not(contains(., \"://nbn-resolving.org/\")) and not(contains(., \"://nbn-resolving.de/\")) and not(contains(., \"://resolver.obvsg.at/\")) and not(contains(., \".fr/hal-\")) and not(contains(., \".fr/halsde-\")) and not(contains(., \".fr/halshs-\")) and not(contains(., \"://arxiv.org/abs/\")) and not(contains(., \"://www.ncbi.nlm.nih.gov/pmc/articles/\"))][not(contains(., \"://localhost/\") or contains(., \"://localhost:\"))]", "//dc:relation[contains(., \"info:eu-repo/semantics/altIdentifier/url/http\") or contains(., \"info:eu-repo/semantics/altIdentifier/urn/http\")][not(contains(., \"doi.org/\")) and not(contains(., \"hdl.handle.net/\")) and not(contains(., \"://nbn-resolving.org/\")) and not(contains(., \"://nbn-resolving.de/\")) and not(contains(., \".fr/hal-\")) and not(contains(., \".fr/halsde-\")) and not(contains(., \".fr/halshs-\")) and not(contains(., \"://arxiv.org/abs/\")) and not(contains(., \"://www.ncbi.nlm.nih.gov/pmc/articles/\"))][not(contains(., \"://localhost/\") or contains(., \"://localhost:\"))]"]' , xpath:"./*[local-name()='record']" , '(http.*)');
// All identifier values extracted by the identifierExtract calls; used when filtering dr:CobjIdentifier.
// Every member is a variable reference: without the leading '$' XPath reads the name as an element name test, and the non-standard DOI values are silently left out.
$varIdList = xpath:"(($varIdDoi//value, $varIdDoiNonStd//value, $varIdHdl//value, $varIdUrn//value, $varIdIsbn//value, $varIdIsrn//value, $varIdEan//value, $varIdArk//value, $varIdPmId//value, $varIdPmc//value, $varIdHal//value, $varIdBibc//value, $varIdArxv//value, $varIdWos//value, $varIdScp//value, $varIdLdpg//value, $varIdUrl//value))";
// URLs and resolvable identifiers in priority order: landing pages and plain URLs first, then DOI, handle, URN, PubMed, PMC, HAL and arXiv.
$varIdUrlOrResolvableList = xpath:"(($varIdLdpg//value, $varIdUrl//value, $varIdDoi//value, $varIdDoiNonStd//value, $varIdHdl//value, $varIdUrn//value, $varIdPmId//value, $varIdPmc//value, $varIdHal//value, $varIdArxv//value))";
// A record without any URL or resolvable identifier takes the else branch, which calls skipRecord().
if xpath:"count($varIdUrlOrResolvableList) &gt; 0" $varHttpTest = "true"; else dc:identifier = skipRecord();
// File extensions used further down to tell file URLs apart from landing pages.
// NOTE(review): '.tiv' may be a typo (for example for '.tif') - confirm.
$varKnownFileEndings = xpath:"('.bmp', '.doc', '.docx', '.epub', '.flv', '.htm', '.html', '.jpeg', '.jpg', '.m4v', '.mp4', '.mpg', '.odp', '.pdf', '.png', '.ppt', '.tiv', '.txt', '.xls', '.xlsx', '.zip')";
// dc:identifier is the first entry of the priority list.
dc:identifier = xpath:"$varIdUrlOrResolvableList[1]";
// dr:CobjIdentifier: distinct dc:identifier values left over after the typed extractions. Excluded are URL-like values, values already in $varIdList, values carrying a DOI, handle, ISBN, URN:NBN, PubMed/PMC, BibCode, ISSN, SCOPUS_ID or oai: prefix, the value equal to $varISSN[1], and blank values.
dr:CobjIdentifier = xpath:"distinct-values(//dc:identifier[not(starts-with(normalize-space(.), 'http') or starts-with(normalize-space(.), 'url:http') or starts-with(normalize-space(lower-case(.)), 'uri:http') or starts-with(normalize-space(.), 'PURE ITEMURL: http') or starts-with(normalize-space(.), 'PURE FILEURL: http') or starts-with(normalize-space(.), 'PURE LINK: http'))][not(normalize-space(.) = ($varIdList))][not(starts-with(normalize-space(.), 'info:doi:') or starts-with(normalize-space(lower-case(.)), 'doi:') or starts-with(normalize-space(.), 'info:doi/') or starts-with(normalize-space(.), 'info:eu-repo/semantics/altIdentifier/doi/'))][not(starts-with(normalize-space(.), 'info:hdl:'))][not(starts-with(normalize-space(lower-case(.)), 'urn:isbn:') or starts-with(normalize-space(lower-case(.)), 'isbn:') or starts-with(normalize-space(.), 'info:eu-repo/semantics/altIdentifier/isbn/'))][not(starts-with(normalize-space(.), 'urn:nbn:'))][not(starts-with(., 'info:pmid/') or starts-with(lower-case(.), 'pmid:') or starts-with(lower-case(.), 'pubmed:') or starts-with(lower-case(.), 'pmcid:') or starts-with(lower-case(.), 'pmc:'))][not(starts-with(lower-case(.), 'bibcode:'))][not(. = $varISSN[1]) and not(starts-with(lower-case(.), 'issn:') or starts-with(lower-case(.), 'urn:issn:'))][not(starts-with(., 'SCOPUS_ID:'))][not(starts-with(., 'oai:'))][normalize-space(.) != ''])";
//oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";);
// Typed identifiers: one oaf:identifier rule per identifier type, each tagged through @identifierType.
// DOI: a single value, taken from the standard extraction first and from the non-standard dc:relation extraction otherwise.
oaf:identifier = set(xpath:"($varIdDoi//value[1], $varIdDoiNonStd[1]//value)[1]", @identifierType = "doi";);
//oaf:identifier = set(xpath:"$varHandle", @identifierType = "handle";);
oaf:identifier = set(xpath:"$varIdHdl//value", @identifierType = "handle";);
//oaf:identifier = set(xpath:"$varUrn//value", @identifierType = "urn";);
oaf:identifier = set(xpath:"$varIdUrn//value", @identifierType = "urn";);
//oaf:identifier = set(xpath:"$varIsbn", @identifierType = "isbn";);
oaf:identifier = set(xpath:"$varIdIsbn//value", @identifierType = "isbn";);
// ISRN, EAN13 and ARK values were extracted with their prefix; the prefix is removed here.
oaf:identifier = set(xpath:"$varIdIsrn//value/normalize-space(substring-after(., 'ISRN:'))", @identifierType = "isrn";);
oaf:identifier = set(xpath:"$varIdEan//value/normalize-space(substring-after(., 'EAN13:'))", @identifierType = "ean";);
oaf:identifier = set(xpath:"$varIdArk//value/substring-after(., 'info:eu-repo/semantics/altIdentifier/ark/')", @identifierType = "ark";);
//oaf:identifier = set(xpath:"$varPmId//value", @identifierType = "pmid";);
oaf:identifier = set(xpath:"$varIdPmId//value", @identifierType = "pmid";);
oaf:identifier = set(xpath:"$varIdPmc//value", @identifierType = "pmcid";);
oaf:identifier = set(xpath:"$varIdHal//value", @identifierType = "hal";);
oaf:identifier = set(xpath:"$varIdBibc//value", @identifierType = "bibcode";);
//oaf:identifier = set(xpath:"distinct-values(($varIdArxv//value[contains(., '://arxiv.org/abs/')]/substring-after(., '://arxiv.org/abs/'), $varIdArxv//value[contains(., 'info:eu-repo/semantics/altIdentifier/arxiv/')]/substring-after(., 'info:eu-repo/semantics/altIdentifier/arxiv/')))", @identifierType = "arxiv";);
// arXiv: URL and altIdentifier prefixes are removed case-insensitively and duplicates are dropped.
oaf:identifier = set(xpath:"distinct-values(($varIdArxv//value/normalize-space(replace(., '(https?://arxiv.org/abs/|info:eu-repo/semantics/altIdentifier/arxiv/|info:eu-repo/semantics/altIdentifier/url/|info:eu-repo/semantics/altIdentifier/urn/|arXiv:)', '', 'i'))))", @identifierType = "arxiv";);
oaf:identifier = set(xpath:"$varIdWos//value/normalize-space(replace(., '(info:eu-repo/semantics/altIdentifier/wos/|WOS:)', ''))", @identifierType = "wos";);
// Scopus: only the numeric id (regex group 4) is kept, whether the value is a record URL or a prefixed id.
oaf:identifier = set(xpath:"distinct-values(($varIdScp//value/replace(normalize-space(.), '^((PURE LINK: )?https?://www.scopus.com/inward/record.ur.*(scp=|eid=2-s2.0-)|SCOPUS_ID:\s*|Scopus:\s*)(\d+).*$', '$4')))", @identifierType = "scp";);
// Landing page: landing-page candidates whose trailing extension is not one of $varKnownFileEndings.
oaf:identifier = set(xpath:"$varIdLdpg//value[not(replace(lower-case(.), '.*(\.[a-z]*)$', '$1') = $varKnownFileEndings)]", @identifierType = "landingPage";);
//oaf:identifier = set(xpath:"$varUrl", @identifierType = "url";);
// URL: URL candidates that are not landing-page candidates, or that end with one of $varKnownFileEndings.
oaf:identifier = set(xpath:"$varIdUrl//value[count(index-of($varIdLdpg//value, .)) = 0 or replace(lower-case(.), '.*(\.[a-z]*)$', '$1') = $varKnownFileEndings]", @identifierType = "url";);
// The datasource prefix is copied unchanged.
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";
// journal data;
// PURE: exposes ISSN in field ns2:isPartOf, journal title not extractable due to ' usage in source field
//$varJournalTitle = xpath:"//dc:subject[1][//oaf:datasourceprefix[.='dovemedicalp']]/normalize-space(.), //dc:source[1][//oaf:datasourceprefix[.='scindeksserb']]/normalize-space(substring-before(., '('))";
//$varJournalTitle = xpath:"//dc:subject[1][//oaf:datasourceprefix[.='dovemedicalp']]/normalize-space(.), //dc:source[1][//oaf:datasourceprefix[.='scindeksserb']]/normalize-space(substring-before(., '(')), //dc:source[//oaf:datasourceprefix[.='od______2659']][//dc:relation[matches(., '(issn:|info:eu-repo/semantics/altIdentifier/issn/)','i')]]/normalize-space(replace(., '^(.*?)\s(\d|v\.\s\d).*$', '$1'))";
//$varJournalTitle = xpath:"//dc:subject[1][//oaf:datasourceprefix[.='dovemedicalp']]/normalize-space(.), //dc:source[1][//oaf:datasourceprefix[.='scindeksserb']]/normalize-space(substring-before(., '(')), //dc:source[//oaf:datasourceprefix[.='od______2659']][//dc:relation[matches(., '(issn:|info:eu-repo/semantics/altIdentifier/issn/)','i')]]/normalize-space(replace(., '^(.*?)\s(\d|v\.\s\d).*$', '$1')), //dc:source[//oaf:datasourceprefix[.='od______2097'] and //dc:source[matches(., '\d{4}-\d{3}[\dX]')] and not(matches(., '\d{4}-\d{3}[\dX]'))][1]";
// Journal title, with one extraction rule per datasource prefix: dovemedicalp (first dc:subject), scindeksserb (dc:source up to the first opening parenthesis), od______2659 (leading part of dc:source when an ISSN relation exists), od______2097 and issn22953671 (the dc:source value that is not itself an ISSN), od______2712 (dc:source text before the year).
$varJournalTitle = xpath:"//dc:subject[1][//oaf:datasourceprefix[.='dovemedicalp']]/normalize-space(.), //dc:source[1][//oaf:datasourceprefix[.='scindeksserb']]/normalize-space(substring-before(., '(')), //dc:source[//oaf:datasourceprefix[.='od______2659']][//dc:relation[matches(., '(issn:|info:eu-repo/semantics/altIdentifier/issn/)','i')]]/normalize-space(replace(., '^(.*?)\s(\d|v\.\s\d).*$', '$1')), //dc:source[//oaf:datasourceprefix[.='od______2097'] and //dc:source[matches(., '\d{4}-\d{3}[\dX]')] and not(matches(., '\d{4}-\d{3}[\dX]'))][1], //dc:source[1][//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and //dc:source[not(starts-with(., 'ISSN '))]]/replace(., '^(.*?)\.\s*\d{4}.*$', '$1'), //dc:source[//oaf:datasourceprefix[.='issn22953671'] and //dc:source[matches(., '\d{4}-\d{3}[\dX]')] and not(matches(., '\d{4}-\d{3}[\dX]'))][1]/substring-before(., ';')";
//$varISSN = xpath:"//oai:setSpec[starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:')]/normalize-space(substring-after(., 'ISSN:'))";
//$varISSN = xpath:"//*[local-name()='setSpec'][starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:')]/normalize-space(substring-after(., 'ISSN:')), //dc:relation[starts-with(., 'issn:') or starts-with(., 'info:eu-repo/semantics/altIdentifier/issn/')]/replace(normalize-space(substring-after(., 'issn')),'[/:]([0-9]{4})-?([0-9X]{4})','$1-$2')";
//$varISSN = xpath:"//*[local-name()='setSpec'][starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:')]/normalize-space(substring-after(., 'ISSN:')), //dc:relation[starts-with(., 'issn:') or starts-with(., 'info:eu-repo/semantics/altIdentifier/issn/')]/replace(normalize-space(substring-after(., 'issn')),'[/:]([0-9]{4})-?([0-9X]{4})','$1-$2'), //dc:source[//oaf:datasourceprefix='issn20381026'][matches(.,'\d\d\d\d-\d\d\d\d')][1], //dc:identifier[//oaf:datasourceprefix[.='od______3636'] and matches(., '[0-9]{4}-[0-9]{3}[0-9X]')], //dc:source[//oaf:datasourceprefix[.='od______2097'] and matches(., '\d{4}-\d{3}[\dX]')]";
//$varISSN = xpath:"//*[local-name()='setSpec'][starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:') or starts-with(., 'ISSN ')]/normalize-space(replace(., 'ISSN[:\s](.*)$', '$1')), //dc:relation[starts-with(., 'issn:') or starts-with(., 'info:eu-repo/semantics/altIdentifier/issn/')]/replace(normalize-space(substring-after(., 'issn')),'[/:]([0-9]{4})-?([0-9X]{4})','$1-$2'), //dc:identifier[//oaf:datasourceprefix[.='od______3636'] and matches(., '[0-9]{4}-[0-9]{3}[0-9X]')], //dc:source[//oaf:datasourceprefix[.=('od______2097', 'issn22953671')] and matches(., '^\d{4}-\d{3}[\dX]$')][1]";
//$varISSN = xpath:"(//*[local-name()='setSpec'][starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:') or starts-with(., 'ISSN ')]/normalize-space(replace(., 'ISSN[:\s](.*)$', '$1')), //dc:relation[starts-with(lower-case(.), 'issn:') or starts-with(., 'info:eu-repo/semantics/altIdentifier/issn/')]/replace(normalize-space(substring-after(lower-case(.), 'issn')),'[/:]([0-9]{4})-?([0-9X]{4})','$1-$2'), //dc:identifier[matches(., '[0-9]{4}-[0-9]{3}[0-9X]')], //dc:source[//oaf:datasourceprefix[.=('od______2097', 'issn22953671')] and matches(., '^\d{4}-\d{3}[\dX]$')][1], //*[local-name()='isPartOf'][starts-with(., 'urn:ISSN:')]/substring-after(., 'urn:ISSN:'), //dc:identifier[starts-with(., 'urn:issn:')]/substring-after(. ,'urn:issn:'))[1]";
// ISSN: the first hit, in this order: setSpec starting with 'ISSN', dc:source with an ISSN prefix, dc:relation issn entries, a bare dc:identifier ISSN (optionally marked '(print)'), a bare ISSN in dc:source for od______2097 or issn22953671, isPartOf starting with 'urn:ISSN:', dc:identifier with an issn: or urn:issn: prefix.
$varISSN = xpath:"(//*[local-name()='setSpec'][starts-with(., 'ISSN')]/substring-after(., 'ISSN'), //dc:source[starts-with(., 'ISSN:') or starts-with(., 'ISSN ')]/normalize-space(replace(., 'ISSN[:\s](.*)$', '$1')), //dc:relation[(starts-with(lower-case(.), 'issn:') or starts-with(., 'info:eu-repo/semantics/altIdentifier/issn/')) and matches(., '\d{4}[-\s]?\d{3}[\dX]')]/replace(.,'(issn[/:]|info:eu-repo/semantics/altIdentifier/issn/)([0-9]{4})-?([0-9X]{4})','$2-$3','i'), //dc:identifier[matches(normalize-space(.), '^[0-9]{4}-[0-9]{3}[0-9X](\s*\(print\))?$', 'i')]/replace(., '.*([0-9]{4}-[0-9]{3}[0-9X]).*', '$1'), //dc:source[//oaf:datasourceprefix[.=('od______2097', 'issn22953671')] and matches(., '^\d{4}-\d{3}[\dX]$')][1], //*[local-name()='isPartOf'][starts-with(., 'urn:ISSN:')]/substring-after(., 'urn:ISSN:'), //dc:identifier[starts-with(lower-case(.), 'urn:issn:') or starts-with(lower-case(.), 'issn:')]/substring-after(lower-case(.) ,'issn:'))[1]";
// Electronic ISSN: dc:relation eissn entries, plus dc:identifier ISSN values marked '(online)'.
$varEISSN = xpath:"//dc:relation[starts-with(., 'eissn:') or starts-with(., 'info:eu-repo/semantics/altIdentifier/eissn/')]/replace(normalize-space(substring-after(., 'eissn')),'[/:]([0-9]{4})-?([0-9X]{4})','$1-$2'), //dc:identifier[matches(normalize-space(.), '[0-9]{4}-[0-9]{3}[0-9X]\s*\(online\)', 'i')]/replace(., '.*([0-9]{4}-[0-9]{3}[0-9X]).*', '$1')";
//oaf:journal = set($varJournalTitle, @issn = xpath:"$varISSN";);
//to be improved: many identical checks
//$varVol = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/replace(., '^.*?\.\s*(\d{4})($|,.*$)', '$1')";
// Volume, issue, start page and end page are parsed from dc:source: for od______2712 from the non-ISSN dc:source value, otherwise (volume and pages only) from values containing ', vol. ' or ', pp. ' when the first isPartOf element starts with 'urn:ISSN:'.
$varVol = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/replace(., '^.*?\.\s*(\d{4})($|,.*$)', '$1'), //dc:source[starts-with(//*[local-name()='isPartOf'], 'urn:ISSN:')][contains(., ', vol. ')]/normalize-space(substring-before(substring-after(., ', vol. '), ','))";
$varIss = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/normalize-space(substring-before(substring-after(., 'Nr.'), ','))";
//$varSp = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/normalize-space(tokenize(substring-after(., ', p.'), '-')[1])";
$varSp = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/normalize-space(tokenize(substring-after(., ', p.'), '-')[1]), //dc:source[starts-with(//*[local-name()='isPartOf'], 'urn:ISSN:')][contains(., ', pp. ')]/replace(., '^.*, pp. (\d*)-\d*[\s,\.;].*$', '$1')";
//$varEp = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/normalize-space(reverse(tokenize(substring-after(., ', p.'), '-'))[1])";
$varEp = xpath:"//dc:source[//oaf:datasourceprefix[.='od______2712'] and //dc:source[starts-with(., 'ISSN ')] and not(starts-with(., 'ISSN '))]/normalize-space(reverse(tokenize(substring-after(., ', p.'), '-'))[1]), //dc:source[starts-with(//*[local-name()='isPartOf'], 'urn:ISSN:')][contains(., ', pp. ')]/replace(., '^.*, pp. \d*-(\d*)[\s,\.;].*$', '$1')";
//to be improved: many empty attributes
// oaf:journal carries the title as content and all parsed parts as attributes.
oaf:journal = set($varJournalTitle, @issn = xpath:"$varISSN";, @eissn = xpath:"$varEISSN";, @vol = xpath:"$varVol";, @iss = xpath:"$varIss";, @sp = xpath:"$varSp";, @ep = xpath:"$varEp";);
if xpath:"//oaf:datasourceprefix[.='dovemedicalp']" oaf:fulltext = xpath:"concat('file:///mnt/downloaded_dumps/dovepress/', substring-after(//*[local-name()='header']/*[local-name()='identifier'], 'oai:dovepress.com/'), '.pdf')"; else $varDummy= "''";
if xpath:"//oaf:datasourceprefix[.='od______3848'] and //dc:format[.='application/pdf']" oaf:fulltext = xpath:"//dc:identifier[ends-with(lower-case(normalize-space(.)), '.pdf')][starts-with(lower-case(normalize-space(.)), 'https://cris.cumulus.vub.ac.be/')]"; else $varDummy= "''";
if xpath:"//oaf:datasourceprefix[.='doaj21976775' or .='issn21976775'] and //dc:identifier[starts-with(normalize-space(.), 'https://policyreview.info/node/')]" oaf:fulltext = xpath:"concat(//dc:identifier[starts-with(normalize-space(.), 'https://policyreview.info/node/')]/normalize-space(.), '/pdf')"; else $varDummy= "''";
apply xpath:"//dc:relation[starts-with(., 'https://etalpykla.lituanistikadb.lt/fedora/get/')][//oaf:datasourceprefix[.='od______2712']]" if xpath:"true()" oaf:fulltext = xpath:"normalize-space(.)"; else $varDummy = "''";
if xpath:"//oaf:datasourceprefix[.='od______4149'] and //dc:format[.='application/pdf']" oaf:fulltext = xpath:"//dc:identifier[contains(lower-case(normalize-space(.)), '/datastream/')]"; else $varDummy= "''";
oaf:fulltext = xpath:"//dc:identifier[//oaf:datasourceprefix = 'od______2584' and starts-with(., 'http://fulir.irb.hr/') and ends-with(., '.pdf')]";
// community
// concept should not appear with empty attribute id, i.e when there is no community - ugly, but seems to work (oaf:datasourceprefix = just any field available in all records)
//$varCommunity = xpath:"//*[local-name()='relation'][starts-with(., 'url:https://openaire.eu/communities/')]/substring-after(., 'url:https://openaire.eu/communities/')";
//oaf:concept = set(xpath:"//oaf:datasourceprefix[string-length($varCommunity) gt 0]/''", @id = $varCommunity;);
$varCommunityAtt = xpath:"//*[local-name()='relation'][starts-with(., 'url:https://openaire.eu/communities/') or starts-with(., 'url:https://zenodo.org/communities/')]/substring-after(., 'url:')";
$varCommunityVal = xpath:"//*[local-name()='relation'][starts-with(., 'url:https://openaire.eu/communities/') or starts-with(., 'url:https://zenodo.org/communities/')]/substring-before(., 'url:')";
oaf:concept = set(xpath:"$varCommunityVal", @id = xpath:"subsequence($varCommunityAtt,position(),1)";);
end

View File

@ -0,0 +1,288 @@
<!-- adapted from PROD at 2021-05-26 -->
<xsl:stylesheet
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:vocabulary="http://eu/dnetlib/transform/clean"
xmlns:dateCleaner="http://eu/dnetlib/transform/dateISO"
exclude-result-prefixes="xsl vocabulary dateCleaner"
version="2.0">
<xsl:param name="varOfficialName" />
<xsl:param name="varDsType" />
<xsl:param name="varDataSourceId" />
<xsl:param name="index" select="0" />
<xsl:param name="transDate" select="current-dateTime()" />
<xsl:template match="/">
  <!-- Entry point: every input document is handed straight to the named
       template "validRecord", which builds the output record.
       No local variables are declared here: a variable local to this template
       would not be visible inside the called template anyway. -->
  <xsl:call-template name="validRecord" />
</xsl:template>
<!-- skip/terminate record transformation -->
<xsl:template name="terminate">
<!-- Named template without parameters: emits the message below with terminate="yes", which makes the XSLT processor abort the transformation of the current record. -->
<xsl:message terminate="yes">
record is not compliant, transformation is interrupted.
</xsl:message>
</xsl:template>
<!-- validate record -->
<xsl:template name="validRecord">
<record>
<xsl:apply-templates select="//*[local-name() = 'header']" />
<metadata>
<xsl:apply-templates select="//*[local-name() = 'metadata']//*[local-name() = 'resource']" /> <!-- for CoCoON many deleted records appeared among the transformed records -->
<xsl:if test="//oai:header/@status='deleted'">
<xsl:call-template name="terminate" />
</xsl:if>
<!-- One oaf:identifier of type "handle" per DataCite Handle identifier, ignoring the
     placeholder value '123456789'. A resolver URL is reduced to the part after
     '://hdl.handle.net/'; any other value is emitted unchanged. -->
<xsl:for-each select="//*[local-name() = 'resource']/*[local-name()='identifier'][@identifierType='Handle'][not(. = '123456789')]">
  <oaf:identifier identifierType="handle">
    <xsl:choose>
      <xsl:when test="contains(., '://hdl.handle.net/')">
        <xsl:value-of select="substring-after(., '://hdl.handle.net/')" />
      </xsl:when>
      <xsl:otherwise>
        <xsl:value-of select="." />
      </xsl:otherwise>
    </xsl:choose>
  </oaf:identifier>
</xsl:for-each>
<xsl:for-each select="//*[local-name() = 'resource']/*[local-name()='identifier'][@identifierType='DOI']">
<oaf:identifier>
<xsl:attribute name="identifierType">
<xsl:value-of select="'doi'" />
</xsl:attribute>
<xsl:if test="contains(., '://dx.doi.org/')">
<xsl:value-of select="substring-after(., '://dx.doi.org/')" />
</xsl:if>
<xsl:if test="not(contains(., '://dx.doi.org/'))">
<xsl:value-of select="." />
</xsl:if>
</oaf:identifier>
</xsl:for-each> <!-- This is the only difference with the generic datacite repo rule: since all datasets from Nakala are Images -->
<xsl:if test="lower-case(//*[local-name()='resourceType']/@*[local-name()='resourceTypeGeneral']) = 'image'">
<oaf:concept>
<xsl:attribute name="id">
<xsl:value-of select="'dariah'" />
</xsl:attribute>
</oaf:concept>
</xsl:if>
<xsl:if test="//*[local-name()='date']/@dateType='Available' and //*[local-name()='datasourceprefix']!='r33ffb097cef'">
<xsl:variable name="varEmbargoEndDate" select="dateCleaner:dateISO( normalize-space(//*[local-name()='date'][@dateType='Available']))" />
<xsl:choose>
<xsl:when test="string-length($varEmbargoEndDate) &gt; 0">
<oaf:embargoenddate>
<xsl:value-of select="$varEmbargoEndDate" />
</oaf:embargoenddate>
</xsl:when>
<xsl:otherwise>
<xsl:call-template name="terminate" />
</xsl:otherwise>
</xsl:choose>
</xsl:if>
<xsl:variable name="varTypLst" select="distinct-values((//*[local-name()='resourceType']/(., @resourceTypeGeneral)))" />
<xsl:variable name="varCobjCatLst" select="distinct-values((for $i in $varTypLst return vocabulary:clean( normalize-space($i), 'dnet:publication_resource')))" />
<xsl:variable name="varCobjSupLst" select="for $i in $varCobjCatLst return concat($i, '###', vocabulary:clean( normalize-space($i), 'dnet:result_typologies'))" />
<dr:CobjCategory>
<xsl:choose>
<xsl:when test="count($varCobjSupLst[not(substring-after(., '###') = 'other') and not(substring-before(., '###') = ('0038', '0039', '0040'))]) &gt; 0">
<xsl:variable name="varCobjSup" select="$varCobjSupLst[not(substring-after(., '###') = 'other') and not(substring-before(., '###') = ('0038', '0039', '0040'))][1]" />
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')" />
<xsl:value-of select="substring-before($varCobjSup, '###')" />
</xsl:when>
<xsl:when test="count($varCobjSupLst[not(substring-after(., '###') = 'other')]) &gt; 0">
<xsl:variable name="varCobjSup" select="$varCobjSupLst[not(substring-after(., '###') = 'other')][1]" />
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')" />
<xsl:value-of select="substring-before($varCobjSup, '###')" />
</xsl:when>
<xsl:when test="count($varCobjSupLst[not(substring-before(., '###') = ('0020', '0000'))]) &gt; 0">
<xsl:variable name="varCobjSup" select="$varCobjSupLst[not(substring-before(., '###') = ('0020', '0000'))][1]" />
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')" />
<xsl:value-of select="substring-before($varCobjSup, '###')" />
</xsl:when>
<xsl:when test="count($varCobjSupLst[not(substring-before(., '###') = ('0000'))]) &gt; 0">
<xsl:variable name="varCobjSup" select="$varCobjSupLst[not(substring-before(., '###') = ('0000'))][1]" />
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')" />
<xsl:value-of select="substring-before($varCobjSup, '###')" />
</xsl:when>
<xsl:otherwise>
<xsl:attribute name="type" select="'other'" />
<xsl:value-of select="'0000'" />
</xsl:otherwise>
</xsl:choose>
</dr:CobjCategory> <!-- review status --> <!-- no review hints found in resource type declarations, no version declarations found -->
<xsl:variable name="varRefereedConvt" select="for $i in ( //*[local-name()='resourceType']/(., @resourceTypeGeneral), //oai:setSpec, //*[local-name()='description']) return vocabulary:clean( normalize-space($i), 'dnet:review_levels')" />
<xsl:variable name="varRefereedIdntf" select="( //*[local-name()=('identifier', 'alternateIdentifier')][count(//*[local-name()=('metadata', 'resource')]//*[local-name()=('identifier', 'alternateIdentifier')]) = 1][matches(lower-case(.), '(^|.*[\.\-_\\/\s\(\)%\d#:])pre[\.\-_\\/\s\(\)%\d#:]?prints?([\.\-_\\/\s\(\)%\d#:].*)?$')]/'0002', //*[local-name()=('identifier', 'alternateIdentifier')][count(//*[local-name()=('metadata', 'resource')]//*[local-name()=('identifier', 'alternateIdentifier')]) = 1][matches(lower-case(.), '(^|.*[\.\-_\\/\s\(\)%\d#:])refereed([\.\-_\\/\s\(\)%\d#:].*)?$')]/'0001', //*[local-name()=('identifier', 'alternateIdentifier')][count(//*[local-name()=('metadata', 'resource')]//*[local-name()=('identifier', 'alternateIdentifier')]) = 1][matches(lower-case(.), '.*-peer-reviewed-(fulltext-)?article-.*')]/'0001')" />
<xsl:variable name="varRefereedVersn" select="(//*[local-name()='version'][matches(lower-case(.), '.*peer[\s\-\.\\_/:%]?reviewed.*')]/'0001', //*[local-name()='version'][matches(normalize-space(lower-case(.)), '^(v|vs|version|rel|release)?[\s\.\-_]*0$')]/'0002', //*[local-name()='version'][matches(lower-case(.), '(^|[\s\-\.\\_/:%].*)(beta|draft|trial|test)([\s\-\.\\_/:%].*|$)')]/'0002', //*[local-name()='version'][matches(lower-case(.), '.*submi(tted|ssion|ttal).*')]/'0002') " />
<xsl:variable name="varRefereedOther" select="(//*[local-name()='publisher'][matches(lower-case(.), '.*[\s\-\.\\_/:%]pre[\s\-\.\\_/:%]?prints?([\s\-\.\\_/:%].*|$)')]/'0002', //*[local-name()='description'][matches(lower-case(.), '^peer[\s\-\.\\_/:%]?reviewed$')]/'0001', //*[local-name()='description'][matches(lower-case(.), '^pre[\s\-\.\\_/:%]?prints?$')]/'0002') " />
<xsl:variable name="varRefereedReltn" select="//*[local-name() = 'relatedIdentifier'][./@relationType/lower-case(.)='isreviewedby']/'0001'" />
<xsl:variable name="varRefereedDesct" select="(//*[local-name() = 'description'] [matches(lower-case(.), '.*(this\s*book|this\s*volume|it)\s*constitutes\s*the\s*(thoroughly\s*)?refereed') or matches(lower-case(.), '.*peer[\.\-_/\s\(\)]?review\s*under\s*responsibility\s*of.*') or matches(lower-case(.), '(this|a)\s*(article|preprint)\s*(has\s*been\s*)?(peer[\-\s]*)?reviewed\s*and\s*recommended\s*by\s*peer[\-\s]*community')]/'0001')" />
<!-- Merged sequence of all review-status hints computed by the varRefereed* variables; each source variable is listed exactly once. -->
<xsl:variable name="varRefereed" select="($varRefereedConvt, $varRefereedIdntf, $varRefereedReltn, $varRefereedVersn, $varRefereedOther, $varRefereedDesct)" />
<!--
<xsl:variable name="varRefereedConvt" select="for $i in ( //*[local-name()='resourceType']/(., @resourceTypeGeneral), //oai:setSpec, //*[local-name()='description']) return TransformationFunction:convertString($tf, normalize-space($i), 'ReviewLevels')" />
<xsl:variable name="varRefereedIdntf" select="( //*[local-name()=('identifier', 'alternateIdentifier')][count(//*[local-name()=('metadata', 'resource')]//*[local-name()=('identifier', 'alternateIdentifier')]) = 1][matches(lower-case(.), '(^|.*[\.\-_\\/\s\(\)%\d#:])pre[\.\-_\\/\s\(\)%\d#:]?prints?([\.\-_\\/\s\(\)%\d#:].*)?$')]/'0002', //*[local-name()=('identifier', 'alternateIdentifier')][count(//*[local-name()=('metadata', 'resource')]//*[local-name()=('identifier', 'alternateIdentifier')]) = 1][matches(lower-case(.), '(^|.*[\.\-_\\/\s\(\)%\d#:])refereed([\.\-_\\/\s\(\)%\d#:].*)?$')]/'0001', //*[local-name()=('identifier', 'alternateIdentifier')][count(//*[local-name()=('metadata', 'resource')]//*[local-name()=('identifier', 'alternateIdentifier')]) = 1][matches(lower-case(.), '.*-peer-reviewed-(fulltext-)?article-.*')]/'0001')" />
<xsl:variable name="varRefereedVersn" select="(//*[local-name()='version'][matches(lower-case(.), '.*peer[\s\-\.\\_/:%]?reviewed.*')]/'0001', //*[local-name()='version'][matches(normalize-space(lower-case(.)), '^(v|vs|version|rel|release)?[\s\.\-_]*0$')]/'0002', //*[local-name()='version'][matches(lower-case(.), '(^|[\s\-\.\\_/:%].*)(beta|draft|trial|test)([\s\-\.\\_/:%].*|$)')]/'0002', //*[local-name()='version'][matches(lower-case(.), '.*submi(tted|ssion|ttal).*')]/'0002') " />
<xsl:variable name="varRefereedOther" select="(//*[local-name()='publisher'][matches(lower-case(.), '.*[\s\-\.\\_/:%]pre[\s\-\.\\_/:%]?prints?([\s\-\.\\_/:%].*|$)')]/'0002', //*[local-name()='description'][matches(lower-case(.), '^peer[\s\-\.\\_/:%]?reviewed$')]/'0001', //*[local-name()='description'][matches(lower-case(.), '^pre[\s\-\.\\_/:%]?prints?$')]/'0002') " />
<xsl:variable name="varRefereedReltn" select="//*[local-name() = 'relatedIdentifier'][./@relationType/lower-case(.)='isreviewedby']/'0001'" />
<xsl:variable name="varRefereedDesct" select="(//*[local-name() = 'description'] [matches(lower-case(.), '.*(this\s*book|this\s*volume|it)\s*constitutes\s*the\s*(thoroughly\s*)?refereed') or matches(lower-case(.), '.*peer[\.\-_/\s\(\)]?review\s*under\s*responsibility\s*of.*') or matches(lower-case(.), '(this|a)\s*(article|preprint)\s*(has\s*been\s*)?(peer[\-\s]*)?reviewed\s*and\s*recommended\s*by\s*peer[\-\s]*community')]/'0001')" />
<xsl:variable name="varRefereed" select="($varRefereedConvt, $varRefereedIdntf, $varRefereedReltn, $varRefereedVersn, $varRefereedOther, $varRefereedReltn, $varRefereedDesct)" />
-->
<xsl:choose>
<xsl:when test="count($varRefereed[. = '0001']) &gt; 0">
<oaf:refereed>
<xsl:value-of select="'0001'" />
</oaf:refereed>
</xsl:when>
<xsl:when test="count($varRefereed[. = '0002']) &gt; 0">
<oaf:refereed>
<xsl:value-of select="'0002'" />
</oaf:refereed>
</xsl:when>
</xsl:choose>
<!-- Acceptance date, emitted exactly once per record.
     The DataCite date with dateType='Issued' is used when its normalized value is
     longer than 3 characters; otherwise publicationYear is the fallback.
     The chosen value is passed through the dateCleaner:dateISO extension function. -->
<oaf:dateAccepted>
  <xsl:variable name="theDate">
    <xsl:choose>
      <xsl:when test="string-length(normalize-space(//*[local-name()='date'][@dateType='Issued'])) &gt; 3">
        <xsl:value-of select="//*[local-name()='date'][@dateType='Issued']" />
      </xsl:when>
      <xsl:otherwise>
        <xsl:value-of select="//*[local-name()='publicationYear']" />
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>
  <xsl:value-of select="dateCleaner:dateISO( normalize-space($theDate) )" />
</oaf:dateAccepted>
<xsl:choose>
<xsl:when test="//*[local-name() = 'rights']/@rightsURI[starts-with(normalize-space(.), 'info:eu-repo/semantics')]">
<oaf:accessrights>
<xsl:value-of select="vocabulary:clean( //*[local-name() = 'rights']/@rightsURI[starts-with(normalize-space(.), 'info:eu-repo/semantics')], 'dnet:access_modes')" />
</oaf:accessrights>
</xsl:when>
<xsl:otherwise>
<xsl:choose>
<xsl:when test="//*[local-name() = 'rights'][starts-with(normalize-space(.), 'http://creativecommons.org') or starts-with(normalize-space(.), 'Creative Commons') or starts-with(normalize-space(.), 'GNU LESSER GENERAL PUBLIC LICENSE')]">
<oaf:accessrights>
<xsl:text>OPEN</xsl:text>
</oaf:accessrights>
</xsl:when>
<xsl:when test="//*[local-name() = 'rights']/@rightsURI[starts-with(normalize-space(.), 'http://creativecommons.org') or starts-with(normalize-space(.), 'http://opendatacommons.org')]">
<oaf:accessrights>
<xsl:text>OPEN</xsl:text>
</oaf:accessrights>
</xsl:when>
<xsl:when test="//*[local-name() = 'rights'][starts-with(normalize-space(.), 'Open access data at least for academic use')]">
<oaf:accessrights>
<xsl:text>RESTRICTED</xsl:text>
</oaf:accessrights>
</xsl:when>
<xsl:otherwise>
<oaf:accessrights>
<xsl:text>UNKNOWN</xsl:text>
</oaf:accessrights>
</xsl:otherwise>
</xsl:choose>
</xsl:otherwise>
</xsl:choose>
<xsl:for-each select="//*[local-name()='rights']/@rightsURI[starts-with(normalize-space(.), 'http') and matches(., '.*(/licenses|/publicdomain|unlicense.org/|/legal-and-data-protection-notices|/download/license|/open-government-licence).*')]">
<oaf:license>
<xsl:value-of select="." />
</oaf:license>
</xsl:for-each>
<oaf:language>
<xsl:value-of select="vocabulary:clean( //*[local-name()='language'], 'dnet:languages')" />
</oaf:language>
<oaf:hostedBy>
<xsl:attribute name="name">
<xsl:value-of select="$varOfficialName" />
</xsl:attribute>
<xsl:attribute name="id">
<xsl:value-of select="$varDataSourceId" />
</xsl:attribute>
</oaf:hostedBy>
<oaf:collectedFrom>
<xsl:attribute name="name">
<xsl:value-of select="$varOfficialName" />
</xsl:attribute>
<xsl:attribute name="id">
<xsl:value-of select="$varDataSourceId" />
</xsl:attribute>
</oaf:collectedFrom>
</metadata>
<xsl:copy-of select="//*[local-name() = 'about']" />
</record>
</xsl:template>
<xsl:template match="node()|@*">
  <!-- Identity rule: shallow-copies the current node or attribute and recurses into
       its attributes and children, so more specific templates can override parts. -->
  <xsl:copy>
    <xsl:apply-templates select="@*|node()" />
  </xsl:copy>
</xsl:template>
<xsl:template match="//*[local-name() = 'metadata']//*[local-name() = 'resource']">
  <!-- Shallow-copies the resource element found below metadata and applies templates
       to its attributes and child nodes. -->
  <xsl:copy>
    <xsl:apply-templates select="@*|node()" />
  </xsl:copy>
</xsl:template>
<xsl:template match="//*[local-name() = 'resource']/*[local-name()='identifier']">
  <!-- Only identifiers typed 'Handle' whose value contains '://hdl.handle.net/' are
       written to the output, rebuilt in the DataCite kernel-4 namespace.
       Every other identifier child of resource produces no output here. -->
  <xsl:if test="@identifierType = 'Handle' and contains(., '://hdl.handle.net/')">
    <xsl:element name="identifier" namespace="http://datacite.org/schema/kernel-4">
      <xsl:attribute name="identifierType" select="'Handle'" />
      <xsl:value-of select="." />
    </xsl:element>
  </xsl:if>
</xsl:template>
<xsl:template match="//*[local-name() = 'resource']/*[local-name()='alternateIdentifier']">
  <!-- An alternateIdentifier lacking @alternateIdentifierType is rebuilt in the DataCite
       kernel-4 namespace, taking the type from its @identifierType attribute.
       An alternateIdentifier that already has @alternateIdentifierType is copied verbatim. -->
  <xsl:choose>
    <xsl:when test="not(@alternateIdentifierType)">
      <xsl:element name="alternateIdentifier" namespace="http://datacite.org/schema/kernel-4">
        <xsl:attribute name="alternateIdentifierType" select="@identifierType" />
        <xsl:value-of select="." />
      </xsl:element>
    </xsl:when>
    <xsl:otherwise>
      <xsl:copy-of select="." />
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<xsl:template match="//*[local-name() = 'header']">
  <!-- Copies the header with all of its attributes and children, then appends a
       dr:dateOfTransformation child holding the value of the $transDate parameter. -->
  <xsl:copy>
    <xsl:apply-templates select="@*|node()" />
    <xsl:element name="dr:dateOfTransformation">
      <xsl:value-of select="$transDate" />
    </xsl:element>
  </xsl:copy>
</xsl:template>
</xsl:stylesheet>

View File

@ -15,7 +15,7 @@
exclude-result-prefixes="xsl vocabulary dateCleaner">
<xsl:param name="varOfficialName" />
<xsl:param name="varDsType" />
<!-- deprecated , marked to remove <xsl:param name="varDsType" /> -->
<xsl:param name="varDataSourceId" />
<xsl:param name="varFP7" select = "'corda_______::'"/>
@ -68,6 +68,13 @@
<xsl:call-template name="validRecord" />
</xsl:template>
<!-- skip/terminate not compliant record -->
<xsl:template name="terminate">
<xsl:message terminate="yes">
record is not compliant, transformation is interrupted.
</xsl:message>
</xsl:template>
<xsl:template name="validRecord">
<record>
<xsl:apply-templates select="//*[local-name() = 'header']" />
@ -276,6 +283,9 @@
<xsl:value-of select="$varEmbargoEndDate"/>
</oaf:embargoenddate>
</xsl:when>
<xsl:otherwise>
<xsl:call-template name="terminate"/>
</xsl:otherwise>
</xsl:choose>
</xsl:if>
@ -301,6 +311,9 @@
</dr:CobjCategory>
-->
</xsl:when>
<xsl:otherwise>
<xsl:call-template name="terminate"/>
</xsl:otherwise>
</xsl:choose>
<!-- review status -->

View File

@ -0,0 +1,226 @@
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:oaf="http://namespace.openaire.eu/oaf" xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:TransformationFunction="eu.dnetlib.data.collective.transformation.core.xsl.ext.TransformationFunctionProxy" xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" exclude-result-prefixes="TransformationFunction" extension-element-prefixes="TransformationFunction" version="1.1">
<xsl:param name="varOfficialName" />
<xsl:param name="varDsType" />
<xsl:param name="varDataSourceId" />
<xsl:param name="index" select="0" />
<xsl:param name="transDate" select="current-dateTime()" />
<xsl:variable name="tf" select="TransformationFunction:getInstance()" />
<xsl:template match="/">
  <!-- Entry point: every input document is handed straight to the named
       template "validRecord", which builds the output record.
       No local variables are declared here: a variable local to this template
       would not be visible inside the called template anyway. -->
  <xsl:call-template name="validRecord" />
</xsl:template>
<xsl:template name="terminate">
<!-- Named template without parameters: emits the message below with terminate="yes", which makes the XSLT processor abort the transformation of the current record. -->
<xsl:message terminate="yes">
record is not compliant, transformation is interrupted.
</xsl:message>
</xsl:template>
<xsl:template name="validRecord">
<record>
<xsl:apply-templates select="//*[local-name() = 'header']" />
<metadata>
<xsl:apply-templates select="//*[local-name() = 'metadata']//*[local-name() = 'resource']" /> <!-- for CoCoON many deleted records appeared among the transformed records -->
<xsl:if test="//oai:header/@status='deleted'">
<xsl:call-template name="terminate" />
</xsl:if>
<xsl:for-each select="//*[local-name() = 'resource']/*[local-name()='identifier'][@identifierType='Handle'][not(. = '123456789')]">
<oaf:identifier>
<xsl:attribute name="identifierType">
<xsl:value-of select="'handle'" />
</xsl:attribute>
<xsl:if test="contains(., '://hdl.handle.net/')">
<xsl:value-of select="substring-after(., '://hdl.handle.net/')" />
</xsl:if>
<xsl:if test="not(contains(., '://hdl.handle.net/'))">
<xsl:value-of select="." />
</xsl:if>
</oaf:identifier>
</xsl:for-each>
<xsl:for-each select="//*[local-name() = 'resource']/*[local-name()='identifier'][@identifierType='DOI']">
<oaf:identifier>
<xsl:attribute name="identifierType">
<xsl:value-of select="'doi'" />
</xsl:attribute>
<xsl:if test="contains(., '://dx.doi.org/')">
<xsl:value-of select="substring-after(., '://dx.doi.org/')" />
</xsl:if>
<xsl:if test="not(contains(., '://dx.doi.org/'))">
<xsl:value-of select="." />
</xsl:if>
</oaf:identifier>
</xsl:for-each> <!-- This is the only difference with the generic datacite repo rule: since all datasets from Nakala are Images -->
<xsl:if test="lower-case(//*[local-name()='resourceType']/@*[local-name()='resourceTypeGeneral']) = 'image'">
<oaf:concept>
<xsl:attribute name="id">
<xsl:value-of select="'dariah'" />
</xsl:attribute>
</oaf:concept>
</xsl:if>
<xsl:if test="//*[local-name()='date']/@dateType='Available' and //*[local-name()='datasourceprefix']!='r33ffb097cef'">
<xsl:variable name="varEmbargoEndDate" select="TransformationFunction:convertString($tf, normalize-space(//*[local-name()='date'][@dateType='Available']), 'DateISO8601')" />
<xsl:choose>
<xsl:when test="string-length($varEmbargoEndDate) &gt; 0">
<oaf:embargoenddate>
<xsl:value-of select="$varEmbargoEndDate" />
</oaf:embargoenddate>
</xsl:when>
<xsl:otherwise>
<xsl:call-template name="terminate" />
</xsl:otherwise>
</xsl:choose>
</xsl:if>
<xsl:variable name="varTypLst" select="distinct-values((//*[local-name()='resourceType']/(., @resourceTypeGeneral)))" />
<xsl:variable name="varCobjCatLst" select="distinct-values((for $i in $varTypLst return TransformationFunction:convertString($tf, normalize-space($i), 'TextTypologies')))" />
<xsl:variable name="varCobjSupLst" select="for $i in $varCobjCatLst return concat($i, '###', TransformationFunction:convertString($tf, normalize-space($i), 'SuperTypes'))" />
<dr:CobjCategory>
<xsl:choose>
<xsl:when test="count($varCobjSupLst[not(substring-after(., '###') = 'other') and not(substring-before(., '###') = ('0038', '0039', '0040'))]) &gt; 0">
<xsl:variable name="varCobjSup" select="$varCobjSupLst[not(substring-after(., '###') = 'other') and not(substring-before(., '###') = ('0038', '0039', '0040'))][1]" />
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')" />
<xsl:value-of select="substring-before($varCobjSup, '###')" />
</xsl:when>
<xsl:when test="count($varCobjSupLst[not(substring-after(., '###') = 'other')]) &gt; 0">
<xsl:variable name="varCobjSup" select="$varCobjSupLst[not(substring-after(., '###') = 'other')][1]" />
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')" />
<xsl:value-of select="substring-before($varCobjSup, '###')" />
</xsl:when>
<xsl:when test="count($varCobjSupLst[not(substring-before(., '###') = ('0020', '0000'))]) &gt; 0">
<xsl:variable name="varCobjSup" select="$varCobjSupLst[not(substring-before(., '###') = ('0020', '0000'))][1]" />
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')" />
<xsl:value-of select="substring-before($varCobjSup, '###')" />
</xsl:when>
<xsl:when test="count($varCobjSupLst[not(substring-before(., '###') = ('0000'))]) &gt; 0">
<xsl:variable name="varCobjSup" select="$varCobjSupLst[not(substring-before(., '###') = ('0000'))][1]" />
<xsl:attribute name="type" select="substring-after($varCobjSup, '###')" />
<xsl:value-of select="substring-before($varCobjSup, '###')" />
</xsl:when>
<xsl:otherwise>
<xsl:attribute name="type" select="'other'" />
<xsl:value-of select="'0000'" />
</xsl:otherwise>
</xsl:choose>
</dr:CobjCategory> <!-- review status --> <!-- no review hints found in resource type declarations, no version declarations found -->
<xsl:variable name="varRefereedConvt" select="for $i in ( //*[local-name()='resourceType']/(., @resourceTypeGeneral), //oai:setSpec, //*[local-name()='description']) return TransformationFunction:convertString($tf, normalize-space($i), 'ReviewLevels')" />
<xsl:variable name="varRefereedIdntf" select="( //*[local-name()=('identifier', 'alternateIdentifier')][count(//*[local-name()=('metadata', 'resource')]//*[local-name()=('identifier', 'alternateIdentifier')]) = 1][matches(lower-case(.), '(^|.*[\.\-_\\/\s\(\)%\d#:])pre[\.\-_\\/\s\(\)%\d#:]?prints?([\.\-_\\/\s\(\)%\d#:].*)?$')]/'0002', //*[local-name()=('identifier', 'alternateIdentifier')][count(//*[local-name()=('metadata', 'resource')]//*[local-name()=('identifier', 'alternateIdentifier')]) = 1][matches(lower-case(.), '(^|.*[\.\-_\\/\s\(\)%\d#:])refereed([\.\-_\\/\s\(\)%\d#:].*)?$')]/'0001', //*[local-name()=('identifier', 'alternateIdentifier')][count(//*[local-name()=('metadata', 'resource')]//*[local-name()=('identifier', 'alternateIdentifier')]) = 1][matches(lower-case(.), '.*-peer-reviewed-(fulltext-)?article-.*')]/'0001')" />
<xsl:variable name="varRefereedVersn" select="(//*[local-name()='version'][matches(lower-case(.), '.*peer[\s\-\.\\_/:%]?reviewed.*')]/'0001', //*[local-name()='version'][matches(normalize-space(lower-case(.)), '^(v|vs|version|rel|release)?[\s\.\-_]*0$')]/'0002', //*[local-name()='version'][matches(lower-case(.), '(^|[\s\-\.\\_/:%].*)(beta|draft|trial|test)([\s\-\.\\_/:%].*|$)')]/'0002', //*[local-name()='version'][matches(lower-case(.), '.*submi(tted|ssion|ttal).*')]/'0002') " />
<xsl:variable name="varRefereedOther" select="(//*[local-name()='publisher'][matches(lower-case(.), '.*[\s\-\.\\_/:%]pre[\s\-\.\\_/:%]?prints?([\s\-\.\\_/:%].*|$)')]/'0002', //*[local-name()='description'][matches(lower-case(.), '^peer[\s\-\.\\_/:%]?reviewed$')]/'0001', //*[local-name()='description'][matches(lower-case(.), '^pre[\s\-\.\\_/:%]?prints?$')]/'0002') " />
<xsl:variable name="varRefereedReltn" select="//*[local-name() = 'relatedIdentifier'][./@relationType/lower-case(.)='isreviewedby']/'0001'" />
<xsl:variable name="varRefereedDesct" select="(//*[local-name() = 'description'] [matches(lower-case(.), '.*(this\s*book|this\s*volume|it)\s*constitutes\s*the\s*(thoroughly\s*)?refereed') or matches(lower-case(.), '.*peer[\.\-_/\s\(\)]?review\s*under\s*responsibility\s*of.*') or matches(lower-case(.), '(this|a)\s*(article|preprint)\s*(has\s*been\s*)?(peer[\-\s]*)?reviewed\s*and\s*recommended\s*by\s*peer[\-\s]*community')]/'0001')" />
<!-- Merged sequence of all review-status hints computed by the varRefereed* variables; each source variable is listed exactly once. -->
<xsl:variable name="varRefereed" select="($varRefereedConvt, $varRefereedIdntf, $varRefereedReltn, $varRefereedVersn, $varRefereedOther, $varRefereedDesct)" />
<xsl:choose>
<xsl:when test="count($varRefereed[. = '0001']) &gt; 0">
<oaf:refereed>
<xsl:value-of select="'0001'" />
</oaf:refereed>
</xsl:when>
<xsl:when test="count($varRefereed[. = '0002']) &gt; 0">
<oaf:refereed>
<xsl:value-of select="'0002'" />
</oaf:refereed>
</xsl:when>
</xsl:choose>
<oaf:dateAccepted>
<xsl:value-of select="TransformationFunction:convertString($tf, normalize-space(//*[local-name()='publicationYear']), 'DateISO8601')" />
</oaf:dateAccepted>
<xsl:choose>
<xsl:when test="//*[local-name() = 'rights']/@rightsURI[starts-with(normalize-space(.), 'info:eu-repo/semantics')]">
<oaf:accessrights>
<xsl:value-of select="TransformationFunction:convertString($tf, //*[local-name() = 'rights']/@rightsURI[starts-with(normalize-space(.), 'info:eu-repo/semantics')], 'AccessRights')" />
</oaf:accessrights>
</xsl:when>
<xsl:otherwise>
<xsl:choose>
<xsl:when test="//*[local-name() = 'rights'][starts-with(normalize-space(.), 'http://creativecommons.org') or starts-with(normalize-space(.), 'Creative Commons') or starts-with(normalize-space(.), 'GNU LESSER GENERAL PUBLIC LICENSE')]">
<oaf:accessrights>
<xsl:text>OPEN</xsl:text>
</oaf:accessrights>
</xsl:when>
<xsl:when test="//*[local-name() = 'rights']/@rightsURI[starts-with(normalize-space(.), 'http://creativecommons.org') or starts-with(normalize-space(.), 'http://opendatacommons.org')]">
<oaf:accessrights>
<xsl:text>OPEN</xsl:text>
</oaf:accessrights>
</xsl:when>
<xsl:when test="//*[local-name() = 'rights'][starts-with(normalize-space(.), 'Open access data at least for academic use')]">
<oaf:accessrights>
<xsl:text>RESTRICTED</xsl:text>
</oaf:accessrights>
</xsl:when>
<xsl:otherwise>
<oaf:accessrights>
<xsl:text>UNKNOWN</xsl:text>
</oaf:accessrights>
</xsl:otherwise>
</xsl:choose>
</xsl:otherwise>
</xsl:choose>
<xsl:for-each select="//*[local-name()='rights']/@rightsURI[starts-with(normalize-space(.), 'http') and matches(., '.*(/licenses|/publicdomain|unlicense.org/|/legal-and-data-protection-notices|/download/license|/open-government-licence).*')]">
<oaf:license>
<xsl:value-of select="." />
</oaf:license>
</xsl:for-each>
<oaf:language>
<xsl:value-of select="TransformationFunction:convert($tf, //*[local-name()='language'], 'Languages')" />
</oaf:language>
<oaf:hostedBy>
<xsl:attribute name="name">
<xsl:value-of select="$varOfficialName" />
</xsl:attribute>
<xsl:attribute name="id">
<xsl:value-of select="$varDataSourceId" />
</xsl:attribute>
</oaf:hostedBy>
<oaf:collectedFrom>
<xsl:attribute name="name">
<xsl:value-of select="$varOfficialName" />
</xsl:attribute>
<xsl:attribute name="id">
<xsl:value-of select="$varDataSourceId" />
</xsl:attribute>
</oaf:collectedFrom>
</metadata>
<xsl:copy-of select="//*[local-name() = 'about']" />
</record>
</xsl:template>
<xsl:template match="node()|@*">
  <!-- Identity rule: shallow-copies the current node or attribute and recurses into
       its attributes and children, so more specific templates can override parts. -->
  <xsl:copy>
    <xsl:apply-templates select="@*|node()" />
  </xsl:copy>
</xsl:template>
<xsl:template match="//*[local-name() = 'metadata']//*[local-name() = 'resource']">
  <!-- Shallow-copies the resource element found below metadata and applies templates
       to its attributes and child nodes. -->
  <xsl:copy>
    <xsl:apply-templates select="@*|node()" />
  </xsl:copy>
</xsl:template>
<xsl:template match="//*[local-name() = 'resource']/*[local-name()='identifier']">
  <!-- Only identifiers typed 'Handle' whose value contains '://hdl.handle.net/' are
       written to the output, rebuilt in the DataCite kernel-4 namespace.
       Every other identifier child of resource produces no output here. -->
  <xsl:if test="@identifierType = 'Handle' and contains(., '://hdl.handle.net/')">
    <xsl:element name="identifier" namespace="http://datacite.org/schema/kernel-4">
      <xsl:attribute name="identifierType" select="'Handle'" />
      <xsl:value-of select="." />
    </xsl:element>
  </xsl:if>
</xsl:template>
<xsl:template match="//*[local-name() = 'resource']/*[local-name()='alternateIdentifier']">
  <!-- An alternateIdentifier lacking @alternateIdentifierType is rebuilt in the DataCite
       kernel-4 namespace, taking the type from its @identifierType attribute.
       An alternateIdentifier that already has @alternateIdentifierType is copied verbatim. -->
  <xsl:choose>
    <xsl:when test="not(@alternateIdentifierType)">
      <xsl:element name="alternateIdentifier" namespace="http://datacite.org/schema/kernel-4">
        <xsl:attribute name="alternateIdentifierType" select="@identifierType" />
        <xsl:value-of select="." />
      </xsl:element>
    </xsl:when>
    <xsl:otherwise>
      <xsl:copy-of select="." />
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<xsl:template match="//*[local-name() = 'header']">
  <!-- Copies the header with all of its attributes and children, then appends a
       dr:dateOfTransformation child holding the value of the $transDate parameter. -->
  <xsl:copy>
    <xsl:apply-templates select="@*|node()" />
    <xsl:element name="dr:dateOfTransformation">
      <xsl:value-of select="$transDate" />
    </xsl:element>
  </xsl:copy>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,282 @@
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.1"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
xmlns:TransformationFunction="eu.dnetlib.data.collective.transformation.core.xsl.ext.TransformationFunctionProxy"
extension-element-prefixes="TransformationFunction"
exclude-result-prefixes="TransformationFunction">
<!--
Metadata records contain no embracing resource element, but only a dataset element. Therefore, references to the top-level resource element are adapted, and namespaces are not copied into each single node. A resource wrapper element is added.
-->
<!-- Written to the name attribute of oaf:hostedBy and oaf:collectedFrom. -->
<xsl:param name="varOfficialName" />
<!-- Not referenced by any template of this stylesheet. NOTE(review): presumably kept so callers can pass the same parameter set to every script; confirm. -->
<xsl:param name="varDsType" />
<!-- Written to the id attribute of oaf:hostedBy and oaf:collectedFrom. -->
<xsl:param name="varDataSourceId" />
<!-- varFP7, varH2020 and index are not referenced by any template of this stylesheet. -->
<xsl:param name="varFP7" select="'corda_______::'"/>
<xsl:param name="varH2020" select="'corda__h2020::'"/>
<xsl:param name="index" select="0"/>
<!-- Extension-function instance passed as first argument to every TransformationFunction:convert / convertString call below. -->
<xsl:variable name="tf" select="TransformationFunction:getInstance()"/>
<!--
  Entry point: delegates the whole document to the named template validRecord.
  The former local variable datasourcePrefix was never read (all prefix checks in this
  stylesheet query the datasourceprefix element directly), so it has been removed.
-->
<xsl:template match="/">
    <xsl:call-template name="validRecord" />
</xsl:template>
<!-- Aborts the transformation of the current record: xsl:message with terminate="yes" stops processing after emitting the message below. Called when a record has no accepted resource type or no creator. -->
<xsl:template name="terminate">
<xsl:message terminate="yes">
record is not compliant, transformation is interrupted.
</xsl:message>
</xsl:template>
<!--
  Builds the output record:
    * copies the source header,
    * assembles a resource element from the source resource/dataset children, the
      creators template, datasource-specific copies and the relatedIdentifiers template,
    * appends the OpenAIRE fields (dr:CobjCategory, oaf:dateAccepted, oaf:accessrights,
      oaf:license, oaf:language, oaf:hostedBy, oaf:collectedFrom),
    * copies the source about element.
  The transformation is terminated when the resource type is not one of the accepted ones.
-->
<xsl:template name="validRecord">
    <record>
        <xsl:copy-of select="//*[local-name() = 'header']" />
        <metadata>
            <resource>
                <!--
                <xsl:apply-templates select="//*[local-name() = 'metadata']//*[local-name() = ('resource','dataset')]"/>
                <xsl:apply-templates select="//*[local-name() = 'metadata']"/>
                -->
                <xsl:apply-templates select="//*[local-name() = 'metadata']//*[local-name() = ('resource','dataset')]/*"/>
                <xsl:call-template name="creators" />
                <!-- Datasource-specific pass-through of the remaining resource/dataset children. -->
                <xsl:choose>
                    <xsl:when test="//*[local-name() = 'datasourceprefix'][.='r3853b408a4c']">
                        <xsl:copy-of copy-namespaces="no" select="//*[local-name() = 'metadata']//*[local-name() = ('resource','dataset')]/*[not(local-name()=('creators', 'alternateIdentifiers', 'relatedIdentifier'))]" />
                    </xsl:when>
                    <xsl:when test="//*[local-name() = 'datasourceprefix'][.='r38d07aef7b7']">
                        <xsl:copy-of copy-namespaces="no" select="//*[local-name() = 'metadata']//*[local-name() = ('resource','dataset')]/*[not(local-name()=('creators', 'alternateIdentifiers', 'relatedIdentifier', 'identifier'))]" />
                    </xsl:when>
                </xsl:choose>
                <xsl:call-template name="relatedIdentifiers" />
            </resource>
            <!-- OpenAIRE fields -->
            <!-- resource type, super type -->
            <xsl:choose>
                <!--
                <xsl:when test="//*[local-name()='resourceType']/@resourceTypeGeneral='Dataset' or //*[local-name()='resourceType']/@resourceTypeGeneral='Software' or //*[local-name()='resourceType']/@resourceTypeGeneral='Collection' or //*[local-name()='resourceType']/@resourceTypeGeneral='Film' or //*[local-name()='resourceType']/@resourceTypeGeneral='Sound' or //*[local-name()='resourceType']/@resourceTypeGeneral='PhysicalObject' or //*[local-name()='resourceType']/@resourceTypeGeneral='Audiovisual'">
                -->
                <xsl:when test="//*[local-name()='resourceType'][lower-case(.)='article'] or lower-case(//*[local-name()='resourceType']/@resourceTypeGeneral)=('dataset', 'software', 'collection', 'film', 'sound', 'physicalobject', 'audiovisual', 'model', 'workflow', 'service', 'image') or (//*[local-name()='resourceType'][lower-case(./@resourceTypeGeneral)='other' and lower-case(.)=('study', 'egi virtual appliance')])">
                    <xsl:variable name="varCobjCategory" select="TransformationFunction:convertString($tf, distinct-values(//*[local-name()='resourceType']/@resourceTypeGeneral), 'TextTypologies')" />
                    <xsl:variable name="varSuperType" select="TransformationFunction:convertString($tf, $varCobjCategory, 'SuperTypes')"/>
                    <dr:CobjCategory>
                        <xsl:attribute name="type" select="$varSuperType"/>
                        <xsl:value-of select="$varCobjCategory" />
                    </dr:CobjCategory>
                </xsl:when>
                <xsl:otherwise>
                    <xsl:call-template name="terminate"/>
                </xsl:otherwise>
            </xsl:choose>
            <!-- review status -->
            <!-- no review hints found for Reactome, and kaggle collection being broken -->
            <!-- date -->
            <oaf:dateAccepted>
                <xsl:value-of select="TransformationFunction:convertString($tf, normalize-space(//*[local-name()='publicationYear']), 'DateISO8601')"/>
            </oaf:dateAccepted>
            <!-- access level, licenses -->
            <xsl:choose>
                <xsl:when test="//*[local-name() = 'rightsList']/*[local-name() = 'rights']/@rightsURI">
                    <xsl:for-each select="//*[local-name() = 'rights']/@rightsURI">
                        <oaf:accessrights>
                            <!--
                              Only a leading 'https' scheme is rewritten to 'http' before the vocabulary
                              lookup. The previous concat('http', substring-after(., 'https')) reduced
                              every URI that does not contain 'https' (plain http URIs, info:eu-repo
                              URIs) to the bare string 'http'.
                            -->
                            <xsl:value-of select="TransformationFunction:convertString($tf, replace(., '^https', 'http'), 'AccessRights')" />
                        </oaf:accessrights>
                        <oaf:license>
                            <xsl:value-of select="."/>
                        </oaf:license>
                    </xsl:for-each>
                </xsl:when>
                <xsl:otherwise>
                    <oaf:accessrights>
                        <xsl:value-of select="'UNKNOWN'" />
                    </oaf:accessrights>
                </xsl:otherwise>
            </xsl:choose>
            <!-- language -->
            <oaf:language>
                <xsl:value-of select="TransformationFunction:convert($tf, //*[local-name()='language'], 'Languages')" />
            </oaf:language>
            <!-- hostedBy, collectedFrom -->
            <oaf:hostedBy>
                <xsl:attribute name="name">
                    <xsl:value-of select="$varOfficialName"/>
                </xsl:attribute>
                <xsl:attribute name="id">
                    <xsl:value-of select="$varDataSourceId"/>
                </xsl:attribute>
            </oaf:hostedBy>
            <oaf:collectedFrom>
                <xsl:attribute name="name">
                    <xsl:value-of select="$varOfficialName"/>
                </xsl:attribute>
                <xsl:attribute name="id">
                    <xsl:value-of select="$varDataSourceId"/>
                </xsl:attribute>
            </oaf:collectedFrom>
        </metadata>
        <xsl:copy-of select="//*[local-name() = 'about']" />
    </record>
</xsl:template>
<!-- Catch-all rule with an empty body: any node or attribute reached through xsl:apply-templates that has no more specific template produces no output. The identity copy that used to live here is kept below, commented out. -->
<xsl:template match="node()|@*">
<!--
<xsl:copy copy-namespaces="no">
<xsl:apply-templates select="node()|@*"/>
</xsl:copy>
-->
</xsl:template>
<!--
  Replaces the source alternateIdentifiers element with datasource-specific content.
    * prefix r3853b408a4c: one LandingPage alternate identifier derived from the record
      identifier (PathwayBrowser URL rewritten to a content/detail URL).
    * prefix r38d07aef7b7: the record identifier as 'local accession id', plus an
      identifier of type URL taken from the source alternateIdentifier typed 'URL'.
  For any other prefix nothing is written.
-->
<xsl:template match="//*[local-name() = ('resource','dataset')]/*[local-name()='alternateIdentifiers']">
    <xsl:variable name="prefixNodes" select="//*[local-name() = 'datasourceprefix']"/>
    <xsl:variable name="recordId" select="//*[local-name() = 'recordIdentifier']"/>
    <xsl:if test="$prefixNodes[.='r3853b408a4c']">
        <alternateIdentifiers>
            <alternateIdentifier alternateIdentifierType="LandingPage">
                <xsl:value-of select="concat('https://reactome.org/content/detail/', substring-after($recordId, 'https://reactome.org/PathwayBrowser/#/'))" />
            </alternateIdentifier>
        </alternateIdentifiers>
    </xsl:if>
    <xsl:if test="$prefixNodes[.='r38d07aef7b7']">
        <alternateIdentifiers>
            <alternateIdentifier alternateIdentifierType="local accession id">
                <xsl:value-of select="$recordId" />
            </alternateIdentifier>
        </alternateIdentifiers>
        <identifier identifierType="URL">
            <xsl:value-of select="//*[local-name() = 'alternateIdentifiers']/*[local-name() = 'alternateIdentifier'][./@*[local-name()='alternateIdentifierType']='URL']" />
        </identifier>
    </xsl:if>
</xsl:template>
<!--
<xsl:template match="//*[local-name() = ('resource','dataset')]/*[local-name()='identifier']">
<xsl:copy-of select="." copy-namespaces="no" />
<xsl:if test="not(//*[local-name() = ('resource','dataset')]/*[local-name()='alternateIdentifiers'])">
<xsl:element name="alternateIdentifiers" namespace="http://www.openarchives.org/OAI/2.0/">
</xsl:element>
</xsl:if>
</xsl:template>
-->
<!--
<xsl:template match="//*[local-name()='language']">
<oaf:language>
<xsl:value-of select="TransformationFunction:convert($tf, //*[local-name()='language'], 'Languages')" />
</oaf:language>
</xsl:template>
-->
<!--
  Emits a relatedIdentifiers wrapper containing every source relatedIdentifier (copied
  without namespaces) followed by an additional PMID-typed entry for each
  resource/dataset-level relatedIdentifier that starts with the PubMed URL prefix.
-->
<xsl:template name="relatedIdentifiers">
    <xsl:variable name="pubmedUrlPrefix" select="'http://www.ncbi.nlm.nih.gov/pubmed/'"/>
    <relatedIdentifiers>
        <xsl:copy-of copy-namespaces="no" select="//*[local-name()='relatedIdentifier']"/>
        <xsl:for-each select="//*[local-name() = ('resource','dataset')]/*[local-name()='relatedIdentifier'][starts-with(., $pubmedUrlPrefix)]">
            <xsl:element name="relatedIdentifier">
                <xsl:attribute name="relatedIdentifierType">PMID</xsl:attribute>
                <xsl:attribute name="relationType" select="@*[local-name()='relationType']"/>
                <!-- the PubMed id is whatever follows the URL prefix -->
                <xsl:value-of select="substring-after(., $pubmedUrlPrefix)" />
            </xsl:element>
        </xsl:for-each>
    </relatedIdentifiers>
</xsl:template>
<!--
  Writes the creators element, with behaviour depending on the datasource prefix.
    * r3853b408a4c: each creatorName is passed through the 'Person' conversion; names
      containing a comma are additionally split into givenName / familyName, and
      affiliations are carried over.
    * r38d07aef7b7: non-empty creatorName values are copied as they are; records whose
      creatorName matches the known script-injection test pattern are rejected.
    * any other prefix: the source creators element is copied without namespaces.
  In the two prefix-specific branches a record without any creator is rejected.
-->
<xsl:template name="creators">
    <xsl:variable name="prefixNodes" select="//*[local-name() = 'datasourceprefix']"/>
    <xsl:variable name="creatorNodes" select="//*[local-name() = 'creators']/*[local-name()='creator']"/>
    <xsl:choose>
        <xsl:when test="$prefixNodes[.='r3853b408a4c']">
            <creators>
                <xsl:for-each select="$creatorNodes">
                    <xsl:variable name="nameNode" select="*[local-name() = 'creatorName']"/>
                    <creator>
                        <xsl:choose>
                            <xsl:when test="contains($nameNode, ',')">
                                <!-- converted name is split at its first comma -->
                                <xsl:variable name="converted" select="TransformationFunction:convertString($tf, $nameNode, 'Person')"/>
                                <xsl:variable name="afterComma" select="normalize-space(substring-after($converted, ','))"/>
                                <xsl:variable name="beforeComma" select="normalize-space(substring-before($converted, ','))"/>
                                <creatorName>
                                    <xsl:value-of select="$afterComma" />
                                    <xsl:text>, </xsl:text>
                                    <xsl:value-of select="$beforeComma" />
                                </creatorName>
                                <givenName>
                                    <xsl:value-of select="$beforeComma" />
                                </givenName>
                                <familyName>
                                    <xsl:value-of select="$afterComma" />
                                </familyName>
                            </xsl:when>
                            <xsl:when test="string-length($nameNode) > 0 and not(contains($nameNode, ','))">
                                <creatorName>
                                    <xsl:value-of select="TransformationFunction:convertString($tf, $nameNode, 'Person')" />
                                </creatorName>
                            </xsl:when>
                        </xsl:choose>
                        <xsl:for-each select="*[local-name()='affiliation']">
                            <affiliation>
                                <xsl:value-of select="." />
                            </affiliation>
                        </xsl:for-each>
                    </creator>
                </xsl:for-each>
                <xsl:if test="not($creatorNodes)">
                    <xsl:call-template name="terminate"/>
                </xsl:if>
            </creators>
        </xsl:when>
        <xsl:when test="$prefixNodes[.='r38d07aef7b7']">
            <creators>
                <xsl:for-each select="$creatorNodes">
                    <xsl:variable name="nameNode" select="*[local-name() = 'creatorName']"/>
                    <creator>
                        <xsl:if test="string-length($nameNode) > 0">
                            <creatorName>
                                <xsl:value-of select="$nameNode" />
                            </creatorName>
                        </xsl:if>
                    </creator>
                </xsl:for-each>
                <!-- reject records without creators -->
                <xsl:if test="not($creatorNodes)">
                    <xsl:call-template name="terminate"/>
                </xsl:if>
                <!-- reject records whose creator name is the script-injection test string -->
                <xsl:if test="$creatorNodes/*[local-name()='creatorName'][starts-with(., 'test ') and ends-with(., ' + eval(location.hash.slice(1)), //')]">
                    <xsl:call-template name="terminate"/>
                </xsl:if>
            </creators>
        </xsl:when>
        <xsl:otherwise>
            <xsl:copy-of copy-namespaces="no" select="//*[local-name() = ('resource','dataset')]/*[local-name()='creators']"/>
        </xsl:otherwise>
    </xsl:choose>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,95 @@
<xsl:stylesheet xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:oaf="http://namespace.openaire.eu/oaf" xmlns:date="http://exslt.org/dates-and-times" xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:datacite="http://datacite.org/schema/kernel-4" version="2.0">
    <!--
      Turns one OpenAPC row (column elements addressed by their name attribute) into an
      OAI record with OpenAIRE fields. The xmlns:date binding above is no longer used by
      any expression; it is kept only so the namespace declarations copied to the output
      stay as they were.
    -->
    <xsl:output indent="yes" />
    <!-- varOfficialName, varDataSourceId, quote and baseURL are not referenced by any template of this stylesheet. -->
    <xsl:param name="varOfficialName" />
    <xsl:param name="varDataSourceId" />
    <xsl:param name="quote">"</xsl:param>
    <xsl:variable name="baseURL" select="string('https://raw.githubusercontent.com/OpenAPC/openapc-de/master/data/apc_de.csv')" />
    <!-- Aborts the transformation with a message; not called by any template of this stylesheet. -->
    <xsl:template name="terminate">
        <xsl:message terminate="yes"> record is not compliant, transformation is interrupted.
</xsl:message>
    </xsl:template>
    <!-- Entry point: the whole document is handled by oapcRecord. -->
    <xsl:template match="/">
        <xsl:call-template name="oapcRecord" />
    </xsl:template>
    <!--
      Builds the output record: source header children plus the transformation timestamp,
      identifiers (doi, pmcid, pmid), processing charge in EUR, journal, license, date,
      fixed typing/access-right fields and fixed hostedBy/collectedFrom/about blocks.
    -->
    <xsl:template name="oapcRecord">
        <xsl:variable name="period" select="//column[./@name = 'period']/text()" />
        <xsl:variable name="institution" select="//column[./@name = 'institution']/text()" />
        <xsl:variable name="doi" select="//column[./@name = 'doi']/text()" />
        <xsl:variable name="euro" select="//column[./@name = 'euro']/text()" />
        <xsl:variable name="issn" select="//column[./@name = 'issn']/text()" />
        <xsl:variable name="jftitle" select="//column[./@name = 'journal_full_title']/text()" />
        <xsl:variable name="pmcid" select="//column[./@name = 'pmcid']/text()" />
        <xsl:variable name="pmid" select="//column[./@name = 'pmid']/text()" />
        <!-- xsl:variable name="gridid" select="//column[./@name = 'grid_id']/text()"/>
        <xsl:variable name="rorid" select="//column[./@name = 'ror_id']/text()"/ -->
        <xsl:variable name="license" select="//column[./@name = 'license_ref']/text()" />
        <oai:record>
            <oai:header>
                <xsl:copy-of copy-namespaces="no" select="//*[local-name() = 'header']/*" />
                <dr:dateOfTransformation>
                    <!--
                      Standard XPath 2.0 function instead of the EXSLT date:date-time()
                      extension, which is not available in every Saxon edition.
                    -->
                    <xsl:value-of select="current-dateTime()" />
                </dr:dateOfTransformation>
            </oai:header>
            <metadata xmlns="http://namespace.openaire.eu/">
                <oaf:identifier identifierType="doi">
                    <xsl:value-of select="$doi" />
                </oaf:identifier>
                <oaf:identifier identifierType="pmcid">
                    <xsl:value-of select="$pmcid" />
                </oaf:identifier>
                <oaf:identifier identifierType="pmid">
                    <xsl:value-of select="$pmid" />
                </oaf:identifier>
                <!-- xsl:when test="//column[./@name = 'grid_id']/text()">
                <datacite:affiliation affiliationIdentifier="$gridid" affiliationIdentifierScehme="GRID" SchemeURI="https://www.grid.ac/">
                <xsl:value-of select="$institution" />
                </datacite:affiliation>
                </xsl:when -->
                <oaf:processingchargeamount>
                    <xsl:attribute name="currency">EUR</xsl:attribute>
                    <xsl:value-of select="$euro" />
                </oaf:processingchargeamount>
                <oaf:journal>
                    <xsl:attribute name="issn">
                        <xsl:value-of select="$issn" />
                    </xsl:attribute>
                    <xsl:value-of select="$jftitle" />
                </oaf:journal>
                <dc:license>
                    <xsl:value-of select="$license" />
                </dc:license>
                <dc:date>
                    <xsl:value-of select="$period" />
                </dc:date>
                <dr:CobjCategory type="publication">0004</dr:CobjCategory>
                <oaf:accessrights>OPEN</oaf:accessrights>
                <datacite:rights rightsURI="http://purl.org/coar/access_right/c_abf2">open access</datacite:rights>
                <oaf:hostedBy>
                    <xsl:attribute name="name">Global OpenAPC Initiative</xsl:attribute>
                    <xsl:attribute name="id">openaire____::openapc_initiative</xsl:attribute>
                </oaf:hostedBy>
                <oaf:collectedFrom>
                    <xsl:attribute name="name">Global OpenAPC Initiative</xsl:attribute>
                    <xsl:attribute name="id">openaire____::openapc_initiative</xsl:attribute>
                </oaf:collectedFrom>
            </metadata>
            <!-- namespace URI corrected: the host was misspelt with four w characters -->
            <oaf:about xmlns:oai="http://www.openarchives.org/OAI/2.0/">
                <oaf:datainfo>
                    <oaf:inferred>false</oaf:inferred>
                    <oaf:deletedbyinference>false</oaf:deletedbyinference>
                    <oaf:trust>0.9</oaf:trust>
                    <oaf:inferenceprovenance />
                    <oaf:provenanceaction classid="sysimport:crosswalk:datasetarchive" classname="sysimport:crosswalk:datasetarchive" schemeid="dnet:provenanceActions" schemename="dnet:provenanceActions" />
                </oaf:datainfo>
            </oaf:about>
        </oai:record>
    </xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,116 @@
<xsl:stylesheet xmlns:dc="http://purl.org/dc/elements/1.1/"
                xmlns:dri="http://www.driver-repository.eu/namespace/dri"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:oai="http://www.openarchives.org/OAI/2.0/"
                xmlns:oaf="http://namespace.openaire.eu/oaf"
                xmlns:date="http://exslt.org/dates-and-times"
                xmlns:dr="http://www.driver-repository.eu/namespace/dr"
                xmlns:datacite="http://datacite.org/schema/kernel-4"
                version="2.0">
    <!--
      Turns one OpenAPC row (column elements addressed by their name attribute) into an
      OAI record with OpenAIRE fields, including the paying institution as a DataCite
      affiliation when a GRID or ROR identifier is present. The xmlns:date binding above
      is no longer used by any expression; it is kept only so the namespace declarations
      copied to the output stay as they were.
    -->
    <xsl:output indent="yes"/>
    <!-- varOfficialName, varDataSourceId, quote and baseURL are not referenced by any template of this stylesheet. -->
    <xsl:param name="varOfficialName"/>
    <xsl:param name="varDataSourceId"/>
    <xsl:param name="quote">"</xsl:param>
    <xsl:variable name="baseURL" select="string('https://raw.githubusercontent.com/OpenAPC/openapc-de/master/data/apc_de.csv')"/>
    <!-- Aborts the transformation with a message; not called by any template of this stylesheet. -->
    <xsl:template name="terminate">
        <xsl:message terminate="yes"> record is not compliant, transformation is interrupted.
</xsl:message>
    </xsl:template>
    <!-- Entry point: the whole document is handled by oapcRecord. -->
    <xsl:template match="/">
        <xsl:call-template name="oapcRecord"/>
    </xsl:template>
    <!--
      Builds the output record: source header children plus the transformation timestamp,
      identifiers (doi, pmcid, pmid), institution affiliation, processing charge in EUR,
      journal, license, date, fixed typing/access-right fields and fixed
      hostedBy/collectedFrom/about blocks.
    -->
    <xsl:template name="oapcRecord">
        <xsl:variable name="period" select="//column[./@name = 'period']/text()"/>
        <xsl:variable name="institution" select="//column[./@name = 'institution']/text()"/>
        <xsl:variable name="doi" select="//column[./@name = 'doi']/text()"/>
        <xsl:variable name="euro" select="//column[./@name = 'euro']/text()"/>
        <xsl:variable name="issn" select="//column[./@name = 'issn']/text()"/>
        <xsl:variable name="jftitle" select="//column[./@name = 'journal_full_title']/text()"/>
        <xsl:variable name="pmcid" select="//column[./@name = 'pmcid']/text()"/>
        <xsl:variable name="pmid" select="//column[./@name = 'pmid']/text()"/>
        <xsl:variable name="gridid" select="//column[./@name = 'grid_id']/text()"/>
        <xsl:variable name="rorid" select="//column[./@name = 'ror_id']/text()"/>
        <xsl:variable name="license" select="//column[./@name = 'license_ref']/text()"/>
        <oai:record>
            <oai:header>
                <xsl:copy-of copy-namespaces="no" select="//*[local-name() = 'header']/*"/>
                <dr:dateOfTransformation>
                    <!--
                      Standard XPath 2.0 function instead of the EXSLT date:date-time()
                      extension, which is not available in every Saxon edition.
                    -->
                    <xsl:value-of select="current-dateTime()"/>
                </dr:dateOfTransformation>
            </oai:header>
            <metadata xmlns="http://namespace.openaire.eu/">
                <oaf:identifier identifierType="doi">
                    <xsl:value-of select="$doi" />
                </oaf:identifier>
                <oaf:identifier identifierType="pmcid">
                    <xsl:value-of select="$pmcid" />
                </oaf:identifier>
                <oaf:identifier identifierType="pmid">
                    <xsl:value-of select="$pmid" />
                </oaf:identifier>
                <!--
                  xsl:when is only allowed as a child of xsl:choose; the two branches were
                  previously placed directly inside the metadata element, which is a static
                  error. One affiliation is written per record: the GRID identifier is used
                  when present, otherwise the ROR identifier.
                -->
                <xsl:choose>
                    <xsl:when test="string-length($gridid) > 0">
                        <datacite:affiliation>
                            <xsl:attribute name="affiliationIdentifierScheme" select="'GRID'"/>
                            <xsl:attribute name="schemeURI" select="'https://www.grid.ac/'"/>
                            <xsl:attribute name="affiliationIdentifier" select="$gridid"/>
                            <xsl:value-of select="$institution" />
                        </datacite:affiliation>
                    </xsl:when>
                    <xsl:when test="string-length($rorid) > 0">
                        <datacite:affiliation>
                            <xsl:attribute name="affiliationIdentifierScheme" select="'ROR'"/>
                            <xsl:attribute name="affiliationIdentifier" select="$rorid"/>
                            <xsl:value-of select="$institution" />
                        </datacite:affiliation>
                    </xsl:when>
                </xsl:choose>
                <oaf:processingchargeamount>
                    <xsl:attribute name="currency">EUR</xsl:attribute>
                    <xsl:value-of select="$euro" />
                </oaf:processingchargeamount>
                <oaf:journal>
                    <xsl:attribute name="issn">
                        <xsl:value-of select="$issn" />
                    </xsl:attribute>
                    <xsl:value-of select="$jftitle" />
                </oaf:journal>
                <dc:license>
                    <xsl:value-of select="$license" />
                </dc:license>
                <dc:date>
                    <xsl:value-of select="$period" />
                </dc:date>
                <dr:CobjCategory type="publication">0004</dr:CobjCategory>
                <oaf:accessrights>OPEN</oaf:accessrights>
                <datacite:rights rightsURI="http://purl.org/coar/access_right/c_abf2">open access</datacite:rights>
                <oaf:hostedBy>
                    <xsl:attribute name="name">Global OpenAPC Initiative</xsl:attribute>
                    <xsl:attribute name="id">openaire____::openapc_initiative</xsl:attribute>
                </oaf:hostedBy>
                <oaf:collectedFrom>
                    <xsl:attribute name="name">Global OpenAPC Initiative</xsl:attribute>
                    <xsl:attribute name="id">openaire____::openapc_initiative</xsl:attribute>
                </oaf:collectedFrom>
            </metadata>
            <!-- namespace URI corrected: the host was misspelt with four w characters -->
            <oaf:about xmlns:oai="http://www.openarchives.org/OAI/2.0/">
                <oaf:datainfo>
                    <oaf:inferred>false</oaf:inferred>
                    <oaf:deletedbyinference>false</oaf:deletedbyinference>
                    <oaf:trust>0.9</oaf:trust>
                    <oaf:inferenceprovenance/>
                    <oaf:provenanceaction classid="sysimport:crosswalk:datasetarchive"
                                          classname="sysimport:crosswalk:datasetarchive"
                                          schemeid="dnet:provenanceActions"
                                          schemename="dnet:provenanceActions"/>
                </oaf:datainfo>
            </oaf:about>
        </oai:record>
    </xsl:template>
</xsl:stylesheet>