diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/dc_cleaning_OPENAIREplus_compliant_hal_orig b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/dc_cleaning_OPENAIREplus_compliant_hal_orig index d4eb71dc4..17d987fe8 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/dc_cleaning_OPENAIREplus_compliant_hal_orig +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/dc_cleaning_OPENAIREplus_compliant_hal_orig @@ -26,17 +26,27 @@ dri:recordIdentifier = xpath:"//dri:recordIdentifier"; // apply xpath:"//dc:contributor[starts-with(., 'European Project')]" if xpath:"string-length(replace(., '.*(\d{6,6}).*', '$1')) = 6" oaf:projectid = xpath:"concat($var1, replace(., '.*(\d{6,6}).*', '$1'))"; else $varDummy = "''"; apply xpath:"//dc:creator" if xpath:"string-length(.) > 0 and normalize-space(.) != ','" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''"; if xpath:"//dc:title[string-length(.)> 0]" $varDummy = "''"; else dc:coverage = skipRecord(); + dc:title = xpath:"//dc:title[string-length(.) > 0]/normalize-space(.)"; apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''"; apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''"; + +TODO apply xpath:"//dc:source" if xpath:"string-length(.) > 0" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''"; + +TODO dc:contributor = xpath:"//dc:contributor"; + // dc:description = xpath:"//dc:description/normalize-space(.)"; //dc:description = xpath:"string-join(//dc:description/normalize-space(.), concat('; ',codepoints-to-string(10)))"; dc:description = xpath:"string-join(//dc:description/normalize-space(.), '; ')"; + dc:format = xpath:"//dc:format"; + + $varHttpTest = "''"; oaf:fulltext = xpath:"//dc:identifier[starts-with(., 'http') and (ends-with(., 'document') or ends-with(., 'pdf'))]"; + //if xpath:"//dc:identifier[starts-with(., 'http') and (ends-with(., 'document') or ends-with(., 'pdf'))] or //dc:relation[starts-with(lower-case(normalize-space(.)), 'info:eu-repo/grantagreement')] or //dc:rights[starts-with(lower-case(normalize-space(.)), 'open') or contains(lower-case(normalize-space(.)), 'openaccess')] or //dc:accessRights[contains(lower-case(normalize-space(.)), 'openaccess')]" $var0 = "''"; else dc:coverage = skipRecord(); if xpath:"//dc:identifier[starts-with(., 'http')]" $var0 = "''"; else dc:coverage = skipRecord(); apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)"; @@ -46,6 +56,8 @@ dc:type = xpath:"//dc:type"; dc:format = xpath:"//dc:format"; dc:date = xpath:"//dc:date"; dc:language = Convert(xpath:"//dc:language", Languages); + + $varDateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()"); if xpath:"starts-with($varDateAccepted, '0')" oaf:dateAccepted = $varDummy; else oaf:dateAccepted = $varDateAccepted; $varEmbargoEnd = xpath:"//dc:date[matches(normalize-space(.), '(.*)(info:eu-repo/date/embargoEnd/)(\d\d\d\d-\d\d-\d\d)', 'i')][contains(lower-case(.), 'info:eu-repo')]/replace(normalize-space(.), '(.*)(info:eu-repo/date/embargoEnd/)(\d\d\d\d-\d\d-\d\d)', '$3', 'i')"; diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/xslt_cleaningOAplus_compliant_hal_hadoop.xsl b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/xslt_cleaningOAplus_compliant_hal_hadoop.xsl new file mode 100644 index 000000000..7e0d106e9 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/xslt_cleaningOAplus_compliant_hal_hadoop.xsl @@ -0,0 +1,431 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + record is not compliant, skip transformation. + + + + +
+ + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +