From 21f32b83c68f75fcf7122454f0f66cd16a5e0f2f Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 11 Apr 2022 08:52:12 +0200 Subject: [PATCH 01/12] [graph enrichment] fixed country_propagation oozie workflow definition, parameter saveGraph is not needed anymore by the SparkCountryPropagationJob --- .../eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml index 202c86a6d..271ccbf72 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml @@ -274,7 +274,6 @@ --sourcePath${sourcePath}/publication --preparedInfoPath${workingDir}/publication - --saveGraph${saveGraph} --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication --outputPath${outputPath}/publication @@ -304,7 +303,6 @@ --sourcePath${sourcePath}/dataset --preparedInfoPath${workingDir}/dataset - --saveGraph${saveGraph} --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset --outputPath${outputPath}/dataset @@ -334,7 +332,6 @@ --sourcePath${sourcePath}/otherresearchproduct --preparedInfoPath${workingDir}/otherresearchproduct - --saveGraph${saveGraph} --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct --outputPath${outputPath}/otherresearchproduct @@ -364,7 +361,6 @@ --sourcePath${sourcePath}/software --preparedInfoPath${workingDir}/software - --saveGraph${saveGraph} --resultTableNameeu.dnetlib.dhp.schema.oaf.Software --outputPath${outputPath}/software From b93a141d6c76aa4e89413868f87a9b6604046ba1 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 12 Apr 2022 10:25:42 +0200 Subject: [PATCH 02/12] [Doiboost] fixed fundingReference extraction from the Crossref records --- .../doiboost/crossref/Crossref2Oaf.scala | 39 +++++++------------ 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index 0cb08ea94..b4d6d67e2 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -446,16 +446,12 @@ case object Crossref2Oaf { case "10.13039/501100000781" => generateSimpleRelationFromAward(funder, "corda_______", extractECAward) generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) - case "10.13039/100000001" => - generateSimpleRelationFromAward(funder, "nsf_________", a => a) - case "10.13039/501100001665" => - generateSimpleRelationFromAward(funder, "anr_________", a => a) - case "10.13039/501100002341" => - generateSimpleRelationFromAward(funder, "aka_________", a => a) + case "10.13039/100000001" => generateSimpleRelationFromAward(funder, "nsf_________", a => a) + case "10.13039/501100001665" => generateSimpleRelationFromAward(funder, "anr_________", a => a) + case "10.13039/501100002341" => generateSimpleRelationFromAward(funder, "aka_________", a => a) case "10.13039/501100001602" => - generateSimpleRelationFromAward(funder, "aka_________", a => a.replace("SFI", "")) - case "10.13039/501100000923" => - generateSimpleRelationFromAward(funder, "arc_________", a => a) + generateSimpleRelationFromAward(funder, "sfi_________", a => a.replace("SFI", "")) + case "10.13039/501100000923" => generateSimpleRelationFromAward(funder, "arc_________", a => a) case "10.13039/501100000038" => val targetId = getProjectId("nserc_______", "1e5e62235d094afd01cd56e65112fc63") queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) @@ -468,14 +464,10 @@ case object Crossref2Oaf { val targetId = getProjectId("cihr________", "1e5e62235d094afd01cd56e65112fc63") queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) - case "10.13039/501100002848" => - generateSimpleRelationFromAward(funder, "conicytf____", a => a) - case "10.13039/501100003448" => - generateSimpleRelationFromAward(funder, "gsrt________", extractECAward) - case "10.13039/501100010198" => - generateSimpleRelationFromAward(funder, "sgov________", a => a) - case "10.13039/501100004564" => - generateSimpleRelationFromAward(funder, "mestd_______", extractECAward) + case "10.13039/501100002848" => generateSimpleRelationFromAward(funder, "conicytf____", a => a) + case "10.13039/501100003448" => generateSimpleRelationFromAward(funder, "gsrt________", extractECAward) + case "10.13039/501100010198" => generateSimpleRelationFromAward(funder, "sgov________", a => a) + case "10.13039/501100004564" => generateSimpleRelationFromAward(funder, "mestd_______", extractECAward) case "10.13039/501100003407" => generateSimpleRelationFromAward(funder, "miur________", a => a) val targetId = getProjectId("miur________", "1e5e62235d094afd01cd56e65112fc63") @@ -487,15 +479,11 @@ case object Crossref2Oaf { "irb_hr______", a => a.replaceAll("Project No.", "").replaceAll("HRZZ-", "") ) - case "10.13039/501100006769" => - generateSimpleRelationFromAward(funder, "rsf_________", a => a) - case "10.13039/501100001711" => - generateSimpleRelationFromAward(funder, "snsf________", snsfRule) - case "10.13039/501100004410" => - generateSimpleRelationFromAward(funder, "tubitakf____", a => a) - case "10.10.13039/100004440" => - generateSimpleRelationFromAward(funder, "wt__________", a => a) + case "10.13039/501100006769" => generateSimpleRelationFromAward(funder, "rsf_________", a => a) + case "10.13039/501100001711" => generateSimpleRelationFromAward(funder, "snsf________", snsfRule) + case "10.13039/501100004410" => generateSimpleRelationFromAward(funder, "tubitakf____", a => a) case "10.13039/100004440" => + generateSimpleRelationFromAward(funder, "wt__________", a => a) val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63") queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) @@ -516,6 +504,7 @@ case object Crossref2Oaf { case "CONICYT, Programa de Formación de Capital Humano Avanzado" => generateSimpleRelationFromAward(funder, "conicytf____", extractECAward) case "Wellcome Trust Masters Fellowship" => + generateSimpleRelationFromAward(funder, "wt__________", a => a) val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63") queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) From d5b29d96a7c7b8d77e2b31599a65e4bd48164382 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 14 Apr 2022 11:07:04 +0200 Subject: [PATCH 03/12] fix merging in crossrefAggregator which creates dataInfo null --- .../main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala index 9323c994c..79b9e8183 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala @@ -71,6 +71,7 @@ object SparkGenerateDoiBoost { } } else { if (a != null && a._2 != null) { + b.mergeOAFDataInfo(a._2) b.mergeFrom(a._2) b.setId(a._1) val authors = AuthorMerger.mergeAuthor(b.getAuthor, a._2.getAuthor) @@ -87,6 +88,7 @@ object SparkGenerateDoiBoost { return b2 } else { if (b2 != null) { + b1.mergeOAFDataInfo(b2) b1.mergeFrom(b2) val authors = AuthorMerger.mergeAuthor(b1.getAuthor, b2.getAuthor) b1.setAuthor(authors) From 4314db55c8bf94150cabe07d7b2342fc18ec1001 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Tue, 19 Apr 2022 15:05:02 +0200 Subject: [PATCH 04/12] migration to services: update sql queries --- ...eryDatasourceOrganization.sql => queryServiceOrganization.sql} | 0 .../dhp/oa/graph/sql/{queryDatasources.sql => queryServices.sql} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/{queryDatasourceOrganization.sql => queryServiceOrganization.sql} (100%) rename dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/{queryDatasources.sql => queryServices.sql} (100%) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasourceOrganization.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServiceOrganization.sql similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasourceOrganization.sql rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServiceOrganization.sql diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql From c96a8613f8152311480aec93b65a627d4f6421f3 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Wed, 20 Apr 2022 12:07:49 +0200 Subject: [PATCH 05/12] update SQL queries --- .../dhp/oa/graph/sql/queryOrganizations.sql | 2 +- .../oa/graph/sql/queryProjectOrganization.sql | 2 +- .../dhp/oa/graph/sql/queryProjects.sql | 2 +- .../oa/graph/sql/queryProjects_production.sql | 2 +- .../oa/graph/sql/queryServiceOrganization.sql | 8 +-- .../dhp/oa/graph/sql/queryServices.sql | 57 +++++++++++-------- 6 files changed, 40 insertions(+), 33 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql index 11a8c50af..3451333c0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql @@ -27,7 +27,7 @@ SELECT 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction, array_remove(array_agg(DISTINCT i.pid || '###' || i.issuertype || '@@@' || i.issuertype), NULL) AS pid FROM dsm_organizations o - LEFT OUTER JOIN dsm_datasources d ON (d.id = o.collectedfrom) + LEFT OUTER JOIN dsm_services d ON (d.id = o.collectedfrom) LEFT OUTER JOIN dsm_organizationpids p ON (p.organization = o.id) LEFT OUTER JOIN dsm_identities i ON (i.pid = p.pid) GROUP BY diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjectOrganization.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjectOrganization.sql index bcdef8221..d9a77427d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjectOrganization.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjectOrganization.sql @@ -16,4 +16,4 @@ SELECT FROM project_organization po LEFT OUTER JOIN projects p ON (p.id = po.project) - LEFT OUTER JOIN dsm_datasources dc ON (dc.id = p.collectedfrom); \ No newline at end of file + LEFT OUTER JOIN dsm_services dc ON (dc.id = p.collectedfrom); \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjects.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjects.sql index db0da83f7..af5913e75 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjects.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjects.sql @@ -42,7 +42,7 @@ SELECT LEFT OUTER JOIN projectpids pp ON (pp.project = p.id) LEFT OUTER JOIN dsm_identities i ON (i.pid = pp.pid) - LEFT OUTER JOIN dsm_datasources dc ON (dc.id = p.collectedfrom) + LEFT OUTER JOIN dsm_services dc ON (dc.id = p.collectedfrom) LEFT OUTER JOIN project_fundingpath pf ON (pf.project = p.id) LEFT OUTER JOIN fundingpaths fp ON (fp.id = pf.funding) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjects_production.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjects_production.sql index 234bb7c3e..80cbda4b7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjects_production.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjects_production.sql @@ -40,7 +40,7 @@ SELECT LEFT OUTER JOIN projectpids pp ON (pp.project = p.id) LEFT OUTER JOIN dsm_identities i ON (i.pid = pp.pid) - LEFT OUTER JOIN dsm_datasources dc ON (dc.id = p.collectedfrom) + LEFT OUTER JOIN dsm_services dc ON (dc.id = p.collectedfrom) LEFT OUTER JOIN project_fundingpath pf ON (pf.project = p.id) LEFT OUTER JOIN fundingpaths fp ON (fp.id = pf.funding) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServiceOrganization.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServiceOrganization.sql index f72e72105..02bed4b65 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServiceOrganization.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServiceOrganization.sql @@ -1,5 +1,5 @@ SELECT - dor.datasource AS datasource, + dor.service AS service, dor.organization AS organization, NULL AS startdate, NULL AS enddate, @@ -11,6 +11,6 @@ SELECT dc.officialname AS collectedfromname, 'providedBy@@@dnet:datasources_organizations_typologies' AS semantics, d.provenanceaction || '@@@dnet:provenanceActions' AS provenanceaction -FROM dsm_datasource_organization dor - LEFT OUTER JOIN dsm_datasources d ON (dor.datasource = d.id) - LEFT OUTER JOIN dsm_datasources dc ON (dc.id = d.collectedfrom); \ No newline at end of file +FROM dsm_service_organization dor + LEFT OUTER JOIN dsm_services d ON (dor.service = d.id) + LEFT OUTER JOIN dsm_services dc ON (dc.id = d.collectedfrom); \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql index 2623c65c4..81be79420 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql @@ -57,8 +57,12 @@ SELECT NULL AS odpolicies, ARRAY(SELECT trim(s) FROM unnest(string_to_array(d.languages, ',')) AS s) AS odlanguages, - ARRAY(SELECT trim(s) - FROM unnest(string_to_array(d.od_contenttypes, '-')) AS s) AS odcontenttypes, + + -- Term provided only by OpenDOAR: + -- probably updating the TR it could be replaced by research_entity_types[] + -- But a study on the vocabulary terms is needed + -- REMOVED: ARRAY(SELECT trim(s) FROM unnest(string_to_array(d.od_contenttypes, '-')) AS s) AS odcontenttypes, + false AS inferred, false AS deletedbyinference, 0.9 AS trust, @@ -69,22 +73,25 @@ SELECT d.releasestartdate AS releasestartdate, d.releaseenddate AS releaseenddate, d.missionstatementurl AS missionstatementurl, - d.dataprovider AS dataprovider, - d.serviceprovider AS serviceprovider, + -- the following 2 fields (provided by re3data) have been replaced by research_entity_types[] + -- VALUE 'Research Data' : d.dataprovider AS dataprovider, + -- VALUE 'Services' : d.serviceprovider AS serviceprovider, d.databaseaccesstype AS databaseaccesstype, d.datauploadtype AS datauploadtype, d.databaseaccessrestriction AS databaseaccessrestriction, d.datauploadrestriction AS datauploadrestriction, - d.versioning AS versioning, + -- REPLACED BY version_control : d.versioning AS versioning, + d.version_control AS versioning, d.citationguidelineurl AS citationguidelineurl, - d.qualitymanagementkind AS qualitymanagementkind, + -- REMOVED (it was provided only by re3data: yes, no, unknown): d.qualitymanagementkind AS qualitymanagementkind, d.pidsystems AS pidsystems, d.certificates AS certificates, ARRAY[]::text[] AS policies, dc.id AS collectedfromid, dc.officialname AS collectedfromname, - d.typology||'@@@dnet:datasource_typologies' AS datasourcetype, - d.typology||'@@@dnet:datasource_typologies_ui' AS datasourcetypeui, + d._typology_to_remove_||'@@@dnet:datasource_typologies' AS datasourcetype, + d.eosc_type||'@@@dnet:eosc_types' AS eosc_type, + d.eosc_datasource_type||'@@@dnet:eosc_datasource_types' AS eosc_datasoorce_type, 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction, d.issn AS issnPrinted, d.eissn AS issnOnline, @@ -92,16 +99,15 @@ SELECT d.consenttermsofuse AS consenttermsofuse, d.fulltextdownload AS fulltextdownload, d.consenttermsofusedate AS consenttermsofusedate, - de.jurisdiction||'@@@eosc:jurisdictions' AS jurisdiction, - de.thematic AS thematic, - de.knowledge_graph AS knowledgegraph, - array(select unnest(de.content_policies)||'@@@eosc:contentpolicies') AS contentpolicies + d.jurisdiction||'@@@eosc:jurisdictions' AS jurisdiction, + d.thematic AS thematic, + -- REMOVED ???: d.knowledge_graph AS knowledgegraph, + array(select unnest(d.content_policies)||'@@@eosc:contentpolicies') AS contentpolicies -FROM dsm_datasources d -LEFT OUTER JOIN dsm_datasources_eosc de on (d.id = de.id) -LEFT OUTER JOIN dsm_datasources dc on (d.collectedfrom = dc.id) -LEFT OUTER JOIN dsm_api a ON (d.id = a.datasource) -LEFT OUTER JOIN dsm_datasourcepids di ON (d.id = di.datasource) +FROM dsm_services d +LEFT OUTER JOIN dsm_services dc on (d.collectedfrom = dc.id) +LEFT OUTER JOIN dsm_api a ON (d.id = a.service) +LEFT OUTER JOIN dsm_servicepids di ON (d.id = di.service) GROUP BY d.id, @@ -119,15 +125,16 @@ GROUP BY d.releasestartdate, d.releaseenddate, d.missionstatementurl, - d.dataprovider, - d.serviceprovider, + -- TODO REMOVED ???: d.dataprovider, + -- TODO REMOVED ???: d.serviceprovider, d.databaseaccesstype, d.datauploadtype, d.databaseaccessrestriction, d.datauploadrestriction, - d.versioning, + -- REPLACED BY version_control : d.versioning, + d.version_control d.citationguidelineurl, - d.qualitymanagementkind, + -- REMOVED: d.qualitymanagementkind, d.pidsystems, d.certificates, dc.id, @@ -135,7 +142,7 @@ GROUP BY d.issn, d.eissn, d.lissn, - de.jurisdiction, - de.thematic, - de.knowledge_graph, - de.content_policies + d.jurisdiction, + d.thematic, + -- REMOVED ???: de.knowledge_graph, + d.content_policies From 5ffc24d1baead98eea96bb50bcef12488fcda3f1 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 26 Apr 2022 16:18:41 +0200 Subject: [PATCH 06/12] EOSC Services - ongoing update --- .../raw/MigrateDbEntitiesApplication.java | 6 +-- .../dhp/oa/graph/sql/queryServices.sql | 51 +++++++++++++------ .../raw/MigrateDbEntitiesApplicationTest.java | 4 +- 3 files changed, 40 insertions(+), 21 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index b5801ca5c..b1be7f10a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -143,8 +143,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i smdbe.execute("queryClaims.sql", smdbe::processClaims); break; case openaire: - log.info("Processing datasources..."); - smdbe.execute("queryDatasources.sql", smdbe::processDatasource, verifyNamespacePrefix); + log.info("Processing services..."); + smdbe.execute("queryServices.sql", smdbe::processService, verifyNamespacePrefix); log.info("Processing projects..."); if (dbSchema.equalsIgnoreCase("beta")) { @@ -235,7 +235,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i dbClient.processResults(sql, consumer); } - public List processDatasource(final ResultSet rs) { + public List processService(final ResultSet rs) { try { final DataInfo info = prepareDataInfo(rs); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql index 81be79420..603b1e9b2 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql @@ -1,6 +1,6 @@ SELECT d.id AS datasourceid, - d.id || array_agg(distinct di.pid) AS identities, + array_remove(d.id || array_agg(distinct di.pid) filter (where di.pid like 'piwik%') || array_agg(distinct dds.duplicate), NULL) AS identities, d.officialname AS officialname, d.englishname AS englishname, d.contactemail AS contactemail, @@ -57,7 +57,8 @@ SELECT NULL AS odpolicies, ARRAY(SELECT trim(s) FROM unnest(string_to_array(d.languages, ',')) AS s) AS odlanguages, - + ARRAY(SELECT trim(s) + FROM unnest(string_to_array(d.languages, ',')) AS s) AS languages, -- Term provided only by OpenDOAR: -- probably updating the TR it could be replaced by research_entity_types[] -- But a study on the vocabulary terms is needed @@ -74,40 +75,54 @@ SELECT d.releaseenddate AS releaseenddate, d.missionstatementurl AS missionstatementurl, -- the following 2 fields (provided by re3data) have been replaced by research_entity_types[] - -- VALUE 'Research Data' : d.dataprovider AS dataprovider, - -- VALUE 'Services' : d.serviceprovider AS serviceprovider, + -- VALUE 'Research Data' : d.dataprovider AS dataprovider, + -- VALUE 'Services' : d.serviceprovider AS serviceprovider, d.databaseaccesstype AS databaseaccesstype, d.datauploadtype AS datauploadtype, d.databaseaccessrestriction AS databaseaccessrestriction, d.datauploadrestriction AS datauploadrestriction, -- REPLACED BY version_control : d.versioning AS versioning, - d.version_control AS versioning, + d.version_control AS versioning, + d.version_control AS versioncontrol, d.citationguidelineurl AS citationguidelineurl, - -- REMOVED (it was provided only by re3data: yes, no, unknown): d.qualitymanagementkind AS qualitymanagementkind, + d.pidsystems AS pidsystems, d.certificates AS certificates, ARRAY[]::text[] AS policies, - dc.id AS collectedfromid, - dc.officialname AS collectedfromname, + array_remove(dc.id||'@@@'||dc.officialname || array_agg(distinct dds_cf.id||'@@@'||dds_cf.officialname), NULL) AS collectedfrom, + d._typology_to_remove_||'@@@dnet:datasource_typologies' AS datasourcetype, - d.eosc_type||'@@@dnet:eosc_types' AS eosc_type, - d.eosc_datasource_type||'@@@dnet:eosc_datasource_types' AS eosc_datasoorce_type, + d.eosc_type||'@@@dnet:eosc_types' AS eosctype, + d.eosc_datasource_type||'@@@dnet:eosc_datasource_types' AS eoscdatasourcetype, 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction, d.issn AS issnPrinted, d.eissn AS issnOnline, d.lissn AS issnLinking, + d.research_entity_types AS researchentitytypes, d.consenttermsofuse AS consenttermsofuse, d.fulltextdownload AS fulltextdownload, d.consenttermsofusedate AS consenttermsofusedate, - d.jurisdiction||'@@@eosc:jurisdictions' AS jurisdiction, - d.thematic AS thematic, - -- REMOVED ???: d.knowledge_graph AS knowledgegraph, - array(select unnest(d.content_policies)||'@@@eosc:contentpolicies') AS contentpolicies + d.lastconsenttermsofusedate AS lastconsenttermsofusedate, + d.jurisdiction||'@@@eosc:jurisdictions' AS jurisdiction, + d.thematic AS thematic, + -- REMOVED ???: d.knowledge_graph AS knowledgegraph, + array(select unnest(d.content_policies)||'@@@eosc:contentpolicies') AS contentpolicies, + d.submission_policy_url AS submissionpolicyurl, + d.preservation_policy_url AS preservationpolicyurl, + d.research_product_access_policies AS researchproductaccesspolicies, + d.research_product_metadata_access_policies AS researchproductmetadataaccesspolicies FROM dsm_services d LEFT OUTER JOIN dsm_services dc on (d.collectedfrom = dc.id) LEFT OUTER JOIN dsm_api a ON (d.id = a.service) LEFT OUTER JOIN dsm_servicepids di ON (d.id = di.service) +LEFT OUTER JOIN dsm_dedup_services dds ON (d.id = dds.id) +LEFT OUTER JOIN dsm_services dds_dup ON (dds.duplicate = dds_dup.id) +LEFT OUTER JOIN dsm_services dds_cf ON (dds_dup.collectedfrom = dds_cf.id) + + +WHERE + d.dedup_main_service = true GROUP BY d.id, @@ -132,7 +147,7 @@ GROUP BY d.databaseaccessrestriction, d.datauploadrestriction, -- REPLACED BY version_control : d.versioning, - d.version_control + d.version_control, d.citationguidelineurl, -- REMOVED: d.qualitymanagementkind, d.pidsystems, @@ -145,4 +160,8 @@ GROUP BY d.jurisdiction, d.thematic, -- REMOVED ???: de.knowledge_graph, - d.content_policies + d.content_policies, + d.submission_policy_url, + d.preservation_policy_url, + d.research_product_access_policies, + d.research_product_metadata_access_policies \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java index 05ae5be74..488a1f70b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java @@ -63,10 +63,10 @@ public class MigrateDbEntitiesApplicationTest { } @Test - public void testProcessDatasource() throws Exception { + public void testProcessService() throws Exception { final List fields = prepareMocks("datasources_resultset_entry.json"); - final List list = app.processDatasource(rs); + final List list = app.processService(rs); assertEquals(1, list.size()); verifyMocks(fields); From f5f532d134159c1e14f72811cef1aba36f80d980 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 29 Apr 2022 12:25:24 +0200 Subject: [PATCH 07/12] EOSC Services - ongoing update --- .../dhp/schema/oaf/utils/OafMapperUtils.java | 13 + .../schema/oaf/utils/OafMapperUtilsTest.java | 185 +++++++------- .../raw/MigrateDbEntitiesApplication.java | 75 ++++-- .../dhp/oa/graph/sql/queryServices.sql | 52 ++-- .../raw/MigrateDbEntitiesApplicationTest.java | 137 +++++++++-- ...try.json => services_resultset_entry.json} | 229 ++++++++++++------ .../oa/provision/utils/XmlRecordFactory.java | 6 - pom.xml | 2 +- 8 files changed, 459 insertions(+), 240 deletions(-) rename dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/{datasources_resultset_entry.json => services_resultset_entry.json} (68%) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java index 0a51e8600..aac2a7801 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java @@ -3,6 +3,8 @@ package eu.dnetlib.dhp.schema.oaf.utils; import static eu.dnetlib.dhp.schema.common.ModelConstants.*; +import java.sql.Array; +import java.sql.SQLException; import java.util.*; import java.util.concurrent.ConcurrentHashMap; import java.util.function.Function; @@ -118,6 +120,17 @@ public class OafMapperUtils { .collect(Collectors.toList()); } + public static List listValues(Array values) throws SQLException { + if (Objects.isNull(values)) { + return null; + } + return Arrays + .stream((T[]) values.getArray()) + .filter(Objects::nonNull) + .distinct() + .collect(Collectors.toList()); + } + public static List> listFields(final DataInfo info, final List values) { return values .stream() diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java index 79629a171..9111ac2df 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java @@ -44,105 +44,104 @@ class OafMapperUtilsTest { @Test void testDateValidation() { - assertTrue(GraphCleaningFunctions.doCleanDate("2016-05-07T12:41:19.202Z ").isPresent()); - assertTrue(GraphCleaningFunctions.doCleanDate("2020-09-10 11:08:52 ").isPresent()); - assertTrue(GraphCleaningFunctions.doCleanDate(" 2016-04-05").isPresent()); + assertNotNull(GraphCleaningFunctions.cleanDate("2016-05-07T12:41:19.202Z ")); + assertNotNull(GraphCleaningFunctions.cleanDate("2020-09-10 11:08:52 ")); + assertNotNull(GraphCleaningFunctions.cleanDate(" 2016-04-05")); - assertEquals("2016-04-05", GraphCleaningFunctions.doCleanDate("2016 Apr 05").get()); + assertEquals("2016-04-05", GraphCleaningFunctions.cleanDate("2016 Apr 05")); - assertEquals("2009-05-08", GraphCleaningFunctions.doCleanDate("May 8, 2009 5:57:51 PM").get()); - assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct 7, 1970").get()); - assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct 7, '70").get()); - assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct. 7, 1970").get()); - assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct. 7, 70").get()); - assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon Jan 2 15:04:05 2006").get()); - assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon Jan 2 15:04:05 MST 2006").get()); - assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon Jan 02 15:04:05 -0700 2006").get()); - assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Monday, 02-Jan-06 15:04:05 MST").get()); - assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon, 02 Jan 2006 15:04:05 MST").get()); - assertEquals("2017-07-11", GraphCleaningFunctions.doCleanDate("Tue, 11 Jul 2017 16:28:13 +0200 (CEST)").get()); - assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon, 02 Jan 2006 15:04:05 -0700").get()); - assertEquals("2018-01-04", GraphCleaningFunctions.doCleanDate("Thu, 4 Jan 2018 17:53:36 +0000").get()); - assertEquals("2015-08-10", GraphCleaningFunctions.doCleanDate("Mon Aug 10 15:44:11 UTC+0100 2015").get()); + assertEquals("2009-05-08", GraphCleaningFunctions.cleanDate("May 8, 2009 5:57:51 PM")); + assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct 7, 1970")); + assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct 7, '70")); + assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct. 7, 1970")); + assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct. 7, 70")); + assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon Jan 2 15:04:05 2006")); + assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon Jan 2 15:04:05 MST 2006")); + assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon Jan 02 15:04:05 -0700 2006")); + assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Monday, 02-Jan-06 15:04:05 MST")); + assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon, 02 Jan 2006 15:04:05 MST")); + assertEquals("2017-07-11", GraphCleaningFunctions.cleanDate("Tue, 11 Jul 2017 16:28:13 +0200 (CEST)")); + assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon, 02 Jan 2006 15:04:05 -0700")); + assertEquals("2018-01-04", GraphCleaningFunctions.cleanDate("Thu, 4 Jan 2018 17:53:36 +0000")); + assertEquals("2015-08-10", GraphCleaningFunctions.cleanDate("Mon Aug 10 15:44:11 UTC+0100 2015")); assertEquals( "2015-07-03", - GraphCleaningFunctions.doCleanDate("Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)").get()); - assertEquals("2012-09-17", GraphCleaningFunctions.doCleanDate("September 17, 2012 10:09am").get()); - assertEquals("2012-09-17", GraphCleaningFunctions.doCleanDate("September 17, 2012 at 10:09am PST-08").get()); - assertEquals("2012-09-17", GraphCleaningFunctions.doCleanDate("September 17, 2012, 10:10:09").get()); - assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("October 7, 1970").get()); - assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("October 7th, 1970").get()); - assertEquals("2006-02-12", GraphCleaningFunctions.doCleanDate("12 Feb 2006, 19:17").get()); - assertEquals("2006-02-12", GraphCleaningFunctions.doCleanDate("12 Feb 2006 19:17").get()); - assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("7 oct 70").get()); - assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("7 oct 1970").get()); - assertEquals("2013-02-03", GraphCleaningFunctions.doCleanDate("03 February 2013").get()); - assertEquals("2013-07-01", GraphCleaningFunctions.doCleanDate("1 July 2013").get()); - assertEquals("2013-02-03", GraphCleaningFunctions.doCleanDate("2013-Feb-03").get()); - assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("3/31/2014").get()); - assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("03/31/2014").get()); - assertEquals("1971-08-21", GraphCleaningFunctions.doCleanDate("08/21/71").get()); - assertEquals("1971-01-08", GraphCleaningFunctions.doCleanDate("8/1/71").get()); - assertEquals("2014-08-04", GraphCleaningFunctions.doCleanDate("4/8/2014 22:05").get()); - assertEquals("2014-08-04", GraphCleaningFunctions.doCleanDate("04/08/2014 22:05").get()); - assertEquals("2014-08-04", GraphCleaningFunctions.doCleanDate("4/8/14 22:05").get()); - assertEquals("2014-02-04", GraphCleaningFunctions.doCleanDate("04/2/2014 03:00:51").get()); - assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 12:00:00 AM").get()); - assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 01:00:01 PM").get()); - assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 01:00 PM").get()); - assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 1:00 PM").get()); - assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 12:00 AM").get()); - assertEquals("2014-02-04", GraphCleaningFunctions.doCleanDate("4/02/2014 03:00:51").get()); - assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("03/19/2012 10:11:59").get()); - assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("03/19/2012 10:11:59.3186369").get()); - assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("2014/3/31").get()); - assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("2014/03/31").get()); - assertEquals("2014-04-08", GraphCleaningFunctions.doCleanDate("2014/4/8 22:05").get()); - assertEquals("2014-04-08", GraphCleaningFunctions.doCleanDate("2014/04/08 22:05").get()); - assertEquals("2014-04-02", GraphCleaningFunctions.doCleanDate("2014/04/2 03:00:51").get()); - assertEquals("2014-04-02", GraphCleaningFunctions.doCleanDate("2014/4/02 03:00:51").get()); - assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("2012/03/19 10:11:59").get()); - assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("2012/03/19 10:11:59.3186369").get()); - assertEquals("2014-04-08", GraphCleaningFunctions.doCleanDate("2014年04月08日").get()); - assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("2006-01-02T15:04:05+0000").get()); - assertEquals("2009-08-13", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09-07:00").get()); - assertEquals("2009-08-12", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09").get()); - assertEquals("2009-08-13", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09Z").get()); - assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 17:24:37.3186369").get()); - assertEquals("2012-08-03", GraphCleaningFunctions.doCleanDate("2012-08-03 18:31:59.257000000").get()); - assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 17:24:37.123").get()); - assertEquals("2013-04-01", GraphCleaningFunctions.doCleanDate("2013-04-01 22:43").get()); - assertEquals("2013-04-01", GraphCleaningFunctions.doCleanDate("2013-04-01 22:43:22").get()); - assertEquals("2014-12-16", GraphCleaningFunctions.doCleanDate("2014-12-16 06:20:00 UTC").get()); - assertEquals("2014-12-16", GraphCleaningFunctions.doCleanDate("2014-12-16 06:20:00 GMT").get()); - assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 05:24:37 PM").get()); - assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 13:13:43 +0800").get()); - assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 13:13:43 +0800 +08").get()); - assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 13:13:44 +09:00").get()); - assertEquals("2012-08-03", GraphCleaningFunctions.doCleanDate("2012-08-03 18:31:59.257000000 +0000 UTC").get()); - assertEquals("2015-09-30", GraphCleaningFunctions.doCleanDate("2015-09-30 18:48:56.35272715 +0000 UTC").get()); - assertEquals("2015-02-18", GraphCleaningFunctions.doCleanDate("2015-02-18 00:12:00 +0000 GMT").get()); - assertEquals("2015-02-18", GraphCleaningFunctions.doCleanDate("2015-02-18 00:12:00 +0000 UTC").get()); + GraphCleaningFunctions.cleanDate("Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)")); + assertEquals("2012-09-17", GraphCleaningFunctions.cleanDate("September 17, 2012 10:09am")); + assertEquals("2012-09-17", GraphCleaningFunctions.cleanDate("September 17, 2012 at 10:09am PST-08")); + assertEquals("2012-09-17", GraphCleaningFunctions.cleanDate("September 17, 2012, 10:10:09")); + assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("October 7, 1970")); + assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("October 7th, 1970")); + assertEquals("2006-02-12", GraphCleaningFunctions.cleanDate("12 Feb 2006, 19:17")); + assertEquals("2006-02-12", GraphCleaningFunctions.cleanDate("12 Feb 2006 19:17")); + assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("7 oct 70")); + assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("7 oct 1970")); + assertEquals("2013-02-03", GraphCleaningFunctions.cleanDate("03 February 2013")); + assertEquals("2013-07-01", GraphCleaningFunctions.cleanDate("1 July 2013")); + assertEquals("2013-02-03", GraphCleaningFunctions.cleanDate("2013-Feb-03")); + assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("3/31/2014")); + assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("03/31/2014")); + assertEquals("1971-08-21", GraphCleaningFunctions.cleanDate("08/21/71")); + assertEquals("1971-01-08", GraphCleaningFunctions.cleanDate("8/1/71")); + assertEquals("2014-08-04", GraphCleaningFunctions.cleanDate("4/8/2014 22:05")); + assertEquals("2014-08-04", GraphCleaningFunctions.cleanDate("04/08/2014 22:05")); + assertEquals("2014-08-04", GraphCleaningFunctions.cleanDate("4/8/14 22:05")); + assertEquals("2014-02-04", GraphCleaningFunctions.cleanDate("04/2/2014 03:00:51")); + assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 12:00:00 AM")); + assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 01:00:01 PM")); + assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 01:00 PM")); + assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 1:00 PM")); + assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 12:00 AM")); + assertEquals("2014-02-04", GraphCleaningFunctions.cleanDate("4/02/2014 03:00:51")); + assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("03/19/2012 10:11:59")); + assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("03/19/2012 10:11:59.3186369")); + assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("2014/3/31")); + assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("2014/03/31")); + assertEquals("2014-04-08", GraphCleaningFunctions.cleanDate("2014/4/8 22:05")); + assertEquals("2014-04-08", GraphCleaningFunctions.cleanDate("2014/04/08 22:05")); + assertEquals("2014-04-02", GraphCleaningFunctions.cleanDate("2014/04/2 03:00:51")); + assertEquals("2014-04-02", GraphCleaningFunctions.cleanDate("2014/4/02 03:00:51")); + assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("2012/03/19 10:11:59")); + assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("2012/03/19 10:11:59.3186369")); + assertEquals("2014-04-08", GraphCleaningFunctions.cleanDate("2014年04月08日")); + assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("2006-01-02T15:04:05+0000")); + assertEquals("2009-08-13", GraphCleaningFunctions.cleanDate("2009-08-12T22:15:09-07:00")); + assertEquals("2009-08-12", GraphCleaningFunctions.cleanDate("2009-08-12T22:15:09")); + assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 17:24:37.3186369")); + assertEquals("2012-08-03", GraphCleaningFunctions.cleanDate("2012-08-03 18:31:59.257000000")); + assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 17:24:37.123")); + assertEquals("2013-04-01", GraphCleaningFunctions.cleanDate("2013-04-01 22:43")); + assertEquals("2013-04-01", GraphCleaningFunctions.cleanDate("2013-04-01 22:43:22")); + assertEquals("2014-12-16", GraphCleaningFunctions.cleanDate("2014-12-16 06:20:00 UTC")); + assertEquals("2014-12-16", GraphCleaningFunctions.cleanDate("2014-12-16 06:20:00 GMT")); + assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 05:24:37 PM")); + assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 13:13:43 +0800")); + assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 13:13:43 +0800 +08")); + assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 13:13:44 +09:00")); + assertEquals("2012-08-03", GraphCleaningFunctions.cleanDate("2012-08-03 18:31:59.257000000 +0000 UTC")); + assertEquals("2015-09-30", GraphCleaningFunctions.cleanDate("2015-09-30 18:48:56.35272715 +0000 UTC")); + assertEquals("2015-02-18", GraphCleaningFunctions.cleanDate("2015-02-18 00:12:00 +0000 GMT")); + assertEquals("2015-02-18", GraphCleaningFunctions.cleanDate("2015-02-18 00:12:00 +0000 UTC")); assertEquals( - "2015-02-08", GraphCleaningFunctions.doCleanDate("2015-02-08 03:02:00 +0300 MSK m=+0.000000001").get()); + "2015-02-08", GraphCleaningFunctions.cleanDate("2015-02-08 03:02:00 +0300 MSK m=+0.000000001")); assertEquals( - "2015-02-08", GraphCleaningFunctions.doCleanDate("2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001").get()); - assertEquals("2017-07-19", GraphCleaningFunctions.doCleanDate("2017-07-19 03:21:51+00:00").get()); - assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26").get()); - assertEquals("2014-04-01", GraphCleaningFunctions.doCleanDate("2014-04").get()); - assertEquals("2014-01-01", GraphCleaningFunctions.doCleanDate("2014").get()); - assertEquals("2014-05-11", GraphCleaningFunctions.doCleanDate("2014-05-11 08:20:13,787").get()); - assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("3.31.2014").get()); - assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("03.31.2014").get()); - assertEquals("1971-08-21", GraphCleaningFunctions.doCleanDate("08.21.71").get()); - assertEquals("2014-03-01", GraphCleaningFunctions.doCleanDate("2014.03").get()); - assertEquals("2014-03-30", GraphCleaningFunctions.doCleanDate("2014.03.30").get()); - assertEquals("2014-06-01", GraphCleaningFunctions.doCleanDate("20140601").get()); - assertEquals("2014-07-22", GraphCleaningFunctions.doCleanDate("20140722105203").get()); - assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("1332151919").get()); - assertEquals("2013-11-12", GraphCleaningFunctions.doCleanDate("1384216367189").get()); - assertEquals("2013-11-12", GraphCleaningFunctions.doCleanDate("1384216367111222").get()); - assertEquals("2013-11-12", GraphCleaningFunctions.doCleanDate("1384216367111222333").get()); + "2015-02-08", GraphCleaningFunctions.cleanDate("2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001")); + assertEquals("2017-07-19", GraphCleaningFunctions.cleanDate("2017-07-19 03:21:51+00:00")); + assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26")); + assertEquals("2014-04-01", GraphCleaningFunctions.cleanDate("2014-04")); + assertEquals("2014-01-01", GraphCleaningFunctions.cleanDate("2014")); + assertEquals("2014-05-11", GraphCleaningFunctions.cleanDate("2014-05-11 08:20:13,787")); + assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("3.31.2014")); + assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("03.31.2014")); + assertEquals("1971-08-21", GraphCleaningFunctions.cleanDate("08.21.71")); + assertEquals("2014-03-01", GraphCleaningFunctions.cleanDate("2014.03")); + assertEquals("2014-03-30", GraphCleaningFunctions.cleanDate("2014.03.30")); + assertEquals("2014-06-01", GraphCleaningFunctions.cleanDate("20140601")); + assertEquals("2014-07-22", GraphCleaningFunctions.cleanDate("20140722105203")); + assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("1332151919")); + assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367189")); + assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367111222")); + assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367111222333")); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index b1be7f10a..924d53593 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -27,15 +27,7 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT; import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT; import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE; import static eu.dnetlib.dhp.schema.common.ModelConstants.USER_CLAIM; -import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.asString; -import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.createOpenaireId; -import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.dataInfo; -import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.field; -import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.journal; -import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listFields; -import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues; -import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier; -import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; import java.io.Closeable; import java.io.IOException; @@ -245,22 +237,20 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i ds .setOriginalId( Arrays - .asList((String[]) rs.getArray("identities").getArray()) + .asList((String[]) rs.getArray("originalid").getArray()) .stream() .filter(StringUtils::isNotBlank) .collect(Collectors.toList())); - ds - .setCollectedfrom( - listKeyValues( - createOpenaireId(10, rs.getString("collectedfromid"), true), - rs.getString("collectedfromname"))); - ds.setPid(new ArrayList<>()); + ds.setCollectedfrom(prepareCollectedfrom(rs.getArray("collectedfrom"))); + ds.setPid(prepareListOfStructProps(rs.getArray("pid"), info)); ds.setDateofcollection(asString(rs.getDate("dateofcollection"))); ds.setDateoftransformation(null); // Value not returned by the SQL query ds.setExtraInfo(new ArrayList<>()); // Values not present in the DB ds.setOaiprovenance(null); // Values not present in the DB ds.setDatasourcetype(prepareQualifierSplitting(rs.getString("datasourcetype"))); ds.setDatasourcetypeui(prepareQualifierSplitting(rs.getString("datasourcetypeui"))); + ds.setEosctype(prepareQualifierSplitting(rs.getString("eosctype"))); + ds.setEoscdatasourcetype(prepareQualifierSplitting(rs.getString("eoscdatasourcetype"))); ds.setOpenairecompatibility(prepareQualifierSplitting(rs.getString("openairecompatibility"))); ds.setOfficialname(field(rs.getString("officialname"), info)); ds.setEnglishname(field(rs.getString("englishname"), info)); @@ -277,6 +267,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i ds.setOdnumberofitemsdate(field(asString(rs.getDate("odnumberofitemsdate")), info)); ds.setOdpolicies(field(rs.getString("odpolicies"), info)); ds.setOdlanguages(prepareListFields(rs.getArray("odlanguages"), info)); + ds.setLanguages(listValues(rs.getArray("languages"))); ds.setOdcontenttypes(prepareListFields(rs.getArray("odcontenttypes"), info)); ds.setAccessinfopackage(prepareListFields(rs.getArray("accessinfopackage"), info)); ds.setReleasestartdate(field(asString(rs.getDate("releasestartdate")), info)); @@ -289,8 +280,9 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i ds.setDatabaseaccessrestriction(field(rs.getString("databaseaccessrestriction"), info)); ds.setDatauploadrestriction(field(rs.getString("datauploadrestriction"), info)); ds.setVersioning(field(rs.getBoolean("versioning"), info)); + ds.setVersioncontrol(rs.getBoolean("versioncontrol")); ds.setCitationguidelineurl(field(rs.getString("citationguidelineurl"), info)); - ds.setQualitymanagementkind(field(rs.getString("qualitymanagementkind"), info)); + ds.setPidsystems(field(rs.getString("pidsystems"), info)); ds.setCertificates(field(rs.getString("certificates"), info)); ds.setPolicies(new ArrayList<>()); // The sql query returns an empty array @@ -299,13 +291,20 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i journal( rs.getString("officialname"), rs.getString("issnPrinted"), rs.getString("issnOnline"), rs.getString("issnLinking"), info)); // Journal - ds.setDataInfo(info); - ds.setLastupdatetimestamp(lastUpdateTimestamp); + ds.setResearchentitytypes(listValues(rs.getArray("researchentitytypes"))); + ds.setProvidedproducttypes(listValues(rs.getArray("providedproducttypes"))); ds.setJurisdiction(prepareQualifierSplitting(rs.getString("jurisdiction"))); ds.setThematic(rs.getBoolean("thematic")); ds.setKnowledgegraph(rs.getBoolean("knowledgegraph")); ds.setContentpolicies(prepareListOfQualifiers(rs.getArray("contentpolicies"))); + ds.setSubmissionpolicyurl(rs.getString("submissionpolicyurl")); + ds.setPreservationpolicyurl(rs.getString("preservationpolicyurl")); + ds.setResearchproductaccesspolicies(listValues(rs.getArray("researchproductaccesspolicies"))); + ds + .setResearchproductmetadataaccesspolicies( + listValues(rs.getArray("researchproductmetadataaccesspolicies"))); + ds.setConsenttermsofuse(rs.getBoolean("consenttermsofuse")); ds.setFulltextdownload(rs.getBoolean("fulltextdownload")); ds @@ -313,8 +312,18 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i Optional .ofNullable( rs.getDate("consenttermsofusedate")) - .map(c -> c.toString()) + .map(java.sql.Date::toString) .orElse(null)); + ds + .setLastconsenttermsofusedate( + Optional + .ofNullable( + rs.getDate("lastconsenttermsofusedate")) + .map(java.sql.Date::toString) + .orElse(null)); + + ds.setDataInfo(info); + ds.setLastupdatetimestamp(lastUpdateTimestamp); return Arrays.asList(ds); } catch (final Exception e) { @@ -603,6 +612,32 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i String.format("%.3f", trust)); } + private List prepareCollectedfrom(Array values) throws SQLException { + if (Objects.isNull(values)) { + return null; + } + return Arrays + .stream((String[]) values.getArray()) + .filter(Objects::nonNull) + .distinct() + .map(s -> keyValueSplitting(s, "@@@")) + .collect(Collectors.toList()); + } + + public static KeyValue keyValueSplitting(final String s, String separator) { + if (StringUtils.isBlank(s)) { + return null; + } + final String[] arr = s.split(separator); + if (arr.length != 2) { + return null; + } + KeyValue kv = new KeyValue(); + kv.setKey(createOpenaireId(10, arr[0], true)); + kv.setValue(arr[1]); + return kv; + } + private Qualifier prepareQualifierSplitting(final String s) { if (StringUtils.isBlank(s)) { return null; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql index 603b1e9b2..3dbb46eaa 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql @@ -1,6 +1,7 @@ SELECT d.id AS datasourceid, - array_remove(d.id || array_agg(distinct di.pid) filter (where di.pid like 'piwik%') || array_agg(distinct dds.duplicate), NULL) AS identities, + array_remove(d.id || array_agg(distinct CASE WHEN dp.pid like 'piwik%' THEN di.pid ELSE NULL END) || array_agg(distinct dds.duplicate), NULL) AS originalid, + array_remove(array_agg(distinct CASE WHEN di.pid NOT LIKE 'piwik%' THEN di.pid||'###'||di.issuertype||'@@@'||'dnet:pid_types' ELSE NULL END), NULL) as pid, d.officialname AS officialname, d.englishname AS englishname, d.contactemail AS contactemail, @@ -9,8 +10,8 @@ SELECT THEN 'openaire-cris_1.1@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['openaire4.0']) - THEN - 'openaire4.0@@@dnet:datasourceCompatibilityLevel' + THEN + 'openaire4.0@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['driver', 'openaire2.0']) THEN 'driver-openaire2.0@@@dnet:datasourceCompatibilityLevel' @@ -40,29 +41,28 @@ SELECT END AS openairecompatibility, d.websiteurl AS websiteurl, d.logourl AS logourl, - array_agg(DISTINCT CASE WHEN a.protocol = 'oai' and last_aggregation_date is not null THEN a.baseurl ELSE NULL END) AS accessinfopackage, + array_remove(array_agg(DISTINCT CASE WHEN a.protocol = 'oai' and last_aggregation_date is not null THEN a.baseurl ELSE NULL END), NULL) AS accessinfopackage, d.latitude AS latitude, d.longitude AS longitude, d.namespaceprefix AS namespaceprefix, NULL AS odnumberofitems, NULL AS odnumberofitemsdate, - (SELECT array_agg(s|| '###keywords@@@dnet:subject_classification_typologies') FROM UNNEST( ARRAY( SELECT trim(s) - FROM unnest(string_to_array(d.subjects, '@@')) AS s)) AS s) AS subjects, + FROM unnest(string_to_array(d.subjects, '@@')) AS s)) AS s) AS subjects, d.description AS description, NULL AS odpolicies, - ARRAY(SELECT trim(s) - FROM unnest(string_to_array(d.languages, ',')) AS s) AS odlanguages, - ARRAY(SELECT trim(s) - FROM unnest(string_to_array(d.languages, ',')) AS s) AS languages, + array_remove(ARRAY(SELECT trim(s) + FROM unnest(string_to_array(regexp_replace(d.languages, '{|}|"', '', 'g') ',')) AS s), '{}') AS odlanguages, + array_remove(ARRAY(SELECT trim(s) + FROM unnest(string_to_array(regexp_replace(d.languages, '{|}|"', '', 'g') ',')) AS s), '{}') AS languages, -- Term provided only by OpenDOAR: -- probably updating the TR it could be replaced by research_entity_types[] -- But a study on the vocabulary terms is needed - -- REMOVED: ARRAY(SELECT trim(s) FROM unnest(string_to_array(d.od_contenttypes, '-')) AS s) AS odcontenttypes, + -- REMOVED: ARRAY(SELECT trim(s) FROM unnest(string_to_array(d.od_contenttypes, '-')) AS s) AS odcontenttypes, false AS inferred, false AS deletedbyinference, @@ -81,20 +81,24 @@ SELECT d.datauploadtype AS datauploadtype, d.databaseaccessrestriction AS databaseaccessrestriction, d.datauploadrestriction AS datauploadrestriction, - -- REPLACED BY version_control : d.versioning AS versioning, + -- REPLACED BY version_control : d.versioning AS versioning, d.version_control AS versioning, d.version_control AS versioncontrol, d.citationguidelineurl AS citationguidelineurl, - - d.pidsystems AS pidsystems, + array_to_string(array_agg(distinct dps.scheme), ' ') AS pidsystems, d.certificates AS certificates, ARRAY[]::text[] AS policies, - array_remove(dc.id||'@@@'||dc.officialname || array_agg(distinct dds_cf.id||'@@@'||dds_cf.officialname), NULL) AS collectedfrom, - + array_remove( + array( + select distinct cf + from unnest( + dc.id||'@@@'||dc.officialname || array_agg(distinct dds_cf.id||'@@@'||dds_cf.officialname) + ) as cf), + NULL) AS collectedfrom, d._typology_to_remove_||'@@@dnet:datasource_typologies' AS datasourcetype, + d._typology_to_remove_||'@@@dnet:datasource_typologies_ui' AS datasourcetypeui, d.eosc_type||'@@@dnet:eosc_types' AS eosctype, d.eosc_datasource_type||'@@@dnet:eosc_datasource_types' AS eoscdatasourcetype, - 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction, d.issn AS issnPrinted, d.eissn AS issnOnline, d.lissn AS issnLinking, @@ -107,19 +111,20 @@ SELECT d.thematic AS thematic, -- REMOVED ???: d.knowledge_graph AS knowledgegraph, array(select unnest(d.content_policies)||'@@@eosc:contentpolicies') AS contentpolicies, - d.submission_policy_url AS submissionpolicyurl, - d.preservation_policy_url AS preservationpolicyurl, - d.research_product_access_policies AS researchproductaccesspolicies, - d.research_product_metadata_access_policies AS researchproductmetadataaccesspolicies + nullif(trim(d.submission_policy_url), '') AS submissionpolicyurl, + nullif(trim(d.preservation_policy_url), '') AS preservationpolicyurl, + array_remove(d.research_product_access_policies, '') AS researchproductaccesspolicies, + array_remove(d.research_product_metadata_access_policies, '') AS researchproductmetadataaccesspolicies FROM dsm_services d LEFT OUTER JOIN dsm_services dc on (d.collectedfrom = dc.id) LEFT OUTER JOIN dsm_api a ON (d.id = a.service) -LEFT OUTER JOIN dsm_servicepids di ON (d.id = di.service) +LEFT OUTER JOIN dsm_servicepids dp ON (d.id = dp.service) +LEFT OUTER JOIN dsm_identities di ON (dp.pid = di.pid) LEFT OUTER JOIN dsm_dedup_services dds ON (d.id = dds.id) LEFT OUTER JOIN dsm_services dds_dup ON (dds.duplicate = dds_dup.id) LEFT OUTER JOIN dsm_services dds_cf ON (dds_dup.collectedfrom = dds_cf.id) - +LEFT OUTER JOIN dsm_pid_systems dps ON (d.id = dps.service) WHERE d.dedup_main_service = true @@ -150,7 +155,6 @@ GROUP BY d.version_control, d.citationguidelineurl, -- REMOVED: d.qualitymanagementkind, - d.pidsystems, d.certificates, dc.id, dc.officialname, diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java index 488a1f70b..390db0e4e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java @@ -12,8 +12,11 @@ import java.sql.Array; import java.sql.Date; import java.sql.ResultSet; import java.sql.SQLException; +import java.util.HashSet; import java.util.List; import java.util.Objects; +import java.util.Optional; +import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; @@ -28,12 +31,7 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.schema.oaf.Datasource; -import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.Organization; -import eu.dnetlib.dhp.schema.oaf.Project; -import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; @ExtendWith(MockitoExtension.class) @@ -64,7 +62,7 @@ public class MigrateDbEntitiesApplicationTest { @Test public void testProcessService() throws Exception { - final List fields = prepareMocks("datasources_resultset_entry.json"); + final List fields = prepareMocks("services_resultset_entry.json"); final List list = app.processService(rs); assertEquals(1, list.size()); @@ -72,13 +70,17 @@ public class MigrateDbEntitiesApplicationTest { final Datasource ds = (Datasource) list.get(0); assertValidId(ds.getId()); - assertValidId(ds.getCollectedfrom().get(0).getKey()); + ds + .getCollectedfrom() + .stream() + .map(KeyValue::getKey) + .forEach(dsId -> assertValidId(dsId)); assertEquals(getValueAsString("officialname", fields), ds.getOfficialname().getValue()); assertEquals(getValueAsString("englishname", fields), ds.getEnglishname().getValue()); - assertEquals(getValueAsString("contactemail", fields), ds.getContactemail().getValue()); assertEquals(getValueAsString("websiteurl", fields), ds.getWebsiteurl().getValue()); + assertEquals(getValueAsString("logourl", fields), ds.getLogourl()); + assertEquals(getValueAsString("contactemail", fields), ds.getContactemail().getValue()); assertEquals(getValueAsString("namespaceprefix", fields), ds.getNamespaceprefix().getValue()); - assertEquals(getValueAsString("collectedfromname", fields), ds.getCollectedfrom().get(0).getValue()); assertEquals(getValueAsString("officialname", fields), ds.getJournal().getName()); assertEquals(getValueAsString("issnPrinted", fields), ds.getJournal().getIssnPrinted()); assertEquals(getValueAsString("issnOnline", fields), ds.getJournal().getIssnOnline()); @@ -90,19 +92,103 @@ public class MigrateDbEntitiesApplicationTest { assertEquals("pubsrepository::journal", ds.getDatasourcetypeui().getClassid()); assertEquals("dnet:datasource_typologies_ui", ds.getDatasourcetypeui().getSchemeid()); + assertEquals("Data Source", ds.getEosctype().getClassid()); + assertEquals("Data Source", ds.getEosctype().getClassname()); + assertEquals("dnet:eosc_types", ds.getEosctype().getSchemeid()); + assertEquals("dnet:eosc_types", ds.getEosctype().getSchemename()); + + assertEquals("Journal archive", ds.getEoscdatasourcetype().getClassid()); + assertEquals("Journal archive", ds.getEoscdatasourcetype().getClassname()); + assertEquals("dnet:eosc_datasource_types", ds.getEoscdatasourcetype().getSchemeid()); + assertEquals("dnet:eosc_datasource_types", ds.getEoscdatasourcetype().getSchemename()); + + assertEquals("openaire4.0", ds.getOpenairecompatibility().getClassid()); + assertEquals("openaire4.0", ds.getOpenairecompatibility().getClassname()); + assertEquals("dnet:datasourceCompatibilityLevel", ds.getOpenairecompatibility().getSchemeid()); + assertEquals("dnet:datasourceCompatibilityLevel", ds.getOpenairecompatibility().getSchemename()); + + assertEquals(getValueAsDouble("latitude", fields).toString(), ds.getLatitude().getValue()); + assertEquals(getValueAsDouble("longitude", fields).toString(), ds.getLongitude().getValue()); + assertEquals(getValueAsString("dateofvalidation", fields), ds.getDateofvalidation()); + + assertEquals(getValueAsString("description", fields), ds.getDescription().getValue()); + + // TODO assertEquals(getValueAsString("subjects", fields), ds.getSubjects()); + + assertEquals("0.0", ds.getOdnumberofitems().getValue()); + assertEquals(getValueAsString("odnumberofitemsdate", fields), ds.getOdnumberofitemsdate()); + assertEquals(getValueAsString("odpolicies", fields), ds.getOdpolicies()); + + assertEquals( + getValueAsList("odlanguages", fields), + ds.getOdlanguages().stream().map(Field::getValue).collect(Collectors.toList())); + assertEquals(getValueAsList("languages", fields), ds.getLanguages()); + assertEquals( + getValueAsList("odcontenttypes", fields), + ds.getOdcontenttypes().stream().map(Field::getValue).collect(Collectors.toList())); + assertEquals( + getValueAsList("accessinfopackage", fields), + ds.getAccessinfopackage().stream().map(Field::getValue).collect(Collectors.toList())); + assertEquals(getValueAsString("releasestartdate", fields), ds.getReleasestartdate()); + assertEquals(getValueAsString("releaseenddate", fields), ds.getReleasestartdate()); + assertEquals(getValueAsString("missionstatementurl", fields), ds.getMissionstatementurl()); + + assertEquals(false, ds.getDataprovider().getValue()); + assertEquals(false, ds.getServiceprovider().getValue()); + + assertEquals(getValueAsString("databaseaccesstype", fields), ds.getDatabaseaccesstype()); + assertEquals(getValueAsString("datauploadtype", fields), ds.getDatauploadtype()); + assertEquals(getValueAsString("databaseaccessrestriction", fields), ds.getDatabaseaccessrestriction()); + assertEquals(getValueAsString("datauploadrestriction", fields), ds.getDatauploadrestriction()); + + assertEquals(false, ds.getVersioning().getValue()); + assertEquals(false, ds.getVersioncontrol()); + + assertEquals(getValueAsString("citationguidelineurl", fields), ds.getCitationguidelineurl()); + assertEquals(getValueAsString("pidsystems", fields), ds.getPidsystems()); + assertEquals(getValueAsString("certificates", fields), ds.getCertificates()); + + assertEquals(getValueAsList("researchentitytypes", fields), ds.getResearchentitytypes()); + assertEquals(getValueAsList("providedproducttypes", fields), ds.getProvidedproducttypes()); + assertEquals("National", ds.getJurisdiction().getClassid()); assertEquals("eosc:jurisdictions", ds.getJurisdiction().getSchemeid()); assertTrue(ds.getThematic()); assertTrue(ds.getKnowledgegraph()); - assertEquals(1, ds.getContentpolicies().size()); - assertEquals("Journal article", ds.getContentpolicies().get(0).getClassid()); - assertEquals("eosc:contentpolicies", ds.getContentpolicies().get(0).getSchemeid()); + HashSet cpSchemeId = ds + .getContentpolicies() + .stream() + .map(Qualifier::getSchemeid) + .collect(Collectors.toCollection(HashSet::new)); + assertTrue(cpSchemeId.size() == 1); + assertTrue(cpSchemeId.contains("eosc:contentpolicies")); + HashSet cpSchemeName = ds + .getContentpolicies() + .stream() + .map(Qualifier::getSchemename) + .collect(Collectors.toCollection(HashSet::new)); + assertTrue(cpSchemeName.size() == 1); + assertTrue(cpSchemeName.contains("eosc:contentpolicies")); + assertEquals(2, ds.getContentpolicies().size()); + assertEquals("Taxonomic classification", ds.getContentpolicies().get(0).getClassid()); + assertEquals("Resource collection", ds.getContentpolicies().get(1).getClassid()); + + assertEquals(getValueAsString("submissionpolicyurl", fields), ds.getSubmissionpolicyurl()); + assertEquals(getValueAsString("preservationpolicyurl", fields), ds.getPreservationpolicyurl()); + + assertEquals( + getValueAsList("researchproductaccesspolicies", fields), + ds.getResearchproductaccesspolicies()); + assertEquals( + getValueAsList("researchproductmetadataaccesspolicies", fields), + ds.getResearchproductmetadataaccesspolicies()); assertEquals(true, ds.getConsenttermsofuse()); assertEquals(true, ds.getFulltextdownload()); assertEquals("2022-03-11", ds.getConsenttermsofusedate()); + assertEquals("2022-03-11", ds.getLastconsenttermsofusedate()); } @Test @@ -356,18 +442,31 @@ public class MigrateDbEntitiesApplicationTest { } private Float getValueAsFloat(final String name, final List fields) { - return new Float(getValueAs(name, fields).toString()); + final Object value = getValueAs(name, fields); + return value != null ? new Float(value.toString()) : null; + } + + private Double getValueAsDouble(final String name, final List fields) { + final Object value = getValueAs(name, fields); + return value != null ? new Double(value.toString()) : null; + } + + private Integer getValueAsInt(final String name, final List fields) { + final Object value = getValueAs(name, fields); + return value != null ? new Integer(value.toString()) : null; } private T getValueAs(final String name, final List fields) { - return fields + final Optional field = fields .stream() .filter(f -> f.getField().equals(name)) - .map(TypedField::getValue) - .filter(Objects::nonNull) - .map(o -> (T) o) .findFirst() - .get(); + .map(TypedField::getValue) + .map(o -> (T) o); + if (!field.isPresent()) { + return null; + } + return field.get(); } private List getValueAsList(final String name, final List fields) { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/services_resultset_entry.json similarity index 68% rename from dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json rename to dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/services_resultset_entry.json index 11b884cde..70fad3323 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/services_resultset_entry.json @@ -5,14 +5,47 @@ "value": "274269ac6f3b::2579-5449" }, { - "field": "identities", + "field": "originalid", "type": "array", "value": [ - "274269ac6f3b::2579-5449", + "fairsharing_::1562", "piwik:13", - null + null, + "re3data_____::r3d100010213" ] }, + { + "field": "pid", + "type": "array", + "value": [ + "r3d100010218###re3data@@@dnet:pid_types" + ] + }, + { + "field": "datasourcetype", + "type": "string", + "value": "pubsrepository::journal@@@dnet:datasource_typologies" + }, + { + "field": "datasourcetypeui", + "type": "string", + "value": "pubsrepository::journal@@@dnet:datasource_typologies_ui" + }, + { + "field": "eosctype", + "type": "string", + "value": "Data Source@@@dnet:eosc_types" + }, + { + "field": "eoscdatasourcetype", + "type": "string", + "value": "Journal archive@@@dnet:eosc_datasource_types" + }, + { + "field": "openairecompatibility", + "type": "string", + "value": "openaire4.0@@@dnet:datasourceCompatibilityLevel" + }, { "field": "officialname", "type": "string", @@ -23,16 +56,6 @@ "type": "string", "value": "Jurnal Ilmiah Pendidikan Scholastic" }, - { - "field": "contactemail", - "type": "string", - "value": "test@test.it" - }, - { - "field": "openairecompatibility", - "type": "string", - "value": "hostedBy@@@dnet:datasourceCompatibilityLevel" - }, { "field": "websiteurl", "type": "string", @@ -44,11 +67,14 @@ "value": null }, { - "field": "accessinfopackage", - "type": "array", - "value": [ - null - ] + "field": "contactemail", + "type": "string", + "value": "test@test.it" + }, + { + "field": "namespaceprefix", + "type": "string", + "value": "ojs_25795449" }, { "field": "latitude", @@ -61,9 +87,19 @@ "value": 0 }, { - "field": "namespaceprefix", + "field": "dateofvalidation", + "type": "date", + "value": null + }, + { + "field": "description", "type": "string", - "value": "ojs_25795449" + "value": "veterinary medicine" + }, + { + "field": "subjects", + "type": "array", + "value": [] }, { "field": "odnumberofitems", @@ -75,16 +111,6 @@ "type": "date", "value": null }, - { - "field": "subjects", - "type": "array", - "value": null - }, - { - "field": "description", - "type": "string", - "value": null - }, { "field": "odpolicies", "type": "string", @@ -93,7 +119,26 @@ { "field": "odlanguages", "type": "array", - "value": [] + "value": [ + "English", + "German", + "French", + "Danish", + "Norwegian", + "Swedish" + ] + }, + { + "field": "languages", + "type": "array", + "value": [ + "English", + "German", + "French", + "Danish", + "Norwegian", + "Swedish" + ] }, { "field": "odcontenttypes", @@ -103,34 +148,11 @@ ] }, { - "field": "inferred", - "type": "boolean", - "value": false - }, - { - "field": "deletedbyinference", - "type": "boolean", - "value": false - }, - { - "field": "trust", - "type": "double", - "value": 0.9 - }, - { - "field": "inferenceprovenance", - "type": "string", - "value": null - }, - { - "field": "dateofcollection", - "type": "date", - "value": "2020-01-21" - }, - { - "field": "dateofvalidation", - "type": "date", - "value": null + "field": "accessinfopackage", + "type": "array", + "value": [ + "http://www.revista.vocesdelaeducacion.com.mx/index.php/index/oai" + ] }, { "field": "releasestartdate", @@ -183,12 +205,12 @@ "value": null }, { - "field": "citationguidelineurl", - "type": "string", + "field": "versioncontrol", + "type": "boolean", "value": null }, { - "field": "qualitymanagementkind", + "field": "citationguidelineurl", "type": "string", "value": null }, @@ -208,29 +230,38 @@ "value": [] }, { - "field": "collectedfromid", - "type": "string", - "value": "openaire____::SnVybmFsIEZha3VsdGFzIFNhc3RyYSBVbml2ZXJzaXRhcyBFa2FzYWt0aQ==" + "field": "inferred", + "type": "boolean", + "value": false }, { - "field": "collectedfromname", - "type": "string", - "value": "Jurnal Fakultas Sastra Universitas Ekasakti" + "field": "deletedbyinference", + "type": "boolean", + "value": false }, { - "field": "datasourcetype", - "type": "string", - "value": "pubsrepository::journal@@@dnet:datasource_typologies" + "field": "trust", + "type": "double", + "value": 0.9 }, { - "field": "datasourcetypeui", + "field": "inferenceprovenance", "type": "string", - "value": "pubsrepository::journal@@@dnet:datasource_typologies_ui" + "value": null }, { - "field": "provenanceaction", - "type": "not_used", - "value": "sysimport:crosswalk:entityregistry@@@dnet:provenance_actions" + "field": "dateofcollection", + "type": "date", + "value": "2020-01-21" + }, + { + "field": "collectedfrom", + "type": "array", + "value": [ + "openaire____::fairsharing@@@FAIRsharing.org", + "openaire____::opendoar@@@OpenDOAR", + "openaire____::re3data@@@Registry of Research Data Repository" + ] }, { "field": "issnPrinted", @@ -247,6 +278,20 @@ "type": "string", "value": "2579-5447" }, + { + "field": "researchentitytypes", + "type": "array", + "value": [ + "Research Data" + ] + }, + { + "field": "providedproducttypes", + "type": "array", + "value": [ + + ] + }, { "field": "jurisdiction", "type": "string", @@ -266,7 +311,32 @@ "field": "contentpolicies", "type": "array", "value": [ - "Journal article@@@eosc:contentpolicies" + "Taxonomic classification@@@eosc:contentpolicies", + "Resource collection@@@eosc:contentpolicies" + ] + }, + { + "field": "submissionpolicyurl", + "type": "string", + "value": null + }, + { + "field": "preservationpolicyurl", + "type": "string", + "value": "Permanent Archiving https://datadryad.org/stash/faq" + }, + { + "field": "researchproductaccesspolicies", + "type": "array", + "value": [ + "https://100percentit.com/legal/" + ] + }, + { + "field": "researchproductmetadataaccesspolicies", + "type": "array", + "value": [ + "https://wenmr.science.uu.nl/conditions" ] }, { @@ -283,5 +353,10 @@ "field": "consenttermsofusedate", "type": "date", "value": "2022-03-11" + }, + { + "field": "lastconsenttermsofusedate", + "type": "date", + "value": "2022-03-11" } ] diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 5d4a831c0..0daf1c085 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -695,12 +695,6 @@ public class XmlRecordFactory implements Serializable { XmlSerializationUtils .asXmlElement("citationguidelineurl", ds.getCitationguidelineurl().getValue())); } - if (ds.getQualitymanagementkind() != null) { - metadata - .add( - XmlSerializationUtils - .asXmlElement("qualitymanagementkind", ds.getQualitymanagementkind().getValue())); - } if (ds.getPidsystems() != null) { metadata .add(XmlSerializationUtils.asXmlElement("pidsystems", ds.getPidsystems().getValue())); diff --git a/pom.xml b/pom.xml index 136b9b867..03a4496d7 100644 --- a/pom.xml +++ b/pom.xml @@ -801,7 +801,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.10.32] + [2.11.34-eosc-SNAPSHOT] [4.0.3] [6.0.5] [3.1.6] From 05c1ea92e967185012087f3829aa11bb4a340d1e Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 29 Apr 2022 15:56:55 +0200 Subject: [PATCH 08/12] EOSC Services - added Service-specific fields in the XML record serialization --- .../raw/MigrateDbEntitiesApplicationTest.java | 6 + .../oa/provision/utils/XmlRecordFactory.java | 78 +++- .../oa/provision/XmlRecordFactoryTest.java | 11 +- .../dnetlib/dhp/oa/provision/datasource.json | 408 +++++++++++++++++- 4 files changed, 495 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java index 390db0e4e..69552c4dc 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java @@ -75,6 +75,12 @@ public class MigrateDbEntitiesApplicationTest { .stream() .map(KeyValue::getKey) .forEach(dsId -> assertValidId(dsId)); + + assertEquals(1, ds.getPid().size()); + assertEquals("r3d100010218", ds.getPid().get(0).getValue()); + assertEquals("re3data", ds.getPid().get(0).getQualifier().getClassid()); + assertEquals("dnet:pid_types", ds.getPid().get(0).getQualifier().getSchemeid()); + assertEquals(getValueAsString("officialname", fields), ds.getOfficialname().getValue()); assertEquals(getValueAsString("englishname", fields), ds.getEnglishname().getValue()); assertEquals(getValueAsString("websiteurl", fields), ds.getWebsiteurl().getValue()); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 0daf1c085..b811a7a0f 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -535,6 +535,12 @@ public class XmlRecordFactory implements Serializable { if (ds.getDatasourcetypeui() != null) { metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", ds.getDatasourcetypeui())); } + if (ds.getEosctype() != null) { + metadata.add(XmlSerializationUtils.mapQualifier("eosctype", ds.getEosctype())); + } + if (ds.getEoscdatasourcetype() != null) { + metadata.add(XmlSerializationUtils.mapQualifier("eoscdatasourcetype", ds.getEoscdatasourcetype())); + } if (ds.getOpenairecompatibility() != null) { metadata .add( @@ -583,6 +589,16 @@ public class XmlRecordFactory implements Serializable { metadata .add(XmlSerializationUtils.asXmlElement("description", ds.getDescription().getValue())); } + if (ds.getSubjects() != null) { + metadata + .addAll( + ds + .getSubjects() + .stream() + .filter(Objects::nonNull) + .map(sp -> XmlSerializationUtils.mapStructuredProperty("subjects", sp)) + .collect(Collectors.toList())); + } if (ds.getOdnumberofitems() != null) { metadata .add( @@ -609,6 +625,16 @@ public class XmlRecordFactory implements Serializable { .map(c -> XmlSerializationUtils.asXmlElement("odlanguages", c.getValue())) .collect(Collectors.toList())); } + if (ds.getLanguages() != null) { + metadata + .addAll( + ds + .getLanguages() + .stream() + .filter(Objects::nonNull) + .map(c -> XmlSerializationUtils.asXmlElement("languages", c)) + .collect(Collectors.toList())); + } if (ds.getOdcontenttypes() != null) { metadata .addAll( @@ -689,6 +715,12 @@ public class XmlRecordFactory implements Serializable { XmlSerializationUtils .asXmlElement("versioning", ds.getVersioning().getValue().toString())); } + if (ds.getVersioncontrol() != null) { + metadata + .add( + XmlSerializationUtils + .asXmlElement("versioncontrol", ds.getVersioncontrol().toString())); + } if (ds.getCitationguidelineurl() != null) { metadata .add( @@ -716,17 +748,24 @@ public class XmlRecordFactory implements Serializable { if (ds.getJournal() != null) { metadata.add(XmlSerializationUtils.mapJournal(ds.getJournal())); } - if (ds.getSubjects() != null) { + if (ds.getResearchentitytypes() != null) { metadata .addAll( ds - .getSubjects() + .getResearchentitytypes() .stream() - .filter(Objects::nonNull) - .map(sp -> XmlSerializationUtils.mapStructuredProperty("subjects", sp)) + .map(c -> XmlSerializationUtils.asXmlElement("researchentitytypes", c)) + .collect(Collectors.toList())); + } + if (ds.getProvidedproducttypes() != null) { + metadata + .addAll( + ds + .getProvidedproducttypes() + .stream() + .map(c -> XmlSerializationUtils.asXmlElement("providedproducttypes", c)) .collect(Collectors.toList())); } - if (ds.getJurisdiction() != null) { metadata.add(XmlSerializationUtils.mapQualifier("jurisdiction", ds.getJurisdiction())); } @@ -750,7 +789,34 @@ public class XmlRecordFactory implements Serializable { .map(q -> XmlSerializationUtils.mapQualifier("contentpolicy", q)) .collect(Collectors.toList())); } - + if (ds.getSubmissionpolicyurl() != null) { + metadata + .add(XmlSerializationUtils.asXmlElement("submissionpolicyurl", ds.getSubmissionpolicyurl())); + } + if (ds.getPreservationpolicyurl() != null) { + metadata + .add( + XmlSerializationUtils.asXmlElement("preservationpolicyurl", ds.getPreservationpolicyurl())); + } + if (ds.getResearchproductaccesspolicies() != null) { + metadata + .addAll( + ds + .getResearchproductaccesspolicies() + .stream() + .map(c -> XmlSerializationUtils.asXmlElement("researchproductaccesspolicies", c)) + .collect(Collectors.toList())); + } + if (ds.getResearchproductmetadataaccesspolicies() != null) { + metadata + .addAll( + ds + .getResearchproductmetadataaccesspolicies() + .stream() + .map( + c -> XmlSerializationUtils.asXmlElement("researchproductmetadataaccesspolicies", c)) + .collect(Collectors.toList())); + } break; case organization: final Organization o = (Organization) entity; diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index c32d868e8..f4763618b 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -11,6 +11,7 @@ import java.util.List; import org.apache.commons.io.IOUtils; import org.dom4j.Document; import org.dom4j.DocumentException; +import org.dom4j.Element; import org.dom4j.io.SAXReader; import org.junit.jupiter.api.Test; @@ -142,7 +143,7 @@ public class XmlRecordFactoryTest { } @Test - public void testDatasource() throws IOException, DocumentException { + public void testService() throws IOException, DocumentException { final ContextMapper contextMapper = new ContextMapper(); final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, @@ -167,6 +168,14 @@ public class XmlRecordFactoryTest { assertEquals("true", doc.valueOf("//thematic")); assertEquals("Journal article", doc.valueOf("//contentpolicy/@classname")); assertEquals("Journal archive", doc.valueOf("//datasourcetypeui/@classname")); + assertEquals("Data Source", doc.valueOf("//eosctype/@classname")); + final List pids = doc.selectNodes("//pid"); + assertEquals(1, pids.size()); + assertEquals("re3data", ((Element) pids.get(0)).attribute("classid").getValue()); + assertEquals( + "Registry of research data repositories", ((Element) pids.get(0)).attribute("classname").getValue()); + assertEquals("dnet:pid_types", ((Element) pids.get(0)).attribute("schemeid").getValue()); + assertEquals("dnet:pid_types", ((Element) pids.get(0)).attribute("schemename").getValue()); } } diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/datasource.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/datasource.json index ae069b8b5..8a23b4e6a 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/datasource.json +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/datasource.json @@ -1 +1,407 @@ -{"collectedfrom":[{"key":"10|openaire____::13068d7823ea0bd86516ac2cb66e96ba","value":"Jurnal Fakultas Sastra Universitas Ekasakti","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1645012035118,"id":"10|274269ac6f3b::2a2e2793b500f3f7b47ef24b1a9277b7","originalId":["274269ac6f3b::2579-5449","piwik:13"],"pid":[],"dateofcollection":"2020-01-21","dateoftransformation":null,"extraInfo":[],"oaiprovenance":null,"datasourcetype":{"classid":"pubsrepository::journal","classname":"pubsrepository::journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"datasourcetypeui":{"classid":"pubsrepository::journal","classname":"Journal archive","schemeid":"dnet:datasource_typologies_ui","schemename":"dnet:datasource_typologies_ui"},"openairecompatibility":{"classid":"hostedBy","classname":"hostedBy","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"officialname":{"value":"Jurnal Ilmiah Pendidikan Scholastic","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"englishname":{"value":"Jurnal Ilmiah Pendidikan Scholastic","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"websiteurl":{"value":"http://e-journal.sastra-unes.com/index.php/JIPS/index","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"logourl":null,"contactemail":{"value":"test@test.it","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"namespaceprefix":{"value":"ojs_25795449","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"latitude":{"value":"0.0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"longitude":{"value":"0.0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"dateofvalidation":null,"description":null,"subjects":[],"odnumberofitems":{"value":"0.0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"odnumberofitemsdate":null,"odpolicies":null,"odlanguages":[],"odcontenttypes":[{"value":"Journal articles","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"accessinfopackage":[],"releasestartdate":null,"releaseenddate":null,"missionstatementurl":null,"dataprovider":{"value":false,"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"serviceprovider":{"value":false,"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"databaseaccesstype":null,"datauploadtype":null,"databaseaccessrestriction":null,"datauploadrestriction":null,"versioning":{"value":false,"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"citationguidelineurl":null,"qualitymanagementkind":null,"pidsystems":null,"certificates":null,"policies":[],"journal":{"name":"Jurnal Ilmiah Pendidikan Scholastic","issnPrinted":"2579-5449","issnOnline":"2579-5448","issnLinking":"2579-5447","ep":null,"iss":null,"sp":null,"vol":null,"edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"providedentitytypes":null,"providedproducttypes":null,"jurisdiction":{"classid":"National","classname":"National","schemeid":"eosc:jurisdictions","schemename":"eosc:jurisdictions"},"thematic":true,"knowledgegraph":true,"contentpolicies":[{"classid":"Journal article","classname":"Journal article","schemeid":"eosc:contentpolicies","schemename":"eosc:contentpolicies"}]} +{ + "collectedfrom": [ + { + "key": "10|openaire____::13068d7823ea0bd86516ac2cb66e96ba", + "value": "Jurnal Fakultas Sastra Universitas Ekasakti", + "dataInfo": null + } + ], + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.900", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:entityregistry", + "classname": "sysimport:crosswalk:entityregistry", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + }, + "lastupdatetimestamp": 1645012035118, + "id": "10|274269ac6f3b::2a2e2793b500f3f7b47ef24b1a9277b7", + "originalId": [ + "274269ac6f3b::2579-5449", + "piwik:13" + ], + "dateofcollection": "2020-01-21", + "dateoftransformation": null, + "extraInfo": [], + "oaiprovenance": null, + "datasourcetype": { + "classid": "pubsrepository::journal", + "classname": "Journal", + "schemeid": "dnet:datasource_typologies", + "schemename": "dnet:datasource_typologies" + }, + "datasourcetypeui": { + "classid": "pubsrepository::journal", + "classname": "Journal archive", + "schemeid": "dnet:datasource_typologies_ui", + "schemename": "dnet:datasource_typologies_ui" + }, + "pid": [ + { + "value": "r3d100010218", + "qualifier": { + "classid": "re3data", + "classname": "Registry of research data repositories", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + } + } + ], + "eosctype": { + "classid": "Data Source", + "classname": "Data Source", + "schemeid": "dnet:eosc_types", + "schemename": "dnet:eosc_types" + }, + "eoscdatasourcetype": { + "classid": "Journal archive", + "classname": "Journal archive", + "schemeid": "dnet:eosc_datasource_types", + "schemename": "dnet:eosc_datasource_types" + }, + "openairecompatibility": { + "classid": "hostedBy", + "classname": "hostedBy", + "schemeid": "dnet:datasourceCompatibilityLevel", + "schemename": "dnet:datasourceCompatibilityLevel" + }, + "officialname": { + "value": "Jurnal Ilmiah Pendidikan Scholastic", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.900", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:entityregistry", + "classname": "sysimport:crosswalk:entityregistry", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + "englishname": { + "value": "Jurnal Ilmiah Pendidikan Scholastic", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.900", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:entityregistry", + "classname": "sysimport:crosswalk:entityregistry", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + "websiteurl": { + "value": "http://e-journal.sastra-unes.com/index.php/JIPS/index", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.900", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:entityregistry", + "classname": "sysimport:crosswalk:entityregistry", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + "logourl": null, + "contactemail": { + "value": "test@test.it", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.900", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:entityregistry", + "classname": "sysimport:crosswalk:entityregistry", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + "namespaceprefix": { + "value": "ojs_25795449", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.900", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:entityregistry", + "classname": "sysimport:crosswalk:entityregistry", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + "latitude": { + "value": "0.0", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.900", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:entityregistry", + "classname": "sysimport:crosswalk:entityregistry", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + "longitude": { + "value": "0.0", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.900", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:entityregistry", + "classname": "sysimport:crosswalk:entityregistry", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + "dateofvalidation": null, + "description": { + "value": "veterinary medicine", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.900", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:entityregistry", + "classname": "sysimport:crosswalk:entityregistry", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + "subjects": [], + "odnumberofitems": { + "value": "0.0", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.900", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:entityregistry", + "classname": "sysimport:crosswalk:entityregistry", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + "odnumberofitemsdate": null, + "odpolicies": null, + "odlanguages": [ + { + "value": "English", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.900", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:entityregistry", + "classname": "sysimport:crosswalk:entityregistry", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "German", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.900", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:entityregistry", + "classname": "sysimport:crosswalk:entityregistry", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "languages" : [ "English", "German" ], + "odcontenttypes": [ + { + "value": "Journal articles", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.900", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:entityregistry", + "classname": "sysimport:crosswalk:entityregistry", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "accessinfopackage": [ + { + "value": "http://www.revista.vocesdelaeducacion.com.mx/index.php/index/oai", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.900", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:entityregistry", + "classname": "sysimport:crosswalk:entityregistry", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "releasestartdate": null, + "releaseenddate": null, + "missionstatementurl": null, + "dataprovider": { + "value": false, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.900", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:entityregistry", + "classname": "sysimport:crosswalk:entityregistry", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + "serviceprovider": { + "value": false, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.900", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:entityregistry", + "classname": "sysimport:crosswalk:entityregistry", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + "databaseaccesstype": null, + "datauploadtype": null, + "databaseaccessrestriction": null, + "datauploadrestriction": null, + "versioning": { + "value": false, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.900", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:entityregistry", + "classname": "sysimport:crosswalk:entityregistry", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + "versioncontrol": false, + "citationguidelineurl": null, + "qualitymanagementkind": null, + "pidsystems": null, + "certificates": null, + "policies": [], + "journal": { + "name": "Jurnal Ilmiah Pendidikan Scholastic", + "issnPrinted": "2579-5449", + "issnOnline": "2579-5448", + "issnLinking": "2579-5447", + "ep": null, + "iss": null, + "sp": null, + "vol": null, + "edition": null, + "conferenceplace": null, + "conferencedate": null, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.900", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:entityregistry", + "classname": "sysimport:crosswalk:entityregistry", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + "researchentitytypes":[ "Research Data" ], + "providedentitytypes": null, + "providedproducttypes": null, + "jurisdiction": { + "classid": "National", + "classname": "National", + "schemeid": "eosc:jurisdictions", + "schemename": "eosc:jurisdictions" + }, + "thematic": true, + "knowledgegraph": true, + "contentpolicies": [ + { + "classid": "Journal article", + "classname": "Journal article", + "schemeid": "eosc:contentpolicies", + "schemename": "eosc:contentpolicies" + } + ], + "submissionpolicyurl": null, + "preservationpolicyurl" : "Permanent Archiving https://datadryad.org/stash/faq", + "researchproductaccesspolicies": [ + "https://100percentit.com/legal/" + ], + "researchproductmetadataaccesspolicies": [ + "https://wenmr.science.uu.nl/conditions" + ] +} From a8c51f6f1627b357679a6f0a3efc396b2a82f2e0 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 2 May 2022 11:09:03 +0200 Subject: [PATCH 09/12] EOSC Services - fixed query and testing preparation --- .../eu/dnetlib/dhp/oa/graph/raw_db/oozie_app/workflow.xml | 1 + .../resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_db/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_db/oozie_app/workflow.xml index 29d4269ef..52462adb4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_db/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_db/oozie_app/workflow.xml @@ -102,6 +102,7 @@ --postgresUser${postgresUser} --postgresPassword${postgresPassword} --isLookupUrl${isLookupUrl} + --actionopenaire --dbschema${dbSchema} --nsPrefixBlacklist${nsPrefixBlacklist} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql index 3dbb46eaa..f83e077a3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql @@ -56,10 +56,10 @@ SELECT d.description AS description, NULL AS odpolicies, array_remove(ARRAY(SELECT trim(s) - FROM unnest(string_to_array(regexp_replace(d.languages, '{|}|"', '', 'g') ',')) AS s), '{}') AS odlanguages, + FROM unnest(string_to_array(regexp_replace(d.languages, '{|}|"', '', 'g'), ',')) AS s), '{}') AS odlanguages, array_remove(ARRAY(SELECT trim(s) - FROM unnest(string_to_array(regexp_replace(d.languages, '{|}|"', '', 'g') ',')) AS s), '{}') AS languages, - -- Term provided only by OpenDOAR: + FROM unnest(string_to_array(regexp_replace(d.languages, '{|}|"', '', 'g'), ',')) AS s), '{}') AS languages, + -- Term provided only by OpenDOAR: -- probably updating the TR it could be replaced by research_entity_types[] -- But a study on the vocabulary terms is needed -- REMOVED: ARRAY(SELECT trim(s) FROM unnest(string_to_array(d.od_contenttypes, '-')) AS s) AS odcontenttypes, From b6a7ff3a99e0688f64e24e5f69c887245cece7bb Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 2 May 2022 15:52:33 +0200 Subject: [PATCH 10/12] EOSC Services - removed fields from mapping, testing preparation --- .../raw/MigrateDbEntitiesApplication.java | 13 ++-- .../oa/graph/raw_db/oozie_app/workflow.xml | 64 ++++++++++++++++++- .../dhp/oa/graph/sql/queryServices.sql | 1 - .../raw/MigrateDbEntitiesApplicationTest.java | 7 +- .../graph/raw/services_resultset_entry.json | 17 ----- .../dnetlib/dhp/oa/provision/datasource.json | 19 ------ 6 files changed, 68 insertions(+), 53 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index 924d53593..c40f9b392 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -148,10 +148,10 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i log.info("Processing Organizations..."); smdbe.execute("queryOrganizations.sql", smdbe::processOrganization, verifyNamespacePrefix); - log.info("Processing relationsNoRemoval ds <-> orgs ..."); + log.info("Processing relations services <-> orgs ..."); smdbe .execute( - "queryDatasourceOrganization.sql", smdbe::processDatasourceOrganization, + "queryServiceOrganization.sql", smdbe::processServiceOrganization, verifyNamespacePrefix); log.info("Processing projects <-> orgs ..."); @@ -268,13 +268,10 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i ds.setOdpolicies(field(rs.getString("odpolicies"), info)); ds.setOdlanguages(prepareListFields(rs.getArray("odlanguages"), info)); ds.setLanguages(listValues(rs.getArray("languages"))); - ds.setOdcontenttypes(prepareListFields(rs.getArray("odcontenttypes"), info)); ds.setAccessinfopackage(prepareListFields(rs.getArray("accessinfopackage"), info)); ds.setReleasestartdate(field(asString(rs.getDate("releasestartdate")), info)); ds.setReleaseenddate(field(asString(rs.getDate("releaseenddate")), info)); ds.setMissionstatementurl(field(rs.getString("missionstatementurl"), info)); - ds.setDataprovider(field(rs.getBoolean("dataprovider"), info)); - ds.setServiceprovider(field(rs.getBoolean("serviceprovider"), info)); ds.setDatabaseaccesstype(field(rs.getString("databaseaccesstype"), info)); ds.setDatauploadtype(field(rs.getString("datauploadtype"), info)); ds.setDatabaseaccessrestriction(field(rs.getString("databaseaccessrestriction"), info)); @@ -293,10 +290,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i rs.getString("issnLinking"), info)); // Journal ds.setResearchentitytypes(listValues(rs.getArray("researchentitytypes"))); - ds.setProvidedproducttypes(listValues(rs.getArray("providedproducttypes"))); ds.setJurisdiction(prepareQualifierSplitting(rs.getString("jurisdiction"))); ds.setThematic(rs.getBoolean("thematic")); - ds.setKnowledgegraph(rs.getBoolean("knowledgegraph")); ds.setContentpolicies(prepareListOfQualifiers(rs.getArray("contentpolicies"))); ds.setSubmissionpolicyurl(rs.getString("submissionpolicyurl")); ds.setPreservationpolicyurl(rs.getString("preservationpolicyurl")); @@ -434,11 +429,11 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i } } - public List processDatasourceOrganization(final ResultSet rs) { + public List processServiceOrganization(final ResultSet rs) { try { final DataInfo info = prepareDataInfo(rs); final String orgId = createOpenaireId(20, rs.getString("organization"), true); - final String dsId = createOpenaireId(10, rs.getString("datasource"), true); + final String dsId = createOpenaireId(10, rs.getString("service"), true); final List collectedFrom = listKeyValues( createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname")); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_db/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_db/oozie_app/workflow.xml index 52462adb4..31b726f39 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_db/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_db/oozie_app/workflow.xml @@ -30,6 +30,11 @@ a blacklist of nsprefixes (comma separeted) + + reuseContent + false + reuse content in the aggregator database + sparkDriverMemory memory for driver process @@ -85,12 +90,20 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + ${wf:conf('reuseContent') eq false} + ${wf:conf('reuseContent') eq true} + + + + @@ -125,6 +138,55 @@ --actionclaims --nsPrefixBlacklist${nsPrefixBlacklist} + + + + + + + yarn + cluster + GenerateEntities + eu.dnetlib.dhp.oa.graph.raw.GenerateEntitiesApplication + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory ${sparkExecutorMemory} + --executor-cores ${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePaths${contentPath}/db_records,${contentPath}/db_claims + --targetPath${workingDir}/entities + --isLookupUrl${isLookupUrl} + --shouldHashIdtrue + + + + + + + + yarn + cluster + GenerateGraph + eu.dnetlib.dhp.oa.graph.raw.DispatchEntitiesApplication + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory ${sparkExecutorMemory} + --executor-cores ${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --sourcePath${workingDir}/entities + --graphRawPath${workingDir}/graph_aggregator + diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql index f83e077a3..eb70d39e0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql @@ -109,7 +109,6 @@ SELECT d.lastconsenttermsofusedate AS lastconsenttermsofusedate, d.jurisdiction||'@@@eosc:jurisdictions' AS jurisdiction, d.thematic AS thematic, - -- REMOVED ???: d.knowledge_graph AS knowledgegraph, array(select unnest(d.content_policies)||'@@@eosc:contentpolicies') AS contentpolicies, nullif(trim(d.submission_policy_url), '') AS submissionpolicyurl, nullif(trim(d.preservation_policy_url), '') AS preservationpolicyurl, diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java index 69552c4dc..948dbfa50 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java @@ -129,9 +129,6 @@ public class MigrateDbEntitiesApplicationTest { getValueAsList("odlanguages", fields), ds.getOdlanguages().stream().map(Field::getValue).collect(Collectors.toList())); assertEquals(getValueAsList("languages", fields), ds.getLanguages()); - assertEquals( - getValueAsList("odcontenttypes", fields), - ds.getOdcontenttypes().stream().map(Field::getValue).collect(Collectors.toList())); assertEquals( getValueAsList("accessinfopackage", fields), ds.getAccessinfopackage().stream().map(Field::getValue).collect(Collectors.toList())); @@ -155,13 +152,11 @@ public class MigrateDbEntitiesApplicationTest { assertEquals(getValueAsString("certificates", fields), ds.getCertificates()); assertEquals(getValueAsList("researchentitytypes", fields), ds.getResearchentitytypes()); - assertEquals(getValueAsList("providedproducttypes", fields), ds.getProvidedproducttypes()); assertEquals("National", ds.getJurisdiction().getClassid()); assertEquals("eosc:jurisdictions", ds.getJurisdiction().getSchemeid()); assertTrue(ds.getThematic()); - assertTrue(ds.getKnowledgegraph()); HashSet cpSchemeId = ds .getContentpolicies() @@ -246,7 +241,7 @@ public class MigrateDbEntitiesApplicationTest { public void testProcessDatasourceOrganization() throws Exception { final List fields = prepareMocks("datasourceorganization_resultset_entry.json"); - final List list = app.processDatasourceOrganization(rs); + final List list = app.processServiceOrganization(rs); assertEquals(2, list.size()); verifyMocks(fields); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/services_resultset_entry.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/services_resultset_entry.json index 70fad3323..445334de5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/services_resultset_entry.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/services_resultset_entry.json @@ -140,13 +140,6 @@ "Swedish" ] }, - { - "field": "odcontenttypes", - "type": "array", - "value": [ - "Journal articles" - ] - }, { "field": "accessinfopackage", "type": "array", @@ -169,16 +162,6 @@ "type": "string", "value": null }, - { - "field": "dataprovider", - "type": "boolean", - "value": null - }, - { - "field": "serviceprovider", - "type": "boolean", - "value": null - }, { "field": "databaseaccesstype", "type": "string", diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/datasource.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/datasource.json index 8a23b4e6a..ce6b10826 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/datasource.json +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/datasource.json @@ -254,24 +254,6 @@ } ], "languages" : [ "English", "German" ], - "odcontenttypes": [ - { - "value": "Journal articles", - "dataInfo": { - "invisible": false, - "inferred": false, - "deletedbyinference": false, - "trust": "0.900", - "inferenceprovenance": null, - "provenanceaction": { - "classid": "sysimport:crosswalk:entityregistry", - "classname": "sysimport:crosswalk:entityregistry", - "schemeid": "dnet:provenanceActions", - "schemename": "dnet:provenanceActions" - } - } - } - ], "accessinfopackage": [ { "value": "http://www.revista.vocesdelaeducacion.com.mx/index.php/index/oai", @@ -387,7 +369,6 @@ "schemename": "eosc:jurisdictions" }, "thematic": true, - "knowledgegraph": true, "contentpolicies": [ { "classid": "Journal article", From 2ade69dea694a6d57ce51a5d7da53bc43ac8a22e Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 2 May 2022 17:03:31 +0200 Subject: [PATCH 11/12] EOSC Services - minor --- .../dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java | 2 +- .../resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index c40f9b392..552125307 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -233,7 +233,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i final Datasource ds = new Datasource(); - ds.setId(createOpenaireId(10, rs.getString("datasourceid"), true)); + ds.setId(createOpenaireId(10, rs.getString("id"), true)); ds .setOriginalId( Arrays diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql index eb70d39e0..81eab8163 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql @@ -1,5 +1,5 @@ SELECT - d.id AS datasourceid, + d.id AS id, array_remove(d.id || array_agg(distinct CASE WHEN dp.pid like 'piwik%' THEN di.pid ELSE NULL END) || array_agg(distinct dds.duplicate), NULL) AS originalid, array_remove(array_agg(distinct CASE WHEN di.pid NOT LIKE 'piwik%' THEN di.pid||'###'||di.issuertype||'@@@'||'dnet:pid_types' ELSE NULL END), NULL) as pid, d.officialname AS officialname, From 9e12cb3c9266184df07ca4549461e87b9729d1ca Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 3 May 2022 11:55:45 +0200 Subject: [PATCH 12/12] EOSC Services - removed field knowledgegraph; depending on the released schema module --- .../eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java | 5 ----- pom.xml | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index b811a7a0f..27128138c 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -774,11 +774,6 @@ public class XmlRecordFactory implements Serializable { metadata.add(XmlSerializationUtils.asXmlElement("thematic", ds.getThematic().toString())); } - if (ds.getKnowledgegraph() != null) { - metadata - .add(XmlSerializationUtils.asXmlElement("knowledgegraph", ds.getKnowledgegraph().toString())); - } - if (ds.getContentpolicies() != null) { metadata .addAll( diff --git a/pom.xml b/pom.xml index 03a4496d7..54070f654 100644 --- a/pom.xml +++ b/pom.xml @@ -801,7 +801,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.11.34-eosc-SNAPSHOT] + [2.12.0] [4.0.3] [6.0.5] [3.1.6]