From 6a3b081263b67876b37d4fb6e59732b8b1c7d58a Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 11 May 2020 16:09:20 +0200 Subject: [PATCH 01/25] added the last step of blacklisteing --- .../eu/dnetlib/dhp/wf/profiles/provision.xml | 128 +++++++----------- 1 file changed, 51 insertions(+), 77 deletions(-) diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/provision.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/provision.xml index 0467e618f6..7c918a0d70 100644 --- a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/provision.xml +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/provision.xml @@ -4,7 +4,7 @@ - + Data Provision [OCEAN] @@ -131,6 +131,16 @@ + + Set the target path to store the blacklisted graph + + blacklistedGraphPath + /tmp/beta_provision/graph/12_graph_blacklisted + + + + + Set the lookup address @@ -155,64 +165,8 @@ Set the map of associations organization, community list for the propagation of community to result through organization propagationOrganizationCommunityMap - - { - "20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], - "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], - "20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"], - "20|rcuk________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"], - "20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"], - "20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"], - "20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"], - "20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"], - "20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"], - "20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"], - "20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"], - "20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"], - "20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"], - "20|rcuk________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"], - "20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"], - "20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"], - "20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"], - "20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"], - "20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"], - "20|rcuk________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"], - "20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"], - "20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"], - "20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], - "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], - "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], - "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], - "20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], - "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], - "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], - "20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], - "20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"], - "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], - "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], - "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], - "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], - "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], - "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], - "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], - "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], - "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], - "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], - "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], - "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], - "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], - "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], - "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], - "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], - "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], - "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], - "20|rcuk________::23a79ebdfa59790864e4a485881568c1":["beopen"], - "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], - "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], - "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], - "20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], - "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"] - } + {"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], "20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|rcuk________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|rcuk________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], "20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], "20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"], + "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|rcuk________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], "20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"]} @@ -273,8 +227,8 @@ 'mongoDb' : 'mdstore', 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', 'postgresUser' : 'dnet', - 'postgresPassword' : '*****', - 'reuseContent' : 'false', + 'postgresPassword' : 'dnetPwd', + 'reuseContent' : 'true', 'contentPath' : '/tmp/beta_provision/aggregator', 'workingDir' : '/tmp/beta_provision/working_dir/aggregator' } @@ -403,7 +357,6 @@ - propagates ORCID among results linked by allowedsemrels semantic relationships @@ -429,7 +382,6 @@ - mark results respecting some rules as belonging to communities @@ -440,7 +392,7 @@ 'sourcePath' : 'orcidGraphPath', 'outputPath': 'bulkTaggingGraphPath', 'isLookUpUrl' : 'isLookUpUrl', - 'pathMap' : 'bulkTaggingPathMap', + 'pathMap' : 'bulkTaggingPathMap' } @@ -455,7 +407,6 @@ - creates relashionships between results and organizations when the organizations are associated to institutional repositories @@ -464,14 +415,14 @@ { 'sourcePath' : 'bulkTaggingGraphPath', - 'outputPath': 'affiliationGraphPath', - 'saveGraph' : 'true' + 'outputPath': 'affiliationGraphPath' } { 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/affiliation/oozie_app', - 'workingDir' : '/tmp/beta_provision/working_dir/affiliation' + 'workingDir' : '/tmp/beta_provision/working_dir/affiliation', + 'saveGraph' : 'true' } build-report @@ -480,7 +431,6 @@ - marks as belonging to communities the result collected from datasources related to the organizations specified in the organizationCommunityMap @@ -506,7 +456,6 @@ - created relation between projects and results linked to other results trough allowedsemrel semantic relations linked to projects @@ -532,7 +481,6 @@ - tag as belonging to communitites result in in allowedsemrels relation with other result already linked to communities @@ -542,14 +490,15 @@ { 'sourcePath' : 'fundingGraphPath', 'outputPath': 'communitySemRelGraphPath', - 'isLookupUrl' : 'isLookUpUrl' + 'isLookUpUrl' : 'isLookUpUrl' } { 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/community_semrel/oozie_app', 'workingDir' : '/tmp/beta_provision/working_dir/community_semrel', - 'allowedsemrels' : 'isSupplementedBy;isSupplementTo' + 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', + 'saveGraph' : 'true' } build-report @@ -558,7 +507,6 @@ - associated to results colleced from allowedtypes and those in the whithelist the country of the organization(s) handling the datasource it is collected from @@ -581,16 +529,42 @@ build-report + + + + + + removes blacklisted relations + + executeOozieJob + IIS + + { + 'sourcePath' : 'countryGraphPath', + 'outputPath': 'blacklistedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/blacklist/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/blacklist', + 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', + 'postgresUser' : 'dnet', + 'postgresPassword' : 'dnetPwd' + } + + build-report + - wf_20200428_155848_495 - 2020-04-28T16:53:23+00:00 + wf_20200509_100941_857 + 2020-05-09T13:26:09+00:00 FAILURE - + eu.dnetlib.data.hadoop.rmi.HadoopServiceException: hadoop job: 0002933-200403132837156-oozie-oozi-W failed with status: KILLED, oozie log: 2020-05-09 13:23:31,194 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[] No results found 2020-05-09 13:23:31,216 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@:start:] Start action [0002933-200403132837156-oozie-oozi-W@:start:] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:23:31,216 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@:start:] [***0002933-200403132837156-oozie-oozi-W@:start:***]Action status=DONE 2020-05-09 13:23:31,216 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@:start:] [***0002933-200403132837156-oozie-oozi-W@:start:***]Action updated in DB! 2020-05-09 13:23:31,257 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@:start:] No results found 2020-05-09 13:23:31,275 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@:start:] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@:start: 2020-05-09 13:23:31,275 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W 2020-05-09 13:23:31,314 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@reset-outputpath] Start action [0002933-200403132837156-oozie-oozi-W@reset-outputpath] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:23:33,897 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@reset-outputpath] [***0002933-200403132837156-oozie-oozi-W@reset-outputpath***]Action status=DONE 2020-05-09 13:23:33,897 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@reset-outputpath] [***0002933-200403132837156-oozie-oozi-W@reset-outputpath***]Action updated in DB! 2020-05-09 13:23:33,947 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@reset-outputpath] No results found 2020-05-09 13:23:33,966 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_entities] Start action [0002933-200403132837156-oozie-oozi-W@copy_entities] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:23:33,966 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_entities] [***0002933-200403132837156-oozie-oozi-W@copy_entities***]Action status=DONE 2020-05-09 13:23:33,966 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_entities] [***0002933-200403132837156-oozie-oozi-W@copy_entities***]Action updated in DB! 2020-05-09 13:23:34,012 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_entities] No results found 2020-05-09 13:23:34,018 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_entities] No results found 2020-05-09 13:23:34,023 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_entities] No results found 2020-05-09 13:23:34,029 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_entities] No results found 2020-05-09 13:23:34,124 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] Start action [0002933-200403132837156-oozie-oozi-W@copy_relation] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:23:34,130 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] Start action [0002933-200403132837156-oozie-oozi-W@copy_projects] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:23:34,130 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] Start action [0002933-200403132837156-oozie-oozi-W@copy_datasources] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:23:34,140 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] Start action [0002933-200403132837156-oozie-oozi-W@copy_organization] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:23:35,010 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] checking action, hadoop job ID [job_1585920557248_14569] status [RUNNING] 2020-05-09 13:23:35,018 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] [***0002933-200403132837156-oozie-oozi-W@copy_projects***]Action status=RUNNING 2020-05-09 13:23:35,018 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] [***0002933-200403132837156-oozie-oozi-W@copy_projects***]Action updated in DB! 2020-05-09 13:23:35,022 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] checking action, hadoop job ID [job_1585920557248_14568] status [RUNNING] 2020-05-09 13:23:35,027 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@copy_projects 2020-05-09 13:23:35,028 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] [***0002933-200403132837156-oozie-oozi-W@copy_relation***]Action status=RUNNING 2020-05-09 13:23:35,028 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] [***0002933-200403132837156-oozie-oozi-W@copy_relation***]Action updated in DB! 2020-05-09 13:23:35,031 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] checking action, hadoop job ID [job_1585920557248_14570] status [RUNNING] 2020-05-09 13:23:35,035 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] [***0002933-200403132837156-oozie-oozi-W@copy_datasources***]Action status=RUNNING 2020-05-09 13:23:35,035 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] [***0002933-200403132837156-oozie-oozi-W@copy_datasources***]Action updated in DB! 2020-05-09 13:23:35,037 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@copy_relation 2020-05-09 13:23:35,048 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@copy_datasources 2020-05-09 13:23:35,072 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] checking action, hadoop job ID [job_1585920557248_14571] status [RUNNING] 2020-05-09 13:23:35,076 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] [***0002933-200403132837156-oozie-oozi-W@copy_organization***]Action status=RUNNING 2020-05-09 13:23:35,076 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] [***0002933-200403132837156-oozie-oozi-W@copy_organization***]Action updated in DB! 2020-05-09 13:23:35,084 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@copy_organization 2020-05-09 13:23:35,090 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_entities] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@copy_entities 2020-05-09 13:23:35,090 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@reset-outputpath] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@reset-outputpath 2020-05-09 13:23:58,926 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] callback for action [0002933-200403132837156-oozie-oozi-W@copy_datasources] 2020-05-09 13:23:59,085 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] checking action, hadoop job ID [job_1585920557248_14570] status [RUNNING] 2020-05-09 13:23:59,242 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] callback for action [0002933-200403132837156-oozie-oozi-W@copy_projects] 2020-05-09 13:23:59,386 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] checking action, hadoop job ID [job_1585920557248_14569] status [RUNNING] 2020-05-09 13:24:01,343 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] callback for action [0002933-200403132837156-oozie-oozi-W@copy_datasources] 2020-05-09 13:24:01,418 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] Hadoop Jobs launched : [job_1585920557248_14573] 2020-05-09 13:24:01,418 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] action completed, external ID [job_1585920557248_14570] 2020-05-09 13:24:01,493 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@copy_datasources 2020-05-09 13:24:01,935 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] callback for action [0002933-200403132837156-oozie-oozi-W@copy_projects] 2020-05-09 13:24:02,012 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] Hadoop Jobs launched : [job_1585920557248_14572] 2020-05-09 13:24:02,012 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] action completed, external ID [job_1585920557248_14569] 2020-05-09 13:24:02,076 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@copy_projects 2020-05-09 13:25:03,172 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] callback for action [0002933-200403132837156-oozie-oozi-W@copy_organization] 2020-05-09 13:25:03,336 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] checking action, hadoop job ID [job_1585920557248_14571] status [RUNNING] 2020-05-09 13:25:05,598 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] callback for action [0002933-200403132837156-oozie-oozi-W@copy_organization] 2020-05-09 13:25:05,688 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] Hadoop Jobs launched : [job_1585920557248_14574] 2020-05-09 13:25:05,691 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] action completed, external ID [job_1585920557248_14571] 2020-05-09 13:25:05,748 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@copy_organization 2020-05-09 13:25:23,274 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] callback for action [0002933-200403132837156-oozie-oozi-W@copy_relation] 2020-05-09 13:25:23,409 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] checking action, hadoop job ID [job_1585920557248_14568] status [RUNNING] 2020-05-09 13:25:25,419 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] callback for action [0002933-200403132837156-oozie-oozi-W@copy_relation] 2020-05-09 13:25:25,510 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] Hadoop Jobs launched : [job_1585920557248_14575] 2020-05-09 13:25:25,511 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] action completed, external ID [job_1585920557248_14568] 2020-05-09 13:25:25,565 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] No results found 2020-05-09 13:25:25,585 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_wait] Start action [0002933-200403132837156-oozie-oozi-W@copy_wait] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:25:25,585 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_wait] [***0002933-200403132837156-oozie-oozi-W@copy_wait***]Action status=DONE 2020-05-09 13:25:25,585 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_wait] [***0002933-200403132837156-oozie-oozi-W@copy_wait***]Action updated in DB! 2020-05-09 13:25:25,627 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_wait] No results found 2020-05-09 13:25:25,648 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1] Start action [0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:25:25,648 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1] [***0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1***]Action status=DONE 2020-05-09 13:25:25,648 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1] [***0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1***]Action updated in DB! 2020-05-09 13:25:25,694 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1] No results found 2020-05-09 13:25:25,700 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1] No results found 2020-05-09 13:25:25,706 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1] No results found 2020-05-09 13:25:25,711 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1] No results found 2020-05-09 13:25:25,801 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] Start action [0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:25:25,825 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] Start action [0002933-200403132837156-oozie-oozi-W@join_prepare_software] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:25:25,825 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] Start action [0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:25:25,828 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_publication] Start action [0002933-200403132837156-oozie-oozi-W@join_prepare_publication] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:25:27,165 INFO org.apache.oozie.action.hadoop.SparkActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] checking action, hadoop job ID [job_1585920557248_14578] status [RUNNING] 2020-05-09 13:25:27,170 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] [***0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct***]Action status=RUNNING 2020-05-09 13:25:27,170 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] [***0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct***]Action updated in DB! 2020-05-09 13:25:27,179 INFO org.apache.oozie.action.hadoop.SparkActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] checking action, hadoop job ID [job_1585920557248_14577] status [RUNNING] 2020-05-09 13:25:27,181 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct 2020-05-09 13:25:27,183 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] [***0002933-200403132837156-oozie-oozi-W@join_prepare_software***]Action status=RUNNING 2020-05-09 13:25:27,183 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] [***0002933-200403132837156-oozie-oozi-W@join_prepare_software***]Action updated in DB! 2020-05-09 13:25:27,188 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@join_prepare_software 2020-05-09 13:25:27,617 INFO org.apache.oozie.action.hadoop.SparkActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_publication] checking action, hadoop job ID [job_1585920557248_14576] status [RUNNING] 2020-05-09 13:25:27,622 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_publication] [***0002933-200403132837156-oozie-oozi-W@join_prepare_publication***]Action status=RUNNING 2020-05-09 13:25:27,622 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_publication] [***0002933-200403132837156-oozie-oozi-W@join_prepare_publication***]Action updated in DB! 2020-05-09 13:25:27,625 INFO org.apache.oozie.action.hadoop.SparkActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] checking action, hadoop job ID [job_1585920557248_14579] status [RUNNING] 2020-05-09 13:25:27,628 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_publication] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@join_prepare_publication 2020-05-09 13:25:27,629 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] [***0002933-200403132837156-oozie-oozi-W@join_prepare_dataset***]Action status=RUNNING 2020-05-09 13:25:27,629 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] [***0002933-200403132837156-oozie-oozi-W@join_prepare_dataset***]Action updated in DB! 2020-05-09 13:25:27,634 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@join_prepare_dataset 2020-05-09 13:25:27,639 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1 2020-05-09 13:25:27,639 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_wait] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@copy_wait 2020-05-09 13:25:27,640 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@copy_relation 2020-05-09 13:25:41,416 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] callback for action [0002933-200403132837156-oozie-oozi-W@join_prepare_software] 2020-05-09 13:25:41,490 INFO org.apache.oozie.action.hadoop.SparkActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] action completed, external ID [job_1585920557248_14577] 2020-05-09 13:25:41,495 WARN org.apache.oozie.action.hadoop.SparkActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] Launcher ERROR, reason: Main class [org.apache.oozie.action.hadoop.SparkMain], main() threw exception, File file:/data/3/yarn/nm/usercache/dnet.beta/appcache/application_1585920557248_14577/container_e68_1585920557248_14577_01_000002/dhp-propagation-1.1.8-SNAPSHOT.jar does not exist 2020-05-09 13:25:41,495 WARN org.apache.oozie.action.hadoop.SparkActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] Launcher exception: File file:/data/3/yarn/nm/usercache/dnet.beta/appcache/application_1585920557248_14577/container_e68_1585920557248_14577_01_000002/dhp-propagation-1.1.8-SNAPSHOT.jar does not exist java.io.FileNotFoundException: File file:/data/3/yarn/nm/usercache/dnet.beta/appcache/application_1585920557248_14577/container_e68_1585920557248_14577_01_000002/dhp-propagation-1.1.8-SNAPSHOT.jar does not exist at org.apache.hadoop.fs.RawLocalFileSystem.deprecatedGetFileStatus(RawLocalFileSystem.java:598) at org.apache.hadoop.fs.RawLocalFileSystem.getFileLinkStatusInternal(RawLocalFileSystem.java:811) at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:588) at org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:432) at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:340) at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:292) at org.apache.spark.deploy.yarn.Client.copyFileToRemote(Client.scala:404) at org.apache.spark.deploy.yarn.Client.org$apache$spark$deploy$yarn$Client$$distribute$1(Client.scala:496) at org.apache.spark.deploy.yarn.Client$$anonfun$prepareLocalResources$9.apply(Client.scala:595) at org.apache.spark.deploy.yarn.Client$$anonfun$prepareLocalResources$9.apply(Client.scala:594) at scala.Option.foreach(Option.scala:257) at org.apache.spark.deploy.yarn.Client.prepareLocalResources(Client.scala:594) at org.apache.spark.deploy.yarn.Client.createContainerLaunchContext(Client.scala:886) at org.apache.spark.deploy.yarn.Client.submitApplication(Client.scala:180) at org.apache.spark.deploy.yarn.Client.run(Client.scala:1156) at org.apache.spark.deploy.yarn.YarnClusterApplication.start(Client.scala:1608) at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:849) at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:167) at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:195) at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86) at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:924) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:933) at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) at org.apache.oozie.action.hadoop.SparkMain.runSpark(SparkMain.java:178) at org.apache.oozie.action.hadoop.SparkMain.run(SparkMain.java:90) at org.apache.oozie.action.hadoop.LauncherMain.run(LauncherMain.java:81) at org.apache.oozie.action.hadoop.SparkMain.main(SparkMain.java:57) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.oozie.action.hadoop.LauncherMapper.map(LauncherMapper.java:235) at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54) at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:459) at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343) at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1924) at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158) 2020-05-09 13:25:41,514 INFO org.apache.oozie.command.wf.ActionEndXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] ERROR is considered as FAILED for SLA 2020-05-09 13:25:41,541 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] No results found 2020-05-09 13:25:41,580 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@Kill] Start action [0002933-200403132837156-oozie-oozi-W@Kill] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:25:41,580 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@Kill] [***0002933-200403132837156-oozie-oozi-W@Kill***]Action status=DONE 2020-05-09 13:25:41,580 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@Kill] [***0002933-200403132837156-oozie-oozi-W@Kill***]Action updated in DB! 2020-05-09 13:25:41,692 WARN org.apache.oozie.workflow.lite.LiteWorkflowInstance: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@Kill] Workflow completed [KILLED], killing [3] running nodes 2020-05-09 13:25:41,760 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@Kill] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@Kill 2020-05-09 13:25:41,766 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@join_prepare_software 2020-05-09 13:25:41,852 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct 2020-05-09 13:25:41,914 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] callback for action [0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] 2020-05-09 13:25:41,920 ERROR org.apache.oozie.command.wf.CompletedActionXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] XException, org.apache.oozie.command.CommandException: E0800: Action it is not running its in [KILLED] state, action [0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] at org.apache.oozie.command.wf.CompletedActionXCommand.eagerVerifyPrecondition(CompletedActionXCommand.java:92) at org.apache.oozie.command.XCommand.call(XCommand.java:257) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at org.apache.oozie.service.CallableQueueService$CallableWrapper.run(CallableQueueService.java:179) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) 2020-05-09 13:25:41,938 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_publication] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@join_prepare_publication 2020-05-09 13:25:42,005 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_publication] callback for action [0002933-200403132837156-oozie-oozi-W@join_prepare_publication] 2020-05-09 13:25:42,010 ERROR org.apache.oozie.command.wf.CompletedActionXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_publication] XException, org.apache.oozie.command.CommandException: E0800: Action it is not running its in [KILLED] state, action [0002933-200403132837156-oozie-oozi-W@join_prepare_publication] at org.apache.oozie.command.wf.CompletedActionXCommand.eagerVerifyPrecondition(CompletedActionXCommand.java:92) at org.apache.oozie.command.XCommand.call(XCommand.java:257) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at org.apache.oozie.service.CallableQueueService$CallableWrapper.run(CallableQueueService.java:179) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) 2020-05-09 13:25:42,028 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W 2020-05-09 13:25:42,028 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@join_prepare_dataset 2020-05-09 13:25:42,113 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] callback for action [0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] 2020-05-09 13:25:42,116 ERROR org.apache.oozie.command.wf.CompletedActionXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] XException, org.apache.oozie.command.CommandException: E0800: Action it is not running its in [KILLED] state, action [0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] at org.apache.oozie.command.wf.CompletedActionXCommand.eagerVerifyPrecondition(CompletedActionXCommand.java:92) at org.apache.oozie.command.XCommand.call(XCommand.java:257) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at org.apache.oozie.service.CallableQueueService$CallableWrapper.run(CallableQueueService.java:179) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) \ No newline at end of file From 5ab3424c77ca90b06055a055f2f98baefcb919b7 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 11 May 2020 16:09:37 +0200 Subject: [PATCH 02/25] removed unused dependencies --- dhp-workflows/dhp-bulktag/pom.xml | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/dhp-workflows/dhp-bulktag/pom.xml b/dhp-workflows/dhp-bulktag/pom.xml index 7c2afa0cca..98922c1939 100644 --- a/dhp-workflows/dhp-bulktag/pom.xml +++ b/dhp-workflows/dhp-bulktag/pom.xml @@ -43,17 +43,6 @@ com.jayway.jsonpath json-path - - org.reflections - reflections - 0.9.11 - compile - - - com.google.guava - guava - 23.3-jre - io.github.classgraph classgraph From e883daf87e79daad9cca2d88ed4b55eafab7b7ad Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 11 May 2020 16:10:24 +0200 Subject: [PATCH 03/25] added the outputPath parameter and the reset path to remove the outputath directory --- .../eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml b/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml index 855cac65eb..b980016628 100644 --- a/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml @@ -16,6 +16,10 @@ sourcePath the source path + + outputPath + the path were to store the graph without the blacklisted relations + @@ -25,7 +29,8 @@ - + + @@ -87,7 +92,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePath${sourcePath}/relation - --outputPath${workingDir}/relation + --outputPath${outputPath}/relation --hdfsPath${workingDir}/blacklist --mergesPath${workingDir}/mergesRelation From 50659011ebe7719eb7ede6558a32a5878efdb8f3 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 11 May 2020 16:14:26 +0200 Subject: [PATCH 04/25] refactoring --- .../test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java index 2d6b1061b3..0487a5844d 100644 --- a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java +++ b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java @@ -19,6 +19,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; + import eu.dnetlib.dhp.schema.oaf.Relation; public class BlackListTest { @@ -60,12 +61,7 @@ public class BlackListTest { spark.stop(); } - /* - * String inputPath = parser.get("sourcePath"); log.info("inputPath: {}", inputPath); final String outputPath = - * parser.get("outputPath"); log.info("outputPath {}: ", outputPath); final String blacklistPath = - * parser.get("hdfsPath"); log.info("blacklistPath {}: ", blacklistPath); final String mergesPath = - * parser.get("mergesPath"); log.info("mergesPath {}: ", mergesPath); - */ + @Test public void noRemoveTest() throws Exception { SparkRemoveBlacklistedRelationJob From 8610ad5142c932a8eddb2deb09475fd7bce1f0d4 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 May 2020 15:32:55 +0200 Subject: [PATCH 05/25] added groupby id to fix multiple result with same id at join step --- .../PrepareResultCommunitySet.java | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java index 5574aad753..5d0b75a8e0 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java @@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.*; import org.apache.commons.io.IOUtils; +import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.*; @@ -19,6 +20,7 @@ import com.google.gson.Gson; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; +import scala.Tuple2; public class PrepareResultCommunitySet { @@ -93,10 +95,24 @@ public class PrepareResultCommunitySet { result_organizationset .map(mapResultCommunityFn(organizationMap), Encoders.bean(ResultCommunityList.class)) .filter(Objects::nonNull) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath); + .toJavaRDD() + .mapToPair(value -> new Tuple2<>(value.getResultId(), value)) + .reduceByKey((a, b) -> { + ArrayList cl = a.getCommunityList(); + b.getCommunityList().stream().forEach(s -> { + if (!cl.contains(s)) { + cl.add(s); + } + }); + a.setCommunityList(cl); + return a; + }) + .map(value -> OBJECT_MAPPER.writeValueAsString(value._2())) + .saveAsTextFile(outputPath, GzipCodec.class); +// .write() +// .mode(SaveMode.Overwrite) +// .option("compression", "gzip") +// .json(outputPath); } private static MapFunction mapResultCommunityFn( From 29066a6b46f94903bdbb709c5bf32552a5be5343 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 May 2020 15:38:50 +0200 Subject: [PATCH 06/25] applied code cleanup --- .../dhp/bulktag/community/CommunityConfiguration.java | 2 +- .../java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java | 2 +- .../SparkResultToProjectThroughSemRelJob.java | 6 +----- .../SparkResultToOrganizationFromIstRepoJob.java | 4 +--- 4 files changed, 4 insertions(+), 10 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java index 29ddde15f8..844fe29621 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java @@ -131,7 +131,7 @@ public class CommunityConfiguration implements Serializable { p -> { if (p.getSnd() == null) return p.getFst(); - if (((SelectionConstraints) p.getSnd()).verifyCriteria(param)) + if (p.getSnd().verifyCriteria(param)) return p.getFst(); else return null; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java index 3d0db20632..f54a1cebab 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java @@ -34,7 +34,7 @@ public class VerbResolver implements Serializable { .collect( Collectors .toMap( - value -> (String) ((ClassInfo) value) + value -> (String) value .getAnnotationInfo() .get(0) .getParameterValues() diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java index 1f6264c186..17f6a057d8 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java @@ -105,11 +105,7 @@ public class SparkResultToProjectThroughSemRelJob { .stream() .forEach( (p -> { - if (potential_update - .getProjectSet() - .contains(p)) { - potential_update.getProjectSet().remove(p); - } + potential_update.getProjectSet().remove(p); })); } String resId = potential_update.getResultId(); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java index 0c5e1d8be6..ff34bd42a7 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java @@ -136,9 +136,7 @@ public class SparkResultToOrganizationFromIstRepoJob { .stream() .forEach( rId -> { - if (organization_list.contains(rId)) { - organization_list.remove(rId); - } + organization_list.remove(rId); }); } String resultId = potential_update.getResultId(); From 0d1ec1913f40827b5e3fbda2575082072c9123ba Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 May 2020 18:42:25 +0200 Subject: [PATCH 07/25] added fix to avoid duplication of results --- .../PrepareResultCommunitySet.java | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java index 5d0b75a8e0..750d333e53 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java @@ -95,20 +95,20 @@ public class PrepareResultCommunitySet { result_organizationset .map(mapResultCommunityFn(organizationMap), Encoders.bean(ResultCommunityList.class)) .filter(Objects::nonNull) - .toJavaRDD() - .mapToPair(value -> new Tuple2<>(value.getResultId(), value)) - .reduceByKey((a, b) -> { - ArrayList cl = a.getCommunityList(); - b.getCommunityList().stream().forEach(s -> { - if (!cl.contains(s)) { - cl.add(s); - } - }); - a.setCommunityList(cl); - return a; - }) - .map(value -> OBJECT_MAPPER.writeValueAsString(value._2())) - .saveAsTextFile(outputPath, GzipCodec.class); + .toJavaRDD() + .mapToPair(value -> new Tuple2<>(value.getResultId(), value)) + .reduceByKey((a, b) -> { + ArrayList cl = a.getCommunityList(); + b.getCommunityList().stream().forEach(s -> { + if (!cl.contains(s)) { + cl.add(s); + } + }); + a.setCommunityList(cl); + return a; + }) + .map(value -> OBJECT_MAPPER.writeValueAsString(value._2())) + .saveAsTextFile(outputPath, GzipCodec.class); // .write() // .mode(SaveMode.Overwrite) // .option("compression", "gzip") From 8f6ce970f9e527d30f30cf69882cfc67505d70bf Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 25 May 2020 10:25:55 +0200 Subject: [PATCH 08/25] moved PacePerson to dhp-common to avoid conflict in dependency with graph-mapper --- .../src/main/java/eu/dnetlib/dhp}/common/PacePerson.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename {dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw => dhp-common/src/main/java/eu/dnetlib/dhp}/common/PacePerson.java (99%) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java similarity index 99% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java index 6e474f2f38..1909ddcca6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.oa.graph.raw.common; +package eu.dnetlib.dhp.common; import java.nio.charset.StandardCharsets; import java.text.Normalizer; From b258f99ece5ab9ff2ffce961dc91d18383690947 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 25 May 2020 10:26:48 +0200 Subject: [PATCH 09/25] fix for issue that duplicated result --- .../PrepareDatasourceCountryAssociation.java | 65 ++++++++++++------- .../PrepareResultCountrySet.java | 38 +++++++++-- 2 files changed, 76 insertions(+), 27 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java index 98b573102a..f28c5aa06d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java @@ -77,9 +77,15 @@ public class PrepareDatasourceCountryAssociation { List allowedtypes, String inputPath, String outputPath) { - String whitelisted = ""; - for (String i : whitelist) { - whitelisted += " OR id = '" + i + "'"; + String whitelisted = " d.id = '" + whitelist.get(0) + "'"; + for (int i = 1; i < whitelist.size(); i++) { + whitelisted += " OR d.id = '" + whitelist.get(i) + "'"; + } + + String allowed = "d.datasourcetype.classid = '" + allowedtypes.get(0) + "'"; + + for (int i = 1; i < allowedtypes.size(); i++) { + allowed += " OR d.datasourcetype.classid = '" + allowedtypes.get(i) + "'"; } Dataset datasource = readPath(spark, inputPath + "/datasource", Datasource.class); @@ -90,26 +96,39 @@ public class PrepareDatasourceCountryAssociation { relation.createOrReplaceTempView("relation"); organization.createOrReplaceTempView("organization"); - String query = "SELECT source dataSourceId, named_struct('classid', country.classid, 'classname', country.classname) country " - + "FROM ( SELECT id " - + " FROM datasource " - + " WHERE (datainfo.deletedbyinference = false " - + whitelisted - + ") " - + getConstraintList("datasourcetype.classid = '", allowedtypes) - + ") d " - + "JOIN ( SELECT source, target " - + " FROM relation " - + " WHERE relclass = '" - + ModelConstants.IS_PROVIDED_BY - + "' " - + " AND datainfo.deletedbyinference = false ) rel " - + "ON d.id = rel.source " - + "JOIN (SELECT id, country " - + " FROM organization " - + " WHERE datainfo.deletedbyinference = false " - + " AND length(country.classid) > 0) o " - + "ON o.id = rel.target"; +// String query = "SELECT source dataSourceId, named_struct('classid', country.classid, 'classname', country.classname) country " +// + "FROM ( SELECT id " +// + " FROM datasource " +// + " WHERE (datainfo.deletedbyinference = false " +// + whitelisted +// + ") " +// + getConstraintList("datasourcetype.classid = '", allowedtypes) +// + ") d " +// + "JOIN ( SELECT source, target " +// + " FROM relation " +// + " WHERE relclass = '" +// + ModelConstants.IS_PROVIDED_BY +// + "' " +// + " AND datainfo.deletedbyinference = false ) rel " +// + "ON d.id = rel.source " +// + "JOIN (SELECT id, country " +// + " FROM organization " +// + " WHERE datainfo.deletedbyinference = false " +// + " AND length(country.classid) > 0) o " +// + "ON o.id = rel.target"; + + String query = "SELECT source dataSourceId, " + + "named_struct('classid', country.classid, 'classname', country.classname) country " + + "FROM datasource d " + + "JOIN relation rel " + + "ON d.id = rel.source " + + "JOIN organization o " + + "ON o.id = rel.target " + + "WHERE rel.datainfo.deletedbyinference = false " + + "and rel.relclass = '" + ModelConstants.IS_PROVIDED_BY + "'" + + "and o.datainfo.deletedbyinference = false " + + "and length(o.country.classid) > 0 " + + "and (" + allowed + " or " + whitelisted + ")"; spark .sql(query) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java index 34b3764131..8d0d6c48b9 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java @@ -4,7 +4,12 @@ package eu.dnetlib.dhp.countrypropagation; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; +import java.util.ArrayList; +import java.util.Set; +import java.util.stream.Collectors; + import org.apache.commons.io.IOUtils; +import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.sql.*; import org.apache.spark.sql.Dataset; @@ -13,6 +18,7 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.oaf.*; +import scala.Tuple2; public class PrepareResultCountrySet { private static final Logger log = LoggerFactory.getLogger(PrepareResultCountrySet.class); @@ -60,6 +66,7 @@ public class PrepareResultCountrySet { conf, isSparkSessionManaged, spark -> { + removeOutputDir(spark, outputPath); getPotentialResultToUpdate( spark, inputPath, @@ -89,10 +96,33 @@ public class PrepareResultCountrySet { spark .sql(RESULT_COUNTRYSET_QUERY) .as(Encoders.bean(ResultCountrySet.class)) - .write() - .option("compression", "gzip") - .mode(SaveMode.Append) - .json(outputPath); + .toJavaRDD() + .mapToPair(value -> new Tuple2<>(value.getResultId(), value)) + .reduceByKey((a, b) -> { + ArrayList countryList = a.getCountrySet(); + Set countryCodes = countryList + .stream() + .map(country -> country.getClassid()) + .collect(Collectors.toSet()); + b + .getCountrySet() + .stream() + .forEach(c -> { + if (!countryCodes.contains(c.getClassid())) { + countryList.add(c); + countryCodes.add(c.getClassid()); + } + + }); + a.setCountrySet(countryList); + return a; + }) + .map(couple -> OBJECT_MAPPER.writeValueAsString(couple._2())) + .saveAsTextFile(outputPath, GzipCodec.class); +// .write() +// .option("compression", "gzip") +// .mode(SaveMode.Append) +// .json(outputPath); } } From 8f51af4e9bb965af3994b607d0288d0ba7bb31d4 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 25 May 2020 10:34:30 +0200 Subject: [PATCH 10/25] added PacePerson to get name surname for authors having only fullname set --- .../SparkOrcidToResultFromSemRelJob.java | 98 +++++++++++-------- 1 file changed, 56 insertions(+), 42 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index bea847ca76..e4ffc56982 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.List; import java.util.Optional; +import eu.dnetlib.dhp.common.PacePerson; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; @@ -129,61 +130,74 @@ public class SparkOrcidToResultFromSemRelJob { } private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author) { - boolean toaddpid = false; + boolean toaddpid = false; - if (StringUtils.isNotEmpty(autoritative_author.getSurname())) { - if (StringUtils.isNotEmpty(author.getSurname())) { - if (autoritative_author - .getSurname() - .trim() - .equalsIgnoreCase(author.getSurname().trim())) { + String author_name = author.getName(); + String author_surname = author.getSurname(); - // have the same surname. Check the name - if (StringUtils.isNotEmpty(autoritative_author.getName())) { - if (StringUtils.isNotEmpty(author.getName())) { - if (autoritative_author - .getName() - .trim() - .equalsIgnoreCase(author.getName().trim())) { - toaddpid = true; - } - // they could be differently written (i.e. only the initials of the name - // in one of the two - else { + if(StringUtils.isEmpty(author_name) || StringUtils.isEmpty(author_surname)){ + PacePerson pp = new PacePerson(author.getFullname(), false); + if (pp.isAccurate()){ + author_name = pp.getNormalisedFirstName(); + author_surname = pp.getNormalisedSurname(); + + } + } + + if (StringUtils.isNotEmpty(autoritative_author.getSurname())) { + if (StringUtils.isNotEmpty(author_surname)) { + if (autoritative_author + .getSurname() + .trim() + .equalsIgnoreCase(author_surname.trim())) { + + // have the same surname. Check the name + if (StringUtils.isNotEmpty(autoritative_author.getName())) { + if (StringUtils.isNotEmpty(author_name)) { if (autoritative_author - .getName() - .trim() - .substring(0, 0) - .equalsIgnoreCase(author.getName().trim().substring(0, 0))) { + .getName() + .trim() + .equalsIgnoreCase(author_name.trim())) { toaddpid = true; } + // they could be differently written (i.e. only the initials of the name + // in one of the two + else { + if (autoritative_author + .getName() + .trim() + .substring(0, 0) + .equalsIgnoreCase(author_name.trim().substring(0, 0))) { + toaddpid = true; + } + } } } } } } - } - if (toaddpid) { - StructuredProperty p = new StructuredProperty(); - p.setValue(autoritative_author.getOrcid()); - p.setQualifier(getQualifier(PROPAGATION_AUTHOR_PID, PROPAGATION_AUTHOR_PID)); - p - .setDataInfo( - getDataInfo( - PROPAGATION_DATA_INFO_TYPE, - PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID, - PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME)); + if (toaddpid) { + StructuredProperty p = new StructuredProperty(); + p.setValue(autoritative_author.getOrcid()); + p.setQualifier(getQualifier(PROPAGATION_AUTHOR_PID, PROPAGATION_AUTHOR_PID)); + p + .setDataInfo( + getDataInfo( + PROPAGATION_DATA_INFO_TYPE, + PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID, + PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME)); + + Optional> authorPid = Optional.ofNullable(author.getPid()); + if (authorPid.isPresent()) { + authorPid.get().add(p); + } else { + author.setPid(Lists.newArrayList(p)); + } - Optional> authorPid = Optional.ofNullable(author.getPid()); - if (authorPid.isPresent()) { - authorPid.get().add(p); - } else { - author.setPid(Lists.newArrayList(p)); } - + return toaddpid; } - return toaddpid; - } + private static boolean containsAllowedPid(Author a) { Optional> pids = Optional.ofNullable(a.getPid()); From f754c424bd87aaf6d403e3734a281288b584de83 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 25 May 2020 10:35:02 +0200 Subject: [PATCH 11/25] changed logic to compute only onece PacePerson for each Author to be enriched --- .../SparkOrcidToResultFromSemRelJob.java | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index e4ffc56982..34882b5872 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -122,29 +122,24 @@ public class SparkOrcidToResultFromSemRelJob { } private static void enrichAuthor(Author a, List au) { + PacePerson pp = new PacePerson(a.getFullname(), false); for (AutoritativeAuthor aa : au) { - if (enrichAuthor(aa, a)) { + if (enrichAuthor(aa, a, pp.getNormalisedFirstName(), pp.getNormalisedSurname() )) { return; } } } - private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author) { + private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author, + String author_name, + String author_surname) { boolean toaddpid = false; - String author_name = author.getName(); - String author_surname = author.getSurname(); - - if(StringUtils.isEmpty(author_name) || StringUtils.isEmpty(author_surname)){ - PacePerson pp = new PacePerson(author.getFullname(), false); - if (pp.isAccurate()){ - author_name = pp.getNormalisedFirstName(); - author_surname = pp.getNormalisedSurname(); - - } - } if (StringUtils.isNotEmpty(autoritative_author.getSurname())) { + if (StringUtils.isNotEmpty(author.getSurname())){ + author_surname = author.getSurname(); + } if (StringUtils.isNotEmpty(author_surname)) { if (autoritative_author .getSurname() @@ -153,6 +148,9 @@ public class SparkOrcidToResultFromSemRelJob { // have the same surname. Check the name if (StringUtils.isNotEmpty(autoritative_author.getName())) { + if(StringUtils.isNotEmpty(author.getName())){ + author_name = author.getName(); + } if (StringUtils.isNotEmpty(author_name)) { if (autoritative_author .getName() From dbde2d243a21c218a22a315af785236f9d56658f Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 25 May 2020 10:35:39 +0200 Subject: [PATCH 12/25] changed due to move of PacePerson from dhp-graph-mapper to dhp-common --- .../main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java | 2 +- .../main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index af9fe7197e..53c0913c23 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -23,7 +23,7 @@ import org.dom4j.Node; import com.google.common.collect.Lists; -import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson; +import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.Field; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 9c74c4a938..b9a1599174 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -28,7 +28,7 @@ import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.Node; -import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson; +import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.Field; From 74215f6d9f2ae3ea9a06f7e66b10be25a2f08568 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 25 May 2020 10:38:16 +0200 Subject: [PATCH 13/25] refactoring --- .../projecttoresult/SparkResultToProjectThroughSemRelJob.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java index 17f6a057d8..0791fd68ce 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java @@ -105,7 +105,7 @@ public class SparkResultToProjectThroughSemRelJob { .stream() .forEach( (p -> { - potential_update.getProjectSet().remove(p); + potential_update.getProjectSet().remove(p); })); } String resId = potential_update.getResultId(); From eea07f4c4208ad1eef36efea397308dc32f7bd54 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 26 May 2020 09:21:49 +0200 Subject: [PATCH 14/25] refactoring --- .../dnetlib/dhp/blacklist/BlackListTest.java | 1 - .../SparkOrcidToResultFromSemRelJob.java | 106 +++++++++--------- 2 files changed, 52 insertions(+), 55 deletions(-) diff --git a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java index 0487a5844d..585848589c 100644 --- a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java +++ b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java @@ -61,7 +61,6 @@ public class BlackListTest { spark.stop(); } - @Test public void noRemoveTest() throws Exception { SparkRemoveBlacklistedRelationJob diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index 34882b5872..3fc1270645 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -7,7 +7,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.List; import java.util.Optional; -import eu.dnetlib.dhp.common.PacePerson; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; @@ -23,6 +22,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; @@ -124,78 +124,76 @@ public class SparkOrcidToResultFromSemRelJob { private static void enrichAuthor(Author a, List au) { PacePerson pp = new PacePerson(a.getFullname(), false); for (AutoritativeAuthor aa : au) { - if (enrichAuthor(aa, a, pp.getNormalisedFirstName(), pp.getNormalisedSurname() )) { + if (enrichAuthor(aa, a, pp.getNormalisedFirstName(), pp.getNormalisedSurname())) { return; } } } private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author, - String author_name, - String author_surname) { - boolean toaddpid = false; + String author_name, + String author_surname) { + boolean toaddpid = false; + if (StringUtils.isNotEmpty(autoritative_author.getSurname())) { + if (StringUtils.isNotEmpty(author.getSurname())) { + author_surname = author.getSurname(); + } + if (StringUtils.isNotEmpty(author_surname)) { + if (autoritative_author + .getSurname() + .trim() + .equalsIgnoreCase(author_surname.trim())) { - if (StringUtils.isNotEmpty(autoritative_author.getSurname())) { - if (StringUtils.isNotEmpty(author.getSurname())){ - author_surname = author.getSurname(); - } - if (StringUtils.isNotEmpty(author_surname)) { - if (autoritative_author - .getSurname() - .trim() - .equalsIgnoreCase(author_surname.trim())) { - - // have the same surname. Check the name - if (StringUtils.isNotEmpty(autoritative_author.getName())) { - if(StringUtils.isNotEmpty(author.getName())){ - author_name = author.getName(); + // have the same surname. Check the name + if (StringUtils.isNotEmpty(autoritative_author.getName())) { + if (StringUtils.isNotEmpty(author.getName())) { + author_name = author.getName(); + } + if (StringUtils.isNotEmpty(author_name)) { + if (autoritative_author + .getName() + .trim() + .equalsIgnoreCase(author_name.trim())) { + toaddpid = true; } - if (StringUtils.isNotEmpty(author_name)) { + // they could be differently written (i.e. only the initials of the name + // in one of the two + else { if (autoritative_author - .getName() - .trim() - .equalsIgnoreCase(author_name.trim())) { + .getName() + .trim() + .substring(0, 0) + .equalsIgnoreCase(author_name.trim().substring(0, 0))) { toaddpid = true; } - // they could be differently written (i.e. only the initials of the name - // in one of the two - else { - if (autoritative_author - .getName() - .trim() - .substring(0, 0) - .equalsIgnoreCase(author_name.trim().substring(0, 0))) { - toaddpid = true; - } - } } } } } } - if (toaddpid) { - StructuredProperty p = new StructuredProperty(); - p.setValue(autoritative_author.getOrcid()); - p.setQualifier(getQualifier(PROPAGATION_AUTHOR_PID, PROPAGATION_AUTHOR_PID)); - p - .setDataInfo( - getDataInfo( - PROPAGATION_DATA_INFO_TYPE, - PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID, - PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME)); - - Optional> authorPid = Optional.ofNullable(author.getPid()); - if (authorPid.isPresent()) { - authorPid.get().add(p); - } else { - author.setPid(Lists.newArrayList(p)); - } - - } - return toaddpid; } + if (toaddpid) { + StructuredProperty p = new StructuredProperty(); + p.setValue(autoritative_author.getOrcid()); + p.setQualifier(getQualifier(PROPAGATION_AUTHOR_PID, PROPAGATION_AUTHOR_PID)); + p + .setDataInfo( + getDataInfo( + PROPAGATION_DATA_INFO_TYPE, + PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID, + PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME)); + Optional> authorPid = Optional.ofNullable(author.getPid()); + if (authorPid.isPresent()) { + authorPid.get().add(p); + } else { + author.setPid(Lists.newArrayList(p)); + } + + } + return toaddpid; + } private static boolean containsAllowedPid(Author a) { Optional> pids = Optional.ofNullable(a.getPid()); From 7b288a94cba4aaaff32e256663ff71e37c28f651 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 26 May 2020 09:54:13 +0200 Subject: [PATCH 15/25] code formatting --- .../main/java/eu/dnetlib/dhp/schema/oaf/Result.java | 13 +++++++------ .../eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java | 4 ++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java index 213a585a8c..11fdaa4f92 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java @@ -244,20 +244,20 @@ public class Result extends OafEntity implements Serializable { subject = mergeLists(subject, r.getSubject()); - //merge title lists: main title with higher trust and distinct between the others + // merge title lists: main title with higher trust and distinct between the others StructuredProperty baseMainTitle = null; - if(title != null) { + if (title != null) { baseMainTitle = getMainTitle(title); title.remove(baseMainTitle); } StructuredProperty newMainTitle = null; - if(r.getTitle() != null) { + if (r.getTitle() != null) { newMainTitle = getMainTitle(r.getTitle()); r.getTitle().remove(newMainTitle); } - if (newMainTitle != null && compareTrust(this, r) < 0 ) + if (newMainTitle != null && compareTrust(this, r) < 0) baseMainTitle = newMainTitle; title = mergeLists(title, r.getTitle()); @@ -314,8 +314,9 @@ public class Result extends OafEntity implements Serializable { } private StructuredProperty getMainTitle(List titles) { - //need to check if the list of titles contains more than 1 main title? (in that case, we should chose which main title select in the list) - for (StructuredProperty title: titles) { + // need to check if the list of titles contains more than 1 main title? (in that case, we should chose which + // main title select in the list) + for (StructuredProperty title : titles) { if (title.getQualifier() != null && title.getQualifier().getClassid() != null) if (title.getQualifier().getClassid().equals("main title")) return title; diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java index f4b2c22522..b8ccb038d7 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java @@ -91,9 +91,9 @@ public class EntityMergerTest implements Serializable { assertEquals(pub_merged.getAuthor().size(), 9); assertEquals(AuthorMerger.countAuthorsPids(pub_merged.getAuthor()), 4); - //verify title + // verify title int count = 0; - for (StructuredProperty title: pub_merged.getTitle()){ + for (StructuredProperty title : pub_merged.getTitle()) { if (title.getQualifier().getClassid().equals("main title")) count++; } From 55595d723599a3d0b30fbec81742829ad801e9b0 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 26 May 2020 10:28:35 +0200 Subject: [PATCH 16/25] HACK: patch NULL values with defaults found in result.datainfo.deletedbyinference and result.context --- .../eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java index 1c65e8adec..4800def0ae 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.bulktag; import static eu.dnetlib.dhp.PropagationConstant.removeOutputDir; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.util.ArrayList; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -100,6 +101,7 @@ public class SparkBulkTagJob { ResultTagger resultTagger = new ResultTagger(); readPath(spark, inputPath, resultClazz) + .map(patchResult(), Encoders.bean(resultClazz)) .map( (MapFunction) value -> resultTagger .enrichContextCriteria( @@ -119,4 +121,17 @@ public class SparkBulkTagJob { .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); } + // TODO remove this hack as soon as the values fixed by this method will be provided as NON null + private static MapFunction patchResult() { + return (MapFunction) r -> { + if (r.getDataInfo().getDeletedbyinference() == null) { + r.getDataInfo().setDeletedbyinference(false); + } + if (r.getContext() == null) { + r.setContext(new ArrayList<>()); + } + return r; + }; + } + } From b8e541a454eb0745f42bd9e0ec869479633cd047 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 26 May 2020 10:30:09 +0200 Subject: [PATCH 17/25] fixing repeated organization.websiteurl in organization entities (#5645) as well as project.ecinternationalorganizationeurinterests --- .../eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 6f042b45cf..f99298130b 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -769,7 +769,7 @@ public class XmlRecordFactory implements Serializable { XmlSerializationUtils.asXmlElement("websiteurl", o.getWebsiteurl().getValue())); } if (o.getLogourl() != null) { - metadata.add(XmlSerializationUtils.asXmlElement("websiteurl", o.getLogourl().getValue())); + metadata.add(XmlSerializationUtils.asXmlElement("logourl", o.getLogourl().getValue())); } if (o.getEclegalbody() != null) { @@ -801,13 +801,13 @@ public class XmlRecordFactory implements Serializable { .asXmlElement( "echighereducation", o.getEchighereducation().getValue())); } - if (o.getEcinternationalorganization() != null) { + if (o.getEcinternationalorganizationeurinterests() != null) { metadata .add( XmlSerializationUtils .asXmlElement( "ecinternationalorganizationeurinterests", - o.getEcinternationalorganization().getValue())); + o.getEcinternationalorganizationeurinterests().getValue())); } if (o.getEcinternationalorganization() != null) { metadata From 093f1aff03c21f969b6c30c0d6cf74e74d83e2bc Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Tue, 26 May 2020 13:06:55 +0200 Subject: [PATCH 18/25] result pids (new xpaths + IS vocabularies) --- .../raw/AbstractMdRecordToOafMapper.java | 167 ++++++++---------- .../raw/GenerateEntitiesApplication.java | 161 ++++++++++------- .../dhp/oa/graph/raw/OafToOafMapper.java | 33 ++-- .../dhp/oa/graph/raw/OdfToOafMapper.java | 54 +++--- .../graph/generate_entities_parameters.json | 7 +- 5 files changed, 217 insertions(+), 205 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 5c89d5096f..278544e0b0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -10,10 +10,27 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listFields; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.oaiIProvenance; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; -import static eu.dnetlib.dhp.schema.common.ModelConstants.*; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASET_DEFAULT_RESULTTYPE; import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY; +import static eu.dnetlib.dhp.schema.common.ModelConstants.NOT_AVAILABLE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.ORP_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.REPOSITORY_PROVENANCE_ACTIONS; +import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.UNKNOWN; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; @@ -47,10 +64,8 @@ public abstract class AbstractMdRecordToOafMapper { protected static final String DATACITE_SCHEMA_KERNEL_4 = "http://datacite.org/schema/kernel-4"; protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3"; - protected static final Qualifier ORCID_PID_TYPE = qualifier( - "ORCID", "Open Researcher and Contributor ID", DNET_PID_TYPES, DNET_PID_TYPES); - protected static final Qualifier MAG_PID_TYPE = qualifier( - "MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES); + protected static final Qualifier ORCID_PID_TYPE = qualifier("ORCID", "Open Researcher and Contributor ID", DNET_PID_TYPES, DNET_PID_TYPES); + protected static final Qualifier MAG_PID_TYPE = qualifier("MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES); protected static final Map nsContext = new HashMap<>(); @@ -64,8 +79,7 @@ public abstract class AbstractMdRecordToOafMapper { nsContext.put("datacite", DATACITE_SCHEMA_KERNEL_3); } - protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier( - "main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title"); + protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier("main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title"); protected AbstractMdRecordToOafMapper(final Map code2name) { this.code2name = code2name; @@ -79,20 +93,15 @@ public abstract class AbstractMdRecordToOafMapper { .parseText(xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)); final String type = doc.valueOf("//dr:CobjCategory/@type"); - final KeyValue collectedFrom = getProvenanceDatasource( - doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); + final KeyValue collectedFrom = getProvenanceDatasource(doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); - if (collectedFrom == null) { - return null; - } + if (collectedFrom == null) { return null; } final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) ? collectedFrom : getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name"); - if (hostedBy == null) { - return null; - } + if (hostedBy == null) { return null; } final DataInfo info = prepareDataInfo(doc); final long lastUpdateTimestamp = new Date().getTime(); @@ -107,9 +116,7 @@ public abstract class AbstractMdRecordToOafMapper { final String dsId = doc.valueOf(xpathId); final String dsName = doc.valueOf(xpathName); - if (StringUtils.isBlank(dsId) | StringUtils.isBlank(dsName)) { - return null; - } + if (StringUtils.isBlank(dsId) | StringUtils.isBlank(dsName)) { return null; } return keyValue(createOpenaireId(10, dsId, true), dsName); } @@ -125,47 +132,47 @@ public abstract class AbstractMdRecordToOafMapper { final List oafs = new ArrayList<>(); switch (type.toLowerCase()) { - case "publication": - final Publication p = new Publication(); - populateResultFields(p, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); - p.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE); - p.setJournal(prepareJournal(doc, info)); - oafs.add(p); - break; - case "dataset": - final Dataset d = new Dataset(); - populateResultFields(d, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); - d.setResulttype(DATASET_DEFAULT_RESULTTYPE); - d.setStoragedate(prepareDatasetStorageDate(doc, info)); - d.setDevice(prepareDatasetDevice(doc, info)); - d.setSize(prepareDatasetSize(doc, info)); - d.setVersion(prepareDatasetVersion(doc, info)); - d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc, info)); - d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc, info)); - d.setGeolocation(prepareDatasetGeoLocations(doc, info)); - oafs.add(d); - break; - case "software": - final Software s = new Software(); - populateResultFields(s, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); - s.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE); - s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info)); - s.setLicense(prepareSoftwareLicenses(doc, info)); - s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info)); - s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info)); - oafs.add(s); - break; - case "": - case "otherresearchproducts": - default: - final OtherResearchProduct o = new OtherResearchProduct(); - populateResultFields(o, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); - o.setResulttype(ORP_DEFAULT_RESULTTYPE); - o.setContactperson(prepareOtherResearchProductContactPersons(doc, info)); - o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info)); - o.setTool(prepareOtherResearchProductTools(doc, info)); - oafs.add(o); - break; + case "publication": + final Publication p = new Publication(); + populateResultFields(p, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); + p.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE); + p.setJournal(prepareJournal(doc, info)); + oafs.add(p); + break; + case "dataset": + final Dataset d = new Dataset(); + populateResultFields(d, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); + d.setResulttype(DATASET_DEFAULT_RESULTTYPE); + d.setStoragedate(prepareDatasetStorageDate(doc, info)); + d.setDevice(prepareDatasetDevice(doc, info)); + d.setSize(prepareDatasetSize(doc, info)); + d.setVersion(prepareDatasetVersion(doc, info)); + d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc, info)); + d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc, info)); + d.setGeolocation(prepareDatasetGeoLocations(doc, info)); + oafs.add(d); + break; + case "software": + final Software s = new Software(); + populateResultFields(s, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); + s.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE); + s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info)); + s.setLicense(prepareSoftwareLicenses(doc, info)); + s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info)); + s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info)); + oafs.add(s); + break; + case "": + case "otherresearchproducts": + default: + final OtherResearchProduct o = new OtherResearchProduct(); + populateResultFields(o, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); + o.setResulttype(ORP_DEFAULT_RESULTTYPE); + o.setContactperson(prepareOtherResearchProductContactPersons(doc, info)); + o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info)); + o.setTool(prepareOtherResearchProductTools(doc, info)); + oafs.add(o); + break; } if (!oafs.isEmpty()) { @@ -194,15 +201,9 @@ public abstract class AbstractMdRecordToOafMapper { final String projectId = createOpenaireId(40, originalId, true); res - .add( - getRelation( - docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, collectedFrom, info, - lastUpdateTimestamp)); + .add(getRelation(docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, collectedFrom, info, lastUpdateTimestamp)); res - .add( - getRelation( - projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, collectedFrom, info, - lastUpdateTimestamp)); + .add(getRelation(projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, collectedFrom, info, lastUpdateTimestamp)); } } @@ -247,10 +248,7 @@ public abstract class AbstractMdRecordToOafMapper { r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false)); r.setOriginalId(Arrays.asList(doc.valueOf("//dri:objIdentifier"))); r.setCollectedfrom(Arrays.asList(collectedFrom)); - r - .setPid( - prepareListStructProps( - doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info)); + r.setPid(prepareResultPids(doc, info)); r.setDateofcollection(doc.valueOf("//dr:dateOfCollection")); r.setDateoftransformation(doc.valueOf("//dr:dateOfTransformation")); r.setExtraInfo(new ArrayList<>()); // NOT PRESENT IN MDSTORES @@ -278,6 +276,8 @@ public abstract class AbstractMdRecordToOafMapper { r.setBestaccessright(getBestAccessRights(instances)); } + protected abstract List prepareResultPids(Document doc, DataInfo info); + private List prepareContexts(final Document doc, final DataInfo info) { final List list = new ArrayList<>(); for (final Object o : doc.selectNodes("//oaf:concept")) { @@ -358,7 +358,7 @@ public abstract class AbstractMdRecordToOafMapper { protected abstract Field prepareDatasetStorageDate(Document doc, DataInfo info); - protected static Qualifier getBestAccessRights(List instanceList) { + protected static Qualifier getBestAccessRights(final List instanceList) { if (instanceList != null) { final Optional min = instanceList .stream() @@ -398,9 +398,7 @@ public abstract class AbstractMdRecordToOafMapper { final String sp = n.valueOf("@sp"); final String vol = n.valueOf("@vol"); final String edition = n.valueOf("@edition"); - if (StringUtils.isNotBlank(name)) { - return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info); - } + if (StringUtils.isNotBlank(name)) { return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info); } } return null; } @@ -453,10 +451,7 @@ public abstract class AbstractMdRecordToOafMapper { for (final Object o : node.selectNodes(xpath)) { final Node n = (Node) o; res - .add( - structuredProperty( - n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"), - n.valueOf("@schemename"), info)); + .add(structuredProperty(n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"), n.valueOf("@schemename"), info)); } return res; } @@ -464,9 +459,7 @@ public abstract class AbstractMdRecordToOafMapper { protected OAIProvenance prepareOAIprovenance(final Document doc) { final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']"); - if (n == null) { - return null; - } + if (n == null) { return null; } final String identifier = n.valueOf("./*[local-name()='identifier']"); final String baseURL = n.valueOf("./*[local-name()='baseURL']"); @@ -481,9 +474,7 @@ public abstract class AbstractMdRecordToOafMapper { protected DataInfo prepareDataInfo(final Document doc) { final Node n = doc.selectSingleNode("//oaf:datainfo"); - if (n == null) { - return dataInfo(false, null, false, false, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); - } + if (n == null) { return dataInfo(false, null, false, false, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); } final String paClassId = n.valueOf("./oaf:provenanceaction/@classid"); final String paClassName = n.valueOf("./oaf:provenanceaction/@classname"); @@ -495,9 +486,7 @@ public abstract class AbstractMdRecordToOafMapper { final Boolean inferred = Boolean.parseBoolean(n.valueOf("./oaf:inferred")); final String trust = n.valueOf("./oaf:trust"); - return dataInfo( - deletedbyinference, inferenceprovenance, inferred, false, - qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust); + return dataInfo(deletedbyinference, inferenceprovenance, inferred, false, qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust); } protected Field prepareField(final Node node, final String xpath, final DataInfo info) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java index 739c7a4629..d184364174 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java @@ -5,7 +5,12 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.IOException; import java.sql.SQLException; -import java.util.*; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; @@ -27,7 +32,19 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.DbClient; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Datasource; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; +import eu.dnetlib.dhp.schema.oaf.Project; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Software; +import eu.dnetlib.dhp.utils.ISLookupClientFactory; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import scala.Tuple2; public class GenerateEntitiesApplication { @@ -39,14 +56,12 @@ public class GenerateEntitiesApplication { public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils - .toString( - MigrateMongoMdstoresApplication.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json"))); + .toString(GenerateEntitiesApplication.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json"))); parser.parseArgument(args); - Boolean isSparkSessionManaged = Optional + final Boolean isSparkSessionManaged = Optional .ofNullable(parser.get("isSparkSessionManaged")) .map(Boolean::valueOf) .orElse(Boolean.TRUE); @@ -58,17 +73,17 @@ public class GenerateEntitiesApplication { final String dbUrl = parser.get("postgresUrl"); final String dbUser = parser.get("postgresUser"); final String dbPassword = parser.get("postgresPassword"); + final String isLookupUrl = parser.get("isLookupUrl"); - final Map code2name = loadClassNames(dbUrl, dbUser, dbPassword); + final Map code2name = loadVocsFromDB(dbUrl, dbUser, dbPassword); - SparkConf conf = new SparkConf(); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - removeOutputDir(spark, targetPath); - generateEntities(spark, code2name, sourcePaths, targetPath); - }); + code2name.putAll(loadVocsFromIS(isLookupUrl)); + + final SparkConf conf = new SparkConf(); + runWithSparkSession(conf, isSparkSessionManaged, spark -> { + removeOutputDir(spark, targetPath); + generateEntities(spark, code2name, sourcePaths, targetPath); + }); } private static void generateEntities( @@ -77,7 +92,7 @@ public class GenerateEntitiesApplication { final String sourcePaths, final String targetPath) { - JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); final List existingSourcePaths = Arrays .stream(sourcePaths.split(",")) .filter(p -> exists(sc, p)) @@ -90,27 +105,25 @@ public class GenerateEntitiesApplication { for (final String sp : existingSourcePaths) { inputRdd = inputRdd - .union( - sc - .sequenceFile(sp, Text.class, Text.class) - .map(k -> new Tuple2<>(k._1().toString(), k._2().toString())) - .map(k -> convertToListOaf(k._1(), k._2(), code2name)) - .filter(Objects::nonNull) - .flatMap(list -> list.iterator())); + .union(sc + .sequenceFile(sp, Text.class, Text.class) + .map(k -> new Tuple2<>(k._1().toString(), k._2().toString())) + .map(k -> convertToListOaf(k._1(), k._2(), code2name)) + .filter(Objects::nonNull) + .flatMap(list -> list.iterator())); } inputRdd .mapToPair(oaf -> new Tuple2<>(ModelSupport.idFn().apply(oaf), oaf)) .reduceByKey((o1, o2) -> merge(o1, o2)) .map(Tuple2::_2) - .map( - oaf -> oaf.getClass().getSimpleName().toLowerCase() - + "|" - + OBJECT_MAPPER.writeValueAsString(oaf)) + .map(oaf -> oaf.getClass().getSimpleName().toLowerCase() + + "|" + + OBJECT_MAPPER.writeValueAsString(oaf)) .saveAsTextFile(targetPath, GzipCodec.class); } - private static Oaf merge(Oaf o1, Oaf o2) { + private static Oaf merge(final Oaf o1, final Oaf o2) { if (ModelSupport.isSubClass(o1, OafEntity.class)) { ((OafEntity) o1).mergeFrom((OafEntity) o2); } else if (ModelSupport.isSubClass(o1, Relation.class)) { @@ -122,37 +135,41 @@ public class GenerateEntitiesApplication { } private static List convertToListOaf( - final String id, final String s, final Map code2name) { + final String id, + final String s, + final Map code2name) { final String type = StringUtils.substringAfter(id, ":"); switch (type.toLowerCase()) { - case "native_oaf": - return new OafToOafMapper(code2name).processMdRecord(s); - case "native_odf": - return new OdfToOafMapper(code2name).processMdRecord(s); - case "datasource": - return Arrays.asList(convertFromJson(s, Datasource.class)); - case "organization": - return Arrays.asList(convertFromJson(s, Organization.class)); - case "project": - return Arrays.asList(convertFromJson(s, Project.class)); - case "relation": - return Arrays.asList(convertFromJson(s, Relation.class)); - case "publication": - return Arrays.asList(convertFromJson(s, Publication.class)); - case "dataset": - return Arrays.asList(convertFromJson(s, Dataset.class)); - case "software": - return Arrays.asList(convertFromJson(s, Software.class)); - case "otherresearchproduct": - return Arrays.asList(convertFromJson(s, OtherResearchProduct.class)); - default: - throw new RuntimeException("type not managed: " + type.toLowerCase()); + case "native_oaf": + return new OafToOafMapper(code2name).processMdRecord(s); + case "native_odf": + return new OdfToOafMapper(code2name).processMdRecord(s); + case "datasource": + return Arrays.asList(convertFromJson(s, Datasource.class)); + case "organization": + return Arrays.asList(convertFromJson(s, Organization.class)); + case "project": + return Arrays.asList(convertFromJson(s, Project.class)); + case "relation": + return Arrays.asList(convertFromJson(s, Relation.class)); + case "publication": + return Arrays.asList(convertFromJson(s, Publication.class)); + case "dataset": + return Arrays.asList(convertFromJson(s, Dataset.class)); + case "software": + return Arrays.asList(convertFromJson(s, Software.class)); + case "otherresearchproduct": + return Arrays.asList(convertFromJson(s, OtherResearchProduct.class)); + default: + throw new RuntimeException("type not managed: " + type.toLowerCase()); } } - private static Map loadClassNames( - final String dbUrl, final String dbUser, final String dbPassword) throws IOException { + private static Map loadVocsFromDB( + final String dbUrl, + final String dbUser, + final String dbPassword) throws IOException { log.info("Loading vocabulary terms from db..."); @@ -160,15 +177,13 @@ public class GenerateEntitiesApplication { try (DbClient dbClient = new DbClient(dbUrl, dbUser, dbPassword)) { dbClient - .processResults( - "select code, name from class", - rs -> { - try { - map.put(rs.getString("code"), rs.getString("name")); - } catch (final SQLException e) { - e.printStackTrace(); - } - }); + .processResults("select code, name from class", rs -> { + try { + map.put(rs.getString("code"), rs.getString("name")); + } catch (final SQLException e) { + e.printStackTrace(); + } + }); } log.info("Found " + map.size() + " terms."); @@ -176,6 +191,24 @@ public class GenerateEntitiesApplication { return map; } + private static Map loadVocsFromIS(final String isLookupUrl) throws IOException, ISLookUpException { + final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookupUrl); + + final String xquery = + IOUtils.toString(GenerateEntitiesApplication.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery")); + + final Map map = new HashMap<>(); + + for (final String s : isLookUpService.quickSearchProfile(xquery)) { + final String[] arr = s.split("@=@"); + if (arr.length == 4) { + map.put(arr[2].trim(), arr[3].trim()); + } + } + + return map; + } + private static Oaf convertFromJson(final String s, final Class clazz) { try { return OBJECT_MAPPER.readValue(s, clazz); @@ -196,7 +229,7 @@ public class GenerateEntitiesApplication { } } - private static void removeOutputDir(SparkSession spark, String path) { + private static void removeOutputDir(final SparkSession spark, final String path) { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index af9fe7197e..61fe08722e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -130,8 +130,7 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { final Instance instance = new Instance(); instance - .setInstancetype( - prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)); + .setInstancetype(prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)); instance.setCollectedfrom(collectedfrom); instance.setHostedby(hostedby); instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); @@ -147,14 +146,13 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { final List nodes = Lists.newArrayList(doc.selectNodes("//dc:identifier")); instance - .setUrl( - nodes - .stream() - .filter(n -> StringUtils.isNotBlank(n.getText())) - .map(n -> n.getText().trim()) - .filter(u -> u.startsWith("http")) - .distinct() - .collect(Collectors.toCollection(ArrayList::new))); + .setUrl(nodes + .stream() + .filter(n -> StringUtils.isNotBlank(n.getText())) + .map(n -> n.getText().trim()) + .filter(u -> u.startsWith("http")) + .distinct() + .collect(Collectors.toCollection(ArrayList::new))); return Lists.newArrayList(instance); } @@ -279,15 +277,9 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { final String otherId = createOpenaireId(50, originalId, false); res - .add( - getRelation( - docId, otherId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info, - lastUpdateTimestamp)); + .add(getRelation(docId, otherId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info, lastUpdateTimestamp)); res - .add( - getRelation( - otherId, docId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info, - lastUpdateTimestamp)); + .add(getRelation(otherId, docId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info, lastUpdateTimestamp)); } } return res; @@ -297,4 +289,9 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { protected Qualifier prepareResourceType(final Document doc, final DataInfo info) { return null; // NOT PRESENT IN OAF } + + @Override + protected List prepareResultPids(final Document doc, final DataInfo info) { + return prepareListStructProps(doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info); + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 9c74c4a938..9026874366 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -120,8 +120,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final Instance instance = new Instance(); instance - .setInstancetype( - prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)); + .setInstancetype(prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)); instance.setCollectedfrom(collectedfrom); instance.setHostedby(hostedby); instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); @@ -170,10 +169,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { && !dateType.equalsIgnoreCase("Updated") && !dateType.equalsIgnoreCase("Available")) { res - .add( - structuredProperty( - ((Node) o).getText(), "UNKNOWN", "UNKNOWN", DNET_DATA_CITE_DATE, DNET_DATA_CITE_DATE, - info)); + .add(structuredProperty(((Node) o).getText(), "UNKNOWN", "UNKNOWN", DNET_DATA_CITE_DATE, DNET_DATA_CITE_DATE, info)); } } return res; @@ -225,16 +221,14 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { protected List> prepareOtherResearchProductContactGroups( final Document doc, final DataInfo info) { - return prepareListFields( - doc, "//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName", info); + return prepareListFields(doc, "//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName", info); } @Override protected List> prepareOtherResearchProductContactPersons( final Document doc, final DataInfo info) { - return prepareListFields( - doc, "//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName", info); + return prepareListFields(doc, "//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName", info); } @Override @@ -260,8 +254,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { protected List> prepareSoftwareDocumentationUrls( final Document doc, final DataInfo info) { - return prepareListFields( - doc, "//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", info); + return prepareListFields(doc, "//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", info); } // DATASETS @@ -335,29 +328,16 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { if (type.equalsIgnoreCase("IsSupplementTo")) { res - .add( - getRelation( - docId, otherId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENT_TO, collectedFrom, info, - lastUpdateTimestamp)); + .add(getRelation(docId, otherId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENT_TO, collectedFrom, info, lastUpdateTimestamp)); res - .add( - getRelation( - otherId, docId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENTED_BY, collectedFrom, info, - lastUpdateTimestamp)); + .add(getRelation(otherId, docId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENTED_BY, collectedFrom, info, lastUpdateTimestamp)); } else if (type.equals("IsPartOf")) { res - .add( - getRelation( - docId, otherId, RESULT_RESULT, PART, IS_PART_OF, collectedFrom, info, - lastUpdateTimestamp)); + .add(getRelation(docId, otherId, RESULT_RESULT, PART, IS_PART_OF, collectedFrom, info, lastUpdateTimestamp)); res - .add( - getRelation( - otherId, docId, RESULT_RESULT, PART, HAS_PARTS, collectedFrom, info, - lastUpdateTimestamp)); - } else { - } + .add(getRelation(otherId, docId, RESULT_RESULT, PART, HAS_PARTS, collectedFrom, info, lastUpdateTimestamp)); + } else {} } } return res; @@ -365,8 +345,16 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected Qualifier prepareResourceType(final Document doc, final DataInfo info) { - return prepareQualifier( - doc, "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", DNET_DATA_CITE_RESOURCE, - DNET_DATA_CITE_RESOURCE); + return prepareQualifier(doc, "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", DNET_DATA_CITE_RESOURCE, DNET_DATA_CITE_RESOURCE); } + + @Override + protected List prepareResultPids(final Document doc, final DataInfo info) { + final List res = new ArrayList<>(); + res.addAll(prepareListStructProps(doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info)); + res.addAll(prepareListStructProps(doc, "//datacite:identifier[@identifierType != 'URL']", "@identifierType", "dnet:pid_types", "dnet:pid_types", info)); + res.addAll(prepareListStructProps(doc, "//datacite:alternateIdentifier[@alternateIdentifierType != 'URL']", "@alternateIdentifierType", "dnet:pid_types", "dnet:pid_types", info)); + return res; + } + } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json index 293bf041b9..d4d0935a0e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json @@ -34,6 +34,11 @@ "paramLongName": "postgresPassword", "paramDescription": "postgres password", "paramRequired": false + }, + { + "paramName": "islookup", + "paramLongName": "isLookupUrl", + "paramDescription": "the url of the ISLookupService", + "paramRequired": true } - ] \ No newline at end of file From c6af36496a6b03b497043b735070afaff939ca7d Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Tue, 26 May 2020 13:11:09 +0200 Subject: [PATCH 19/25] result pids (new xpaths + IS vocabularies) --- .../eu/dnetlib/dhp/schema/oaf/Result.java | 13 +- .../dhp/oa/dedup/EntityMergerTest.java | 4 +- .../raw/AbstractMdRecordToOafMapper.java | 137 +++++++++++------- .../raw/GenerateEntitiesApplication.java | 75 +++++----- .../dhp/oa/graph/raw/OafToOafMapper.java | 31 ++-- .../dhp/oa/graph/raw/OdfToOafMapper.java | 61 ++++++-- 6 files changed, 198 insertions(+), 123 deletions(-) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java index 213a585a8c..11fdaa4f92 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java @@ -244,20 +244,20 @@ public class Result extends OafEntity implements Serializable { subject = mergeLists(subject, r.getSubject()); - //merge title lists: main title with higher trust and distinct between the others + // merge title lists: main title with higher trust and distinct between the others StructuredProperty baseMainTitle = null; - if(title != null) { + if (title != null) { baseMainTitle = getMainTitle(title); title.remove(baseMainTitle); } StructuredProperty newMainTitle = null; - if(r.getTitle() != null) { + if (r.getTitle() != null) { newMainTitle = getMainTitle(r.getTitle()); r.getTitle().remove(newMainTitle); } - if (newMainTitle != null && compareTrust(this, r) < 0 ) + if (newMainTitle != null && compareTrust(this, r) < 0) baseMainTitle = newMainTitle; title = mergeLists(title, r.getTitle()); @@ -314,8 +314,9 @@ public class Result extends OafEntity implements Serializable { } private StructuredProperty getMainTitle(List titles) { - //need to check if the list of titles contains more than 1 main title? (in that case, we should chose which main title select in the list) - for (StructuredProperty title: titles) { + // need to check if the list of titles contains more than 1 main title? (in that case, we should chose which + // main title select in the list) + for (StructuredProperty title : titles) { if (title.getQualifier() != null && title.getQualifier().getClassid() != null) if (title.getQualifier().getClassid().equals("main title")) return title; diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java index f4b2c22522..b8ccb038d7 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java @@ -91,9 +91,9 @@ public class EntityMergerTest implements Serializable { assertEquals(pub_merged.getAuthor().size(), 9); assertEquals(AuthorMerger.countAuthorsPids(pub_merged.getAuthor()), 4); - //verify title + // verify title int count = 0; - for (StructuredProperty title: pub_merged.getTitle()){ + for (StructuredProperty title : pub_merged.getTitle()) { if (title.getQualifier().getClassid().equals("main title")) count++; } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 278544e0b0..99334dd7f9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -64,8 +64,10 @@ public abstract class AbstractMdRecordToOafMapper { protected static final String DATACITE_SCHEMA_KERNEL_4 = "http://datacite.org/schema/kernel-4"; protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3"; - protected static final Qualifier ORCID_PID_TYPE = qualifier("ORCID", "Open Researcher and Contributor ID", DNET_PID_TYPES, DNET_PID_TYPES); - protected static final Qualifier MAG_PID_TYPE = qualifier("MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES); + protected static final Qualifier ORCID_PID_TYPE = qualifier( + "ORCID", "Open Researcher and Contributor ID", DNET_PID_TYPES, DNET_PID_TYPES); + protected static final Qualifier MAG_PID_TYPE = qualifier( + "MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES); protected static final Map nsContext = new HashMap<>(); @@ -79,7 +81,8 @@ public abstract class AbstractMdRecordToOafMapper { nsContext.put("datacite", DATACITE_SCHEMA_KERNEL_3); } - protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier("main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title"); + protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier( + "main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title"); protected AbstractMdRecordToOafMapper(final Map code2name) { this.code2name = code2name; @@ -93,15 +96,20 @@ public abstract class AbstractMdRecordToOafMapper { .parseText(xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)); final String type = doc.valueOf("//dr:CobjCategory/@type"); - final KeyValue collectedFrom = getProvenanceDatasource(doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); + final KeyValue collectedFrom = getProvenanceDatasource( + doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); - if (collectedFrom == null) { return null; } + if (collectedFrom == null) { + return null; + } final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) ? collectedFrom : getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name"); - if (hostedBy == null) { return null; } + if (hostedBy == null) { + return null; + } final DataInfo info = prepareDataInfo(doc); final long lastUpdateTimestamp = new Date().getTime(); @@ -116,7 +124,9 @@ public abstract class AbstractMdRecordToOafMapper { final String dsId = doc.valueOf(xpathId); final String dsName = doc.valueOf(xpathName); - if (StringUtils.isBlank(dsId) | StringUtils.isBlank(dsName)) { return null; } + if (StringUtils.isBlank(dsId) | StringUtils.isBlank(dsName)) { + return null; + } return keyValue(createOpenaireId(10, dsId, true), dsName); } @@ -132,47 +142,47 @@ public abstract class AbstractMdRecordToOafMapper { final List oafs = new ArrayList<>(); switch (type.toLowerCase()) { - case "publication": - final Publication p = new Publication(); - populateResultFields(p, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); - p.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE); - p.setJournal(prepareJournal(doc, info)); - oafs.add(p); - break; - case "dataset": - final Dataset d = new Dataset(); - populateResultFields(d, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); - d.setResulttype(DATASET_DEFAULT_RESULTTYPE); - d.setStoragedate(prepareDatasetStorageDate(doc, info)); - d.setDevice(prepareDatasetDevice(doc, info)); - d.setSize(prepareDatasetSize(doc, info)); - d.setVersion(prepareDatasetVersion(doc, info)); - d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc, info)); - d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc, info)); - d.setGeolocation(prepareDatasetGeoLocations(doc, info)); - oafs.add(d); - break; - case "software": - final Software s = new Software(); - populateResultFields(s, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); - s.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE); - s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info)); - s.setLicense(prepareSoftwareLicenses(doc, info)); - s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info)); - s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info)); - oafs.add(s); - break; - case "": - case "otherresearchproducts": - default: - final OtherResearchProduct o = new OtherResearchProduct(); - populateResultFields(o, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); - o.setResulttype(ORP_DEFAULT_RESULTTYPE); - o.setContactperson(prepareOtherResearchProductContactPersons(doc, info)); - o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info)); - o.setTool(prepareOtherResearchProductTools(doc, info)); - oafs.add(o); - break; + case "publication": + final Publication p = new Publication(); + populateResultFields(p, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); + p.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE); + p.setJournal(prepareJournal(doc, info)); + oafs.add(p); + break; + case "dataset": + final Dataset d = new Dataset(); + populateResultFields(d, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); + d.setResulttype(DATASET_DEFAULT_RESULTTYPE); + d.setStoragedate(prepareDatasetStorageDate(doc, info)); + d.setDevice(prepareDatasetDevice(doc, info)); + d.setSize(prepareDatasetSize(doc, info)); + d.setVersion(prepareDatasetVersion(doc, info)); + d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc, info)); + d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc, info)); + d.setGeolocation(prepareDatasetGeoLocations(doc, info)); + oafs.add(d); + break; + case "software": + final Software s = new Software(); + populateResultFields(s, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); + s.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE); + s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info)); + s.setLicense(prepareSoftwareLicenses(doc, info)); + s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info)); + s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info)); + oafs.add(s); + break; + case "": + case "otherresearchproducts": + default: + final OtherResearchProduct o = new OtherResearchProduct(); + populateResultFields(o, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); + o.setResulttype(ORP_DEFAULT_RESULTTYPE); + o.setContactperson(prepareOtherResearchProductContactPersons(doc, info)); + o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info)); + o.setTool(prepareOtherResearchProductTools(doc, info)); + oafs.add(o); + break; } if (!oafs.isEmpty()) { @@ -201,9 +211,15 @@ public abstract class AbstractMdRecordToOafMapper { final String projectId = createOpenaireId(40, originalId, true); res - .add(getRelation(docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, collectedFrom, info, lastUpdateTimestamp)); + .add( + getRelation( + docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, collectedFrom, info, + lastUpdateTimestamp)); res - .add(getRelation(projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, collectedFrom, info, lastUpdateTimestamp)); + .add( + getRelation( + projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, collectedFrom, info, + lastUpdateTimestamp)); } } @@ -398,7 +414,9 @@ public abstract class AbstractMdRecordToOafMapper { final String sp = n.valueOf("@sp"); final String vol = n.valueOf("@vol"); final String edition = n.valueOf("@edition"); - if (StringUtils.isNotBlank(name)) { return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info); } + if (StringUtils.isNotBlank(name)) { + return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info); + } } return null; } @@ -451,7 +469,10 @@ public abstract class AbstractMdRecordToOafMapper { for (final Object o : node.selectNodes(xpath)) { final Node n = (Node) o; res - .add(structuredProperty(n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"), n.valueOf("@schemename"), info)); + .add( + structuredProperty( + n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"), + n.valueOf("@schemename"), info)); } return res; } @@ -459,7 +480,9 @@ public abstract class AbstractMdRecordToOafMapper { protected OAIProvenance prepareOAIprovenance(final Document doc) { final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']"); - if (n == null) { return null; } + if (n == null) { + return null; + } final String identifier = n.valueOf("./*[local-name()='identifier']"); final String baseURL = n.valueOf("./*[local-name()='baseURL']"); @@ -474,7 +497,9 @@ public abstract class AbstractMdRecordToOafMapper { protected DataInfo prepareDataInfo(final Document doc) { final Node n = doc.selectSingleNode("//oaf:datainfo"); - if (n == null) { return dataInfo(false, null, false, false, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); } + if (n == null) { + return dataInfo(false, null, false, false, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); + } final String paClassId = n.valueOf("./oaf:provenanceaction/@classid"); final String paClassName = n.valueOf("./oaf:provenanceaction/@classname"); @@ -486,7 +511,9 @@ public abstract class AbstractMdRecordToOafMapper { final Boolean inferred = Boolean.parseBoolean(n.valueOf("./oaf:inferred")); final String trust = n.valueOf("./oaf:trust"); - return dataInfo(deletedbyinference, inferenceprovenance, inferred, false, qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust); + return dataInfo( + deletedbyinference, inferenceprovenance, inferred, false, + qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust); } protected Field prepareField(final Node node, final String xpath, final DataInfo info) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java index d184364174..0d140e9adc 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java @@ -56,8 +56,9 @@ public class GenerateEntitiesApplication { public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils - .toString(GenerateEntitiesApplication.class - .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json"))); + .toString( + GenerateEntitiesApplication.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json"))); parser.parseArgument(args); @@ -105,21 +106,23 @@ public class GenerateEntitiesApplication { for (final String sp : existingSourcePaths) { inputRdd = inputRdd - .union(sc - .sequenceFile(sp, Text.class, Text.class) - .map(k -> new Tuple2<>(k._1().toString(), k._2().toString())) - .map(k -> convertToListOaf(k._1(), k._2(), code2name)) - .filter(Objects::nonNull) - .flatMap(list -> list.iterator())); + .union( + sc + .sequenceFile(sp, Text.class, Text.class) + .map(k -> new Tuple2<>(k._1().toString(), k._2().toString())) + .map(k -> convertToListOaf(k._1(), k._2(), code2name)) + .filter(Objects::nonNull) + .flatMap(list -> list.iterator())); } inputRdd .mapToPair(oaf -> new Tuple2<>(ModelSupport.idFn().apply(oaf), oaf)) .reduceByKey((o1, o2) -> merge(o1, o2)) .map(Tuple2::_2) - .map(oaf -> oaf.getClass().getSimpleName().toLowerCase() - + "|" - + OBJECT_MAPPER.writeValueAsString(oaf)) + .map( + oaf -> oaf.getClass().getSimpleName().toLowerCase() + + "|" + + OBJECT_MAPPER.writeValueAsString(oaf)) .saveAsTextFile(targetPath, GzipCodec.class); } @@ -141,28 +144,28 @@ public class GenerateEntitiesApplication { final String type = StringUtils.substringAfter(id, ":"); switch (type.toLowerCase()) { - case "native_oaf": - return new OafToOafMapper(code2name).processMdRecord(s); - case "native_odf": - return new OdfToOafMapper(code2name).processMdRecord(s); - case "datasource": - return Arrays.asList(convertFromJson(s, Datasource.class)); - case "organization": - return Arrays.asList(convertFromJson(s, Organization.class)); - case "project": - return Arrays.asList(convertFromJson(s, Project.class)); - case "relation": - return Arrays.asList(convertFromJson(s, Relation.class)); - case "publication": - return Arrays.asList(convertFromJson(s, Publication.class)); - case "dataset": - return Arrays.asList(convertFromJson(s, Dataset.class)); - case "software": - return Arrays.asList(convertFromJson(s, Software.class)); - case "otherresearchproduct": - return Arrays.asList(convertFromJson(s, OtherResearchProduct.class)); - default: - throw new RuntimeException("type not managed: " + type.toLowerCase()); + case "native_oaf": + return new OafToOafMapper(code2name).processMdRecord(s); + case "native_odf": + return new OdfToOafMapper(code2name).processMdRecord(s); + case "datasource": + return Arrays.asList(convertFromJson(s, Datasource.class)); + case "organization": + return Arrays.asList(convertFromJson(s, Organization.class)); + case "project": + return Arrays.asList(convertFromJson(s, Project.class)); + case "relation": + return Arrays.asList(convertFromJson(s, Relation.class)); + case "publication": + return Arrays.asList(convertFromJson(s, Publication.class)); + case "dataset": + return Arrays.asList(convertFromJson(s, Dataset.class)); + case "software": + return Arrays.asList(convertFromJson(s, Software.class)); + case "otherresearchproduct": + return Arrays.asList(convertFromJson(s, OtherResearchProduct.class)); + default: + throw new RuntimeException("type not managed: " + type.toLowerCase()); } } @@ -194,8 +197,10 @@ public class GenerateEntitiesApplication { private static Map loadVocsFromIS(final String isLookupUrl) throws IOException, ISLookUpException { final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookupUrl); - final String xquery = - IOUtils.toString(GenerateEntitiesApplication.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery")); + final String xquery = IOUtils + .toString( + GenerateEntitiesApplication.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery")); final Map map = new HashMap<>(); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index 61fe08722e..3b8f1170f7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -130,7 +130,8 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { final Instance instance = new Instance(); instance - .setInstancetype(prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)); + .setInstancetype( + prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)); instance.setCollectedfrom(collectedfrom); instance.setHostedby(hostedby); instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); @@ -146,13 +147,14 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { final List nodes = Lists.newArrayList(doc.selectNodes("//dc:identifier")); instance - .setUrl(nodes - .stream() - .filter(n -> StringUtils.isNotBlank(n.getText())) - .map(n -> n.getText().trim()) - .filter(u -> u.startsWith("http")) - .distinct() - .collect(Collectors.toCollection(ArrayList::new))); + .setUrl( + nodes + .stream() + .filter(n -> StringUtils.isNotBlank(n.getText())) + .map(n -> n.getText().trim()) + .filter(u -> u.startsWith("http")) + .distinct() + .collect(Collectors.toCollection(ArrayList::new))); return Lists.newArrayList(instance); } @@ -277,9 +279,15 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { final String otherId = createOpenaireId(50, originalId, false); res - .add(getRelation(docId, otherId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info, lastUpdateTimestamp)); + .add( + getRelation( + docId, otherId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info, + lastUpdateTimestamp)); res - .add(getRelation(otherId, docId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info, lastUpdateTimestamp)); + .add( + getRelation( + otherId, docId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info, + lastUpdateTimestamp)); } } return res; @@ -292,6 +300,7 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List prepareResultPids(final Document doc, final DataInfo info) { - return prepareListStructProps(doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info); + return prepareListStructProps( + doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 9026874366..f8d9b3cd92 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -120,7 +120,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final Instance instance = new Instance(); instance - .setInstancetype(prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)); + .setInstancetype( + prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)); instance.setCollectedfrom(collectedfrom); instance.setHostedby(hostedby); instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); @@ -169,7 +170,10 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { && !dateType.equalsIgnoreCase("Updated") && !dateType.equalsIgnoreCase("Available")) { res - .add(structuredProperty(((Node) o).getText(), "UNKNOWN", "UNKNOWN", DNET_DATA_CITE_DATE, DNET_DATA_CITE_DATE, info)); + .add( + structuredProperty( + ((Node) o).getText(), "UNKNOWN", "UNKNOWN", DNET_DATA_CITE_DATE, DNET_DATA_CITE_DATE, + info)); } } return res; @@ -221,14 +225,16 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { protected List> prepareOtherResearchProductContactGroups( final Document doc, final DataInfo info) { - return prepareListFields(doc, "//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName", info); + return prepareListFields( + doc, "//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName", info); } @Override protected List> prepareOtherResearchProductContactPersons( final Document doc, final DataInfo info) { - return prepareListFields(doc, "//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName", info); + return prepareListFields( + doc, "//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName", info); } @Override @@ -254,7 +260,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { protected List> prepareSoftwareDocumentationUrls( final Document doc, final DataInfo info) { - return prepareListFields(doc, "//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", info); + return prepareListFields( + doc, "//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", info); } // DATASETS @@ -328,16 +335,29 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { if (type.equalsIgnoreCase("IsSupplementTo")) { res - .add(getRelation(docId, otherId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENT_TO, collectedFrom, info, lastUpdateTimestamp)); + .add( + getRelation( + docId, otherId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENT_TO, collectedFrom, info, + lastUpdateTimestamp)); res - .add(getRelation(otherId, docId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENTED_BY, collectedFrom, info, lastUpdateTimestamp)); + .add( + getRelation( + otherId, docId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENTED_BY, collectedFrom, info, + lastUpdateTimestamp)); } else if (type.equals("IsPartOf")) { res - .add(getRelation(docId, otherId, RESULT_RESULT, PART, IS_PART_OF, collectedFrom, info, lastUpdateTimestamp)); + .add( + getRelation( + docId, otherId, RESULT_RESULT, PART, IS_PART_OF, collectedFrom, info, + lastUpdateTimestamp)); res - .add(getRelation(otherId, docId, RESULT_RESULT, PART, HAS_PARTS, collectedFrom, info, lastUpdateTimestamp)); - } else {} + .add( + getRelation( + otherId, docId, RESULT_RESULT, PART, HAS_PARTS, collectedFrom, info, + lastUpdateTimestamp)); + } else { + } } } return res; @@ -345,15 +365,28 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected Qualifier prepareResourceType(final Document doc, final DataInfo info) { - return prepareQualifier(doc, "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", DNET_DATA_CITE_RESOURCE, DNET_DATA_CITE_RESOURCE); + return prepareQualifier( + doc, "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", DNET_DATA_CITE_RESOURCE, + DNET_DATA_CITE_RESOURCE); } @Override protected List prepareResultPids(final Document doc, final DataInfo info) { final List res = new ArrayList<>(); - res.addAll(prepareListStructProps(doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info)); - res.addAll(prepareListStructProps(doc, "//datacite:identifier[@identifierType != 'URL']", "@identifierType", "dnet:pid_types", "dnet:pid_types", info)); - res.addAll(prepareListStructProps(doc, "//datacite:alternateIdentifier[@alternateIdentifierType != 'URL']", "@alternateIdentifierType", "dnet:pid_types", "dnet:pid_types", info)); + res + .addAll( + prepareListStructProps( + doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info)); + res + .addAll( + prepareListStructProps( + doc, "//datacite:identifier[@identifierType != 'URL']", "@identifierType", "dnet:pid_types", + "dnet:pid_types", info)); + res + .addAll( + prepareListStructProps( + doc, "//datacite:alternateIdentifier[@alternateIdentifierType != 'URL']", + "@alternateIdentifierType", "dnet:pid_types", "dnet:pid_types", info)); return res; } From c15d997925722b77262ab8c068d844ef4274de6a Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Tue, 26 May 2020 13:13:17 +0200 Subject: [PATCH 20/25] xquery --- .../eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery new file mode 100644 index 0000000000..4938c0aa42 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery @@ -0,0 +1,5 @@ +for $x in collection(' /db/DRIVER/VocabularyDSResources/VocabularyDSResourceType') + let $vocid := $x//VOCABULARY_NAME/@code + let $vocname := $x//VOCABULARY_NAME/text() + for $term in ($x//TERM) + return concat($vocid,' @=@ ',$vocname,' @=@ ',$term/@code,' @=@ ',$term/@english_name) From 4e36d689dd52957417a8e1a5ac09c64a03704058 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 26 May 2020 18:30:40 +0200 Subject: [PATCH 21/25] fixed XML serialization for children sub-elements (duplicates & externalreferences) --- .../dhp/oa/provision/template/child.st | 2 +- .../oa/provision/XmlRecordFactoryTest.java | 47 + .../dhp/oa/provision/joined_entity.json | 1551 +++++++++++++++++ 3 files changed, 1599 insertions(+), 1 deletion(-) create mode 100644 dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/joined_entity.json diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/template/child.st b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/template/child.st index 1d3cffea09..0af39f2306 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/template/child.st +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/template/child.st @@ -1,3 +1,3 @@ <$name$$if(hasId)$ objidentifier="$id$"$else$$endif$> - $metadata:{$it$}$ + $metadata:{ it | $it$ }$ \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java new file mode 100644 index 0000000000..f485ea6804 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -0,0 +1,47 @@ +package eu.dnetlib.dhp.oa.provision; + +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; +import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; +import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory; +import org.apache.commons.io.IOUtils; +import org.dom4j.Document; +import org.dom4j.DocumentException; +import org.dom4j.io.SAXReader; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.io.StringReader; + +import static org.junit.jupiter.api.Assertions.*; + +public class XmlRecordFactoryTest { + + private static final String otherDsTypeId = "scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource"; + + @Test + public void testXMLRecordFactory() throws IOException, DocumentException { + + String json = IOUtils.toString(getClass().getResourceAsStream("joined_entity.json")); + + assertNotNull(json); + JoinedEntity je = new ObjectMapper().readValue(json, JoinedEntity.class); + assertNotNull(je); + + ContextMapper contextMapper = new ContextMapper(); + + XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, otherDsTypeId); + + String xml = xmlRecordFactory.build(je); + + assertNotNull(xml); + + Document doc = new SAXReader().read(new StringReader(xml)); + + assertNotNull(doc); + + System.out.println(doc.asXML()); + + } +} diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/joined_entity.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/joined_entity.json new file mode 100644 index 0000000000..c512646988 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/joined_entity.json @@ -0,0 +1,1551 @@ +{ + "links": [ + { + "relatedEntity": { + "code": null, + "codeRepositoryUrl": null, + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "sysimport:crosswalk", + "classname": "sysimport:crosswalk", + "schemename": "dnet:provenanceActions", + "schemeid": "dnet:provenanceActions" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "0.9" + }, + "qualifier": { + "classid": "pmc", + "classname": "pmc", + "schemename": "dnet:pid_types", + "schemeid": "dnet:pid_types" + }, + "value": "PMC17177" + }, + { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "sysimport:crosswalk", + "classname": "sysimport:crosswalk", + "schemename": "dnet:provenanceActions", + "schemeid": "dnet:provenanceActions" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "0.9" + }, + "qualifier": { + "classid": "pmid", + "classname": "pmid", + "schemename": "dnet:pid_types", + "schemeid": "dnet:pid_types" + }, + "value": "11005843" + } + ], + "projectTitle": null, + "websiteurl": null, + "resulttype": { + "classid": "publication", + "classname": "publication", + "schemename": "dnet:result_typologies", + "schemeid": "dnet:result_typologies" + }, + "legalname": null, + "collectedfrom": [ + { + "dataInfo": null, + "value": "PubMed Central", + "key": "10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357" + } + ], + "id": "50|od_______267::4d85ada0191a351f529d1e8ace1a7117", + "title": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "sysimport:crosswalk", + "classname": "sysimport:crosswalk", + "schemename": "dnet:provenanceActions", + "schemeid": "dnet:provenanceActions" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemename": "dnet:dataCite_title", + "schemeid": "dnet:dataCite_title" + }, + "value": "Induction of ribosomal genes and hepatocyte hypertrophy by adenovirus-mediated expression of c-Myc in vivo" + }, + "fundingtree": null, + "contracttype": null, + "type": "publication", + "acronym": null, + "openairecompatibility": null, + "publisher": "The National Academy of Sciences", + "instances": [ + { + "refereed": null, + "hostedby": { + "dataInfo": null, + "value": "Europe PubMed Central", + "key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c" + }, + "processingchargeamount": null, + "license": null, + "processingchargecurrency": null, + "distributionlocation": "", + "url": [ + "https://europepmc.org/articles/PMC17177/" + ], + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "sysimport:crosswalk", + "classname": "sysimport:crosswalk", + "schemename": "dnet:provenanceActions", + "schemeid": "dnet:provenanceActions" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "0.9" + }, + "value": "2000-09-26" + }, + "collectedfrom": { + "dataInfo": null, + "value": "PubMed Central", + "key": "10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357" + }, + "accessright": { + "classid": "OPEN", + "classname": "Open Access", + "schemename": "dnet:access_modes", + "schemeid": "dnet:access_modes" + }, + "instancetype": { + "classid": "0038", + "classname": "Other literature type", + "schemename": "dnet:publication_resource", + "schemeid": "dnet:publication_resource" + } + } + ], + "legalshortname": null, + "country": null, + "dateofacceptance": "2000-09-26", + "datasourcetype": null, + "datasourcetypeui": null, + "officialname": null + }, + "relation": { + "subRelType": "dedup", + "relClass": "merges", + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "sysimport:dedup", + "classname": "sysimport:dedup", + "schemename": "dnet:provenanceActions", + "schemeid": "dnet:provenanceActions" + }, + "inferred": true, + "inferenceprovenance": "decisiontree-dedup-test", + "invisible": false, + "trust": null + }, + "target": "50|od_______267::4d85ada0191a351f529d1e8ace1a7117", + "lastupdatetimestamp": null, + "relType": "resultResult", + "source": "50|dedup_wf_001::00f53f19cfaf4dde8d316e9e71f16a10", + "collectedfrom": null, + "properties": [] + } + }, + { + "relatedEntity": { + "code": null, + "codeRepositoryUrl": null, + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "qualifier": { + "classid": "pmid", + "classname": "pmid", + "schemename": "dnet:pid_types", + "schemeid": "dnet:pid_types" + }, + "value": "11005843" + } + ], + "projectTitle": null, + "websiteurl": null, + "resulttype": { + "classid": "publication", + "classname": "publication", + "schemename": "dnet:result_typologies", + "schemeid": "dnet:result_typologies" + }, + "legalname": null, + "collectedfrom": [ + { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "scholExplorer", + "key": "10|openaire____::e034d6a11054f5ade9221ebac484e864" + } + ], + "id": "50|scholexplore::ef20f9d1cd983037b45cccce4e3f5f8a", + "title": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemename": "dnet:dataCite_title", + "schemeid": "dnet:dataCite_title" + }, + "value": "Induction of ribosomal genes and hepatocyte hypertrophy by adenovirus-mediated expression of c-Myc in vivo." + }, + "fundingtree": null, + "contracttype": null, + "type": "publication", + "acronym": null, + "openairecompatibility": null, + "publisher": "", + "instances": [ + { + "refereed": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "hostedby": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "Unknown Repository", + "key": "10|openaire____::55045bd2a65019fd8e6741a755395c8c" + }, + "processingchargeamount": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "license": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "processingchargecurrency": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "distributionlocation": "", + "url": [ + "https://www.ncbi.nlm.nih.gov/pubmed/11005843" + ], + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "collectedfrom": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "scholExplorer", + "key": "10|openaire____::e034d6a11054f5ade9221ebac484e864" + }, + "accessright": { + "classid": "UNKNOWN", + "classname": "not available", + "schemename": "dnet:access_modes", + "schemeid": "dnet:access_modes" + }, + "instancetype": { + "classid": "0000", + "classname": "Unknown", + "schemename": "dnet:publication_resource", + "schemeid": "dnet:publication_resource" + } + } + ], + "legalshortname": null, + "country": null, + "dateofacceptance": "", + "datasourcetype": null, + "datasourcetypeui": null, + "officialname": null + }, + "relation": { + "subRelType": "dedup", + "relClass": "merges", + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "sysimport:dedup", + "classname": "sysimport:dedup", + "schemename": "dnet:provenanceActions", + "schemeid": "dnet:provenanceActions" + }, + "inferred": true, + "inferenceprovenance": "decisiontree-dedup-test", + "invisible": false, + "trust": null + }, + "target": "50|scholexplore::ef20f9d1cd983037b45cccce4e3f5f8a", + "lastupdatetimestamp": null, + "relType": "resultResult", + "source": "50|dedup_wf_001::00f53f19cfaf4dde8d316e9e71f16a10", + "collectedfrom": null, + "properties": [] + } + }, + { + "relatedEntity": { + "code": null, + "codeRepositoryUrl": null, + "pid": [], + "projectTitle": null, + "websiteurl": null, + "resulttype": { + "classid": "publication", + "classname": "publication", + "schemename": "dnet:result_typologies", + "schemeid": "dnet:result_typologies" + }, + "legalname": null, + "collectedfrom": [ + { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "ORCID", + "key": "10|openaire____::806360c771262b4d6770e7cdf04b5c5a" + } + ], + "id": "50|orcid_______::631fd913d925af01a94ea70aa3cec3d6", + "title": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemename": "dnet:dataCite_title", + "schemeid": "dnet:dataCite_title" + }, + "value": "Induction of ribosomal genes and hepatocyte hypertrophy by adenovirus-mediated expression of c-Myc in vivo" + }, + "fundingtree": null, + "contracttype": null, + "type": "publication", + "acronym": null, + "openairecompatibility": null, + "publisher": "", + "instances": [ + { + "refereed": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "hostedby": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "Unknown Repository", + "key": "10|openaire____::55045bd2a65019fd8e6741a755395c8c" + }, + "processingchargeamount": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "license": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "processingchargecurrency": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "distributionlocation": "", + "url": [], + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "2000-01-01" + }, + "collectedfrom": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "ORCID", + "key": "10|openaire____::806360c771262b4d6770e7cdf04b5c5a" + }, + "accessright": { + "classid": "UNKNOWN", + "classname": "UNKNOWN", + "schemename": "dnet:access_modes", + "schemeid": "dnet:access_modes" + }, + "instancetype": { + "classid": "0001", + "classname": "Article", + "schemename": "dnet:publication_resource", + "schemeid": "dnet:publication_resource" + } + } + ], + "legalshortname": null, + "country": null, + "dateofacceptance": "2000-01-01", + "datasourcetype": null, + "datasourcetypeui": null, + "officialname": null + }, + "relation": { + "subRelType": "dedup", + "relClass": "merges", + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "sysimport:dedup", + "classname": "sysimport:dedup", + "schemename": "dnet:provenanceActions", + "schemeid": "dnet:provenanceActions" + }, + "inferred": true, + "inferenceprovenance": "decisiontree-dedup-test", + "invisible": false, + "trust": null + }, + "target": "50|orcid_______::631fd913d925af01a94ea70aa3cec3d6", + "lastupdatetimestamp": null, + "relType": "resultResult", + "source": "50|dedup_wf_001::00f53f19cfaf4dde8d316e9e71f16a10", + "collectedfrom": null, + "properties": [] + } + }, + { + "relatedEntity": { + "code": null, + "codeRepositoryUrl": null, + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "qualifier": { + "classid": "mag_id", + "classname": "Microsoft Academic Graph Identifier", + "schemename": "dnet:pid_types", + "schemeid": "dnet:pid_types" + }, + "value": "https://academic.microsoft.com/#/detail/145311948" + }, + { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "qualifier": { + "classid": "urn", + "classname": "urn", + "schemename": "dnet:pid_types", + "schemeid": "dnet:pid_types" + }, + "value": "http://en.wikipedia.org/wiki/Johns_Hopkins_University" + }, + { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "qualifier": { + "classid": "grid", + "classname": "grid", + "schemename": "dnet:pid_types", + "schemeid": "dnet:pid_types" + }, + "value": "grid.21107.35" + } + ], + "projectTitle": null, + "websiteurl": "http://www.jhu.edu/", + "resulttype": null, + "legalname": "Johns Hopkins University", + "collectedfrom": [ + { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "Microsoft Academic Graph", + "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a" + }, + { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "GRID - Global Research Identifier Database", + "key": "10|openaire____::ff4a008470319a22d9cf3d14af485977" + }, + { + "dataInfo": null, + "value": "Registry of Research Data Repository", + "key": "10|openaire____::21f8a223b9925c2f87c404096080b046" + }, + { + "dataInfo": null, + "value": "Research Councils UK", + "key": "10|openaire____::ab2d3310741ea80d3b8726f651502858" + }, + { + "dataInfo": null, + "value": "CORDA - COmmon Research DAta Warehouse", + "key": "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f" + }, + { + "dataInfo": null, + "value": "CORDA - COmmon Research DAta Warehouse - Horizon 2020", + "key": "10|openaire____::a55eb91348674d853191f4f4fd73d078" + }, + { + "dataInfo": null, + "value": "NSF - National Science Foundation", + "key": "10|openaire____::dd69b4a1513c9de9f46faf24048da1e8" + }, + { + "dataInfo": null, + "value": "DOAJ-Articles", + "key": "10|driver______::bee53aa31dc2cbb538c10c2b65fa5824" + }, + { + "dataInfo": null, + "value": "OpenDOAR", + "key": "10|openaire____::47ce9e9f4fad46e732cff06419ecaabb" + } + ], + "id": "20|dedup_wf_001::8c05abe4d8f889305207a845e9e31d9d", + "title": null, + "fundingtree": null, + "contracttype": null, + "type": "organization", + "acronym": null, + "openairecompatibility": null, + "publisher": null, + "instances": null, + "legalshortname": "JHU", + "country": { + "classid": "US", + "classname": "United States", + "schemename": "dnet:countries", + "schemeid": "dnet:countries" + }, + "dateofacceptance": null, + "datasourcetype": null, + "datasourcetypeui": null, + "officialname": null + }, + "relation": { + "subRelType": "affiliation", + "relClass": "hasAuthorInstitution", + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemename": "dnet:provenanceActions", + "schemeid": "dnet:provenanceActions" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "0.9" + }, + "target": "20|dedup_wf_001::8c05abe4d8f889305207a845e9e31d9d", + "lastupdatetimestamp": 0, + "relType": "resultOrganization", + "source": "50|dedup_wf_001::00f53f19cfaf4dde8d316e9e71f16a10", + "collectedfrom": [ + { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "Microsoft Academic Graph", + "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a" + } + ], + "properties": [] + } + }, + { + "relatedEntity": { + "code": null, + "codeRepositoryUrl": null, + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemename": "dnet:pid_types", + "schemeid": "dnet:pid_types" + }, + "value": "10.1073/pnas.200372597" + } + ], + "projectTitle": null, + "websiteurl": null, + "resulttype": { + "classid": "publication", + "classname": "publication", + "schemename": "dnet:result_typologies", + "schemeid": "dnet:result_typologies" + }, + "legalname": null, + "collectedfrom": [ + { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "Crossref", + "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2" + }, + { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "Microsoft Academic Graph", + "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a" + }, + { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "UnpayWall", + "key": "10|openaire____::8ac8380272269217cb09a928c8caa993" + } + ], + "id": "50|doiboost____::4317e3fa670267960efa09c1fd8339c9", + "title": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemename": "dnet:dataCite_title", + "schemeid": "dnet:dataCite_title" + }, + "value": "Induction of ribosomal genes and hepatocyte hypertrophy by adenovirus-mediated expression of c-Myc in vivo" + }, + "fundingtree": null, + "contracttype": null, + "type": "publication", + "acronym": null, + "openairecompatibility": null, + "publisher": "", + "instances": [ + { + "refereed": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "hostedby": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "Unknown Repository", + "key": "10|openaire____::55045bd2a65019fd8e6741a755395c8c" + }, + "processingchargeamount": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "license": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "processingchargecurrency": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "distributionlocation": "", + "url": [ + "http://www.pnas.org/content/97/21/11198.full.pdf" + ], + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "collectedfrom": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "UnpayWall", + "key": "10|openaire____::8ac8380272269217cb09a928c8caa993" + }, + "accessright": { + "classid": "OPEN", + "classname": "Open Access", + "schemename": "dnet:access_modes", + "schemeid": "dnet:access_modes" + }, + "instancetype": { + "classid": "0001", + "classname": "Article", + "schemename": "dnet:publication_resource", + "schemeid": "dnet:publication_resource" + } + }, + { + "refereed": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "hostedby": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "Unknown Repository", + "key": "10|openaire____::55045bd2a65019fd8e6741a755395c8c" + }, + "processingchargeamount": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "license": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "processingchargecurrency": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "distributionlocation": "", + "url": [ + "https://syndication.highwire.org/content/doi/10.1073/pnas.200372597" + ], + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "collectedfrom": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "Crossref", + "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2" + }, + "accessright": { + "classid": "UNKNOWN", + "classname": "not available", + "schemename": "dnet:access_modes", + "schemeid": "dnet:access_modes" + }, + "instancetype": { + "classid": "0001", + "classname": "Article", + "schemename": "dnet:publication_resource", + "schemeid": "dnet:publication_resource" + } + }, + { + "refereed": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "hostedby": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "Unknown Repository", + "key": "10|openaire____::55045bd2a65019fd8e6741a755395c8c" + }, + "processingchargeamount": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "license": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "processingchargecurrency": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "distributionlocation": "", + "url": [ + "https://academic.microsoft.com/#/detail/2045899555" + ], + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "collectedfrom": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "Microsoft Academic Graph", + "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a" + }, + "accessright": { + "classid": "UNKNOWN", + "classname": "not available", + "schemename": "dnet:access_modes", + "schemeid": "dnet:access_modes" + }, + "instancetype": { + "classid": "0001", + "classname": "Article", + "schemename": "dnet:publication_resource", + "schemeid": "dnet:publication_resource" + } + }, + { + "refereed": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "hostedby": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "Unknown Repository", + "key": "10|openaire____::55045bd2a65019fd8e6741a755395c8c" + }, + "processingchargeamount": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "license": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "processingchargecurrency": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "distributionlocation": "", + "url": [ + "http://dx.doi.org/10.1073/pnas.200372597" + ], + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "" + }, + "collectedfrom": { + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemename": "", + "schemeid": "" + }, + "inferred": false, + "inferenceprovenance": "", + "invisible": false, + "trust": "" + }, + "value": "Crossref", + "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2" + }, + "accessright": { + "classid": "RESTRICTED", + "classname": "Restricted", + "schemename": "dnet:access_modes", + "schemeid": "dnet:access_modes" + }, + "instancetype": { + "classid": "0001", + "classname": "Article", + "schemename": "dnet:publication_resource", + "schemeid": "dnet:publication_resource" + } + } + ], + "legalshortname": null, + "country": null, + "dateofacceptance": "2000-9-26", + "datasourcetype": null, + "datasourcetypeui": null, + "officialname": null + }, + "relation": { + "subRelType": "dedup", + "relClass": "merges", + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": { + "classid": "sysimport:dedup", + "classname": "sysimport:dedup", + "schemename": "dnet:provenanceActions", + "schemeid": "dnet:provenanceActions" + }, + "inferred": true, + "inferenceprovenance": "decisiontree-dedup-test", + "invisible": false, + "trust": null + }, + "target": "50|doiboost____::4317e3fa670267960efa09c1fd8339c9", + "lastupdatetimestamp": null, + "relType": "resultResult", + "source": "50|dedup_wf_001::00f53f19cfaf4dde8d316e9e71f16a10", + "collectedfrom": null, + "properties": [] + } + } + ], + "entity": { + "deleted": false, + "oaf": "{\"collectedfrom\":[{\"key\":\"10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357\",\"value\":\"PubMed Central\",\"dataInfo\":null},{\"key\":\"10|openaire____::e034d6a11054f5ade9221ebac484e864\",\"value\":\"scholExplorer\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},{\"key\":\"10|openaire____::806360c771262b4d6770e7cdf04b5c5a\",\"value\":\"ORCID\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},{\"key\":\"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2\",\"value\":\"Crossref\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},{\"key\":\"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a\",\"value\":\"Microsoft Academic Graph\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},{\"key\":\"10|openaire____::8ac8380272269217cb09a928c8caa993\",\"value\":\"UnpayWall\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}}],\"dataInfo\":{\"invisible\":false,\"inferred\":true,\"deletedbyinference\":false,\"trust\":\"0.8\",\"inferenceprovenance\":\"decisiontree-dedup-test\",\"provenanceaction\":{\"classid\":\"sysimport:dedup\",\"classname\":\"sysimport:dedup\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"}},\"lastupdatetimestamp\":1589967085191,\"id\":\"50|dedup_wf_001::00f53f19cfaf4dde8d316e9e71f16a10\",\"originalId\":[\"od_______267::4d85ada0191a351f529d1e8ace1a7117\",\"38908045\",\"10.1073/pnas.200372597\"],\"pid\":[{\"value\":\"PMC17177\",\"qualifier\":{\"classid\":\"pmc\",\"classname\":\"pmc\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"0.9\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"sysimport:crosswalk\",\"classname\":\"sysimport:crosswalk\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"}}},{\"value\":\"11005843\",\"qualifier\":{\"classid\":\"pmid\",\"classname\":\"pmid\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"0.9\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"sysimport:crosswalk\",\"classname\":\"sysimport:crosswalk\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"}}},{\"value\":\"10.1073/pnas.200372597\",\"qualifier\":{\"classid\":\"doi\",\"classname\":\"doi\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}}],\"dateofcollection\":\"2019-07-01T18:50:57Z\",\"dateoftransformation\":\"\",\"extraInfo\":[],\"oaiprovenance\":{\"originDescription\":{\"harvestDate\":\"2020-05-10T12:23:13.896Z\",\"altered\":true,\"baseURL\":\"mongodb%3A%2F%2Fservices.openaire.eu\",\"identifier\":\"\",\"datestamp\":\"\",\"metadataNamespace\":\"\"}},\"author\":[{\"fullname\":\"S. Kim\",\"name\":\"S.\",\"surname\":\"Kim\",\"rank\":1,\"pid\":[],\"affiliation\":[]},{\"fullname\":\"Q. Li\",\"name\":\"Q.\",\"surname\":\"Li\",\"rank\":2,\"pid\":[{\"value\":\"2719402459\",\"qualifier\":{\"classid\":\"MAG Identifier\",\"classname\":\"MAG Identifier\",\"schemeid\":null,\"schemename\":null},\"dataInfo\":null}],\"affiliation\":[{\"value\":\"Johns Hopkins University\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}}]},{\"fullname\":\"C. V. Dang\",\"name\":\"C. V.\",\"surname\":\"Dang\",\"rank\":3,\"pid\":[{\"value\":\"2250822210\",\"qualifier\":{\"classid\":\"MAG Identifier\",\"classname\":\"MAG Identifier\",\"schemeid\":null,\"schemename\":null},\"dataInfo\":null},{\"value\":\"0000-0002-4031-2522\",\"qualifier\":{\"classid\":\"ORCID\",\"classname\":\"ORCID\",\"schemeid\":null,\"schemename\":null},\"dataInfo\":null}],\"affiliation\":[]},{\"fullname\":\"L. A. Lee\",\"name\":\"L. A.\",\"surname\":\"Lee\",\"rank\":4,\"pid\":[],\"affiliation\":[]}],\"resulttype\":{\"classid\":\"publication\",\"classname\":\"publication\",\"schemeid\":\"dnet:result_typologies\",\"schemename\":\"dnet:result_typologies\"},\"language\":{\"classid\":\"eng\",\"classname\":\"English\",\"schemeid\":\"dnet:languages\",\"schemename\":\"dnet:languages\"},\"country\":[],\"subject\":[{\"value\":\"Biological Sciences\",\"qualifier\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"},\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"0.9\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"sysimport:crosswalk\",\"classname\":\"sysimport:crosswalk\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"}}},{\"value\":\"Multidisciplinary\",\"qualifier\":{\"classid\":\"keyword\",\"classname\":\"keyword\",\"schemeid\":\"dnet:subject\",\"schemename\":\"dnet:subject\"},\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}}],\"title\":[{\"value\":\"Induction of ribosomal genes and hepatocyte hypertrophy by adenovirus-mediated expression of c-Myc in vivo\",\"qualifier\":{\"classid\":\"main title\",\"classname\":\"main title\",\"schemeid\":\"dnet:dataCite_title\",\"schemename\":\"dnet:dataCite_title\"},\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"0.9\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"sysimport:crosswalk\",\"classname\":\"sysimport:crosswalk\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"}}},{\"value\":\"Induction of ribosomal genes and hepatocyte hypertrophy by adenovirus-mediated expression of c-Myc in vivo.\",\"qualifier\":{\"classid\":\"main title\",\"classname\":\"main title\",\"schemeid\":\"dnet:dataCite_title\",\"schemename\":\"dnet:dataCite_title\"},\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}}],\"relevantdate\":[{\"value\":\"2018-11-13\",\"qualifier\":{\"classid\":\"dnet:date\",\"classname\":\"dnet:date\",\"schemeid\":\"dnet:date\",\"schemename\":\"dnet:date\"},\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},{\"value\":\"2000-01-01\",\"qualifier\":{\"classid\":\"issued\",\"classname\":\"issued\",\"schemeid\":\"dnet:dataCite_date\",\"schemename\":\"dnet:dataCite_date\"},\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},{\"value\":\"2000-9-26\",\"qualifier\":{\"classid\":\"published-online\",\"classname\":\"published-online\",\"schemeid\":\"dnet:dataCite_date\",\"schemename\":\"dnet:dataCite_date\"},\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},{\"value\":\"2000-10-10\",\"qualifier\":{\"classid\":\"published-print\",\"classname\":\"published-print\",\"schemeid\":\"dnet:dataCite_date\",\"schemename\":\"dnet:dataCite_date\"},\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}}],\"description\":[{\"value\":\"Overexpression of c-Myc in immortalized cells increases cell\\n proliferation, inhibits cell differentiation, and promotes cell\\n transformation. Recent evidence suggests that these effects, however,\\n do not necessarily occur when c-Myc is overexpressed in primary\\n mammalian cells. We sought to determine the immediate effects of\\n transient overexpression of c-Myc in primary cells in\\n vivo by using recombinant adenovirus to overexpress human\\n MYC in mouse liver. Mice were intravenously injected\\n with adenoviruses encoding MYC (Ad/Myc), E2F-1\\n (Ad/E2F-1), or \u03b2-galactosidase (Ad/LacZ). Transgene expression\\n was detectable 4 days after injection. Expression of ectopic c-Myc was\\n immediately accompanied by enlarged and dysmorphic hepatocytes in the\\n absence of significant cell proliferation or apoptosis. These\\n findings were not present in the livers of mice injected with\\n Ad/E2F-1 or Ad/LacZ. Prominent hepatocyte nuclei and nucleoli were\\n associated with the up-regulation of large- and small-subunit ribosomal\\n and nucleolar genes, suggesting that c-Myc may induce their expression\\n to increase cell mass. Our studies support a role for c-Myc in the\\n in vivo control of vertebrate cell size and metabolism\\n independent of cell proliferation.\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"0.9\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"sysimport:crosswalk\",\"classname\":\"sysimport:crosswalk\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"}}}],\"dateofacceptance\":{\"value\":\"2000-01-01\",\"dataInfo\":null},\"publisher\":{\"value\":\"The National Academy of Sciences\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"0.9\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"sysimport:crosswalk\",\"classname\":\"sysimport:crosswalk\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"}}},\"embargoenddate\":null,\"source\":[{\"value\":\"Scopus - Elsevier\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}}],\"fulltext\":[],\"format\":[],\"contributor\":[],\"resourcetype\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"},\"coverage\":[],\"bestaccessright\":null,\"context\":[],\"externalReference\":[{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"membrane\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"membrane\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"tek\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"tek\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"ribosome biogenesis\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"ribosome biogenesis\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"bn51\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"bn51\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"c-myc\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"c-myc\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"nucleolin\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"nucleolin\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"uptake\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"uptake\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"cell growth\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"cell growth\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"\u03b2-galactosidase\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"\u03b2-galactosidase\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"estrogen receptor\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"estrogen receptor\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"cell differentiation\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"cell differentiation\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"pathogenesis\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"pathogenesis\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"localization\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"localization\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"p19arf\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"p19arf\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"glucose metabolism\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"glucose metabolism\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"cell cycle regulation\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"cell cycle regulation\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"mrdb\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"mrdb\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"gene expression\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"gene expression\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"eif-2a\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"eif-2a\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"cyclin d2\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"cyclin d2\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"pseudouridylation\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"pseudouridylation\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"e2f-1\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"e2f-1\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"keratinocyte proliferation\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"keratinocyte proliferation\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"biosynthesis\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"biosynthesis\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"nucleolus\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"nucleolus\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"metabolism\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"metabolism\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"dihydroorotase\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"dihydroorotase\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"liver regeneration\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"liver regeneration\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"ribosome assembly\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"ribosome assembly\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"protein translation\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"protein translation\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"translation initiation\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"translation initiation\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"nucleus\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"nucleus\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"mdm2\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"mdm2\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"cell cycle\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"cell cycle\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"ornithine decarboxylase\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"ornithine decarboxylase\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"https://www.targetvalidation.org\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"https://www.targetvalidation.org\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"mfl\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"mfl\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"cell proliferation\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"cell proliferation\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"chromatin\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"chromatin\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"eif-4e\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"eif-4e\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"e2a\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"e2a\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"cell development\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"cell development\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"myc\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"myc\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"cell-cycle phase\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"cell-cycle phase\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"cyclins d1\\\")\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=name:(\\\"cyclins d1\\\")\",\"query\":\"\",\"dataInfo\":null},{\"sitename\":\"Europe PMC\",\"label\":\"\",\"url\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"ribosomal subunit\\\")&sort=score\",\"description\":\"\",\"qualifier\":{\"classid\":\"url\",\"classname\":\"url\",\"schemeid\":\"dnet:externalReference_typologies\",\"schemename\":\"dnet:externalReference_typologies\"},\"refidentifier\":\"http://www.uniprot.org/uniprot/?query=go:(\\\"ribosomal subunit\\\")&sort=score\",\"query\":\"\",\"dataInfo\":null}],\"instance\":[{\"license\":null,\"accessright\":{\"classid\":\"OPEN\",\"classname\":\"Open Access\",\"schemeid\":\"dnet:access_modes\",\"schemename\":\"dnet:access_modes\"},\"instancetype\":{\"classid\":\"0038\",\"classname\":\"Other literature type\",\"schemeid\":\"dnet:publication_resource\",\"schemename\":\"dnet:publication_resource\"},\"hostedby\":{\"key\":\"10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c\",\"value\":\"Europe PubMed Central\",\"dataInfo\":null},\"url\":[\"https://europepmc.org/articles/PMC17177/\"],\"distributionlocation\":\"\",\"collectedfrom\":{\"key\":\"10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357\",\"value\":\"PubMed Central\",\"dataInfo\":null},\"dateofacceptance\":{\"value\":\"2000-09-26\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"0.9\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"sysimport:crosswalk\",\"classname\":\"sysimport:crosswalk\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"}}},\"processingchargeamount\":null,\"processingchargecurrency\":null,\"refereed\":null},{\"license\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"accessright\":{\"classid\":\"UNKNOWN\",\"classname\":\"not available\",\"schemeid\":\"dnet:access_modes\",\"schemename\":\"dnet:access_modes\"},\"instancetype\":{\"classid\":\"0000\",\"classname\":\"Unknown\",\"schemeid\":\"dnet:publication_resource\",\"schemename\":\"dnet:publication_resource\"},\"hostedby\":{\"key\":\"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\":\"Unknown Repository\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"url\":[\"https://www.ncbi.nlm.nih.gov/pubmed/11005843\"],\"distributionlocation\":\"\",\"collectedfrom\":{\"key\":\"10|openaire____::e034d6a11054f5ade9221ebac484e864\",\"value\":\"scholExplorer\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"dateofacceptance\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"processingchargeamount\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"processingchargecurrency\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"refereed\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}}},{\"license\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"accessright\":{\"classid\":\"UNKNOWN\",\"classname\":\"UNKNOWN\",\"schemeid\":\"dnet:access_modes\",\"schemename\":\"dnet:access_modes\"},\"instancetype\":{\"classid\":\"0001\",\"classname\":\"Article\",\"schemeid\":\"dnet:publication_resource\",\"schemename\":\"dnet:publication_resource\"},\"hostedby\":{\"key\":\"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\":\"Unknown Repository\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"url\":[],\"distributionlocation\":\"\",\"collectedfrom\":{\"key\":\"10|openaire____::806360c771262b4d6770e7cdf04b5c5a\",\"value\":\"ORCID\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"dateofacceptance\":{\"value\":\"2000-01-01\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"processingchargeamount\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"processingchargecurrency\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"refereed\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}}},{\"license\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"accessright\":{\"classid\":\"OPEN\",\"classname\":\"Open Access\",\"schemeid\":\"dnet:access_modes\",\"schemename\":\"dnet:access_modes\"},\"instancetype\":{\"classid\":\"0001\",\"classname\":\"Article\",\"schemeid\":\"dnet:publication_resource\",\"schemename\":\"dnet:publication_resource\"},\"hostedby\":{\"key\":\"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\":\"Unknown Repository\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"url\":[\"http://www.pnas.org/content/97/21/11198.full.pdf\"],\"distributionlocation\":\"\",\"collectedfrom\":{\"key\":\"10|openaire____::8ac8380272269217cb09a928c8caa993\",\"value\":\"UnpayWall\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"dateofacceptance\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"processingchargeamount\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"processingchargecurrency\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"refereed\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}}},{\"license\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"accessright\":{\"classid\":\"UNKNOWN\",\"classname\":\"not available\",\"schemeid\":\"dnet:access_modes\",\"schemename\":\"dnet:access_modes\"},\"instancetype\":{\"classid\":\"0001\",\"classname\":\"Article\",\"schemeid\":\"dnet:publication_resource\",\"schemename\":\"dnet:publication_resource\"},\"hostedby\":{\"key\":\"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\":\"Unknown Repository\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"url\":[\"https://syndication.highwire.org/content/doi/10.1073/pnas.200372597\"],\"distributionlocation\":\"\",\"collectedfrom\":{\"key\":\"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2\",\"value\":\"Crossref\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"dateofacceptance\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"processingchargeamount\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"processingchargecurrency\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"refereed\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}}},{\"license\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"accessright\":{\"classid\":\"UNKNOWN\",\"classname\":\"not available\",\"schemeid\":\"dnet:access_modes\",\"schemename\":\"dnet:access_modes\"},\"instancetype\":{\"classid\":\"0001\",\"classname\":\"Article\",\"schemeid\":\"dnet:publication_resource\",\"schemename\":\"dnet:publication_resource\"},\"hostedby\":{\"key\":\"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\":\"Unknown Repository\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"url\":[\"https://academic.microsoft.com/#/detail/2045899555\"],\"distributionlocation\":\"\",\"collectedfrom\":{\"key\":\"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a\",\"value\":\"Microsoft Academic Graph\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"dateofacceptance\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"processingchargeamount\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"processingchargecurrency\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"refereed\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}}},{\"license\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"accessright\":{\"classid\":\"RESTRICTED\",\"classname\":\"Restricted\",\"schemeid\":\"dnet:access_modes\",\"schemename\":\"dnet:access_modes\"},\"instancetype\":{\"classid\":\"0001\",\"classname\":\"Article\",\"schemeid\":\"dnet:publication_resource\",\"schemename\":\"dnet:publication_resource\"},\"hostedby\":{\"key\":\"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\":\"Unknown Repository\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"url\":[\"http://dx.doi.org/10.1073/pnas.200372597\"],\"distributionlocation\":\"\",\"collectedfrom\":{\"key\":\"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2\",\"value\":\"Crossref\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"dateofacceptance\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"processingchargeamount\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"processingchargecurrency\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}},\"refereed\":{\"value\":\"\",\"dataInfo\":{\"invisible\":false,\"inferred\":false,\"deletedbyinference\":false,\"trust\":\"\",\"inferenceprovenance\":\"\",\"provenanceaction\":{\"classid\":\"\",\"classname\":\"\",\"schemeid\":\"\",\"schemename\":\"\"}}}}],\"journal\":null}", + "type": "publication", + "id": "50|dedup_wf_001::00f53f19cfaf4dde8d316e9e71f16a10" + } +} \ No newline at end of file From 3ceb2d2853d7f6ebcc99ad48fe96373632b3a11b Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Wed, 27 May 2020 11:34:13 +0200 Subject: [PATCH 22/25] match terms with vocabularies --- .../raw/AbstractMdRecordToOafMapper.java | 46 ++++++++---- .../raw/GenerateEntitiesApplication.java | 70 +++++++------------ .../dhp/oa/graph/raw/OafToOafMapper.java | 18 ++--- .../dhp/oa/graph/raw/OdfToOafMapper.java | 32 ++++----- .../dhp/oa/graph/raw/common/Vocabulary.java | 42 +++++++++++ .../oa/graph/raw/common/VocabularyGroup.java | 49 +++++++++++++ .../oa/graph/raw/common/VocabularyTerm.java | 22 ++++++ .../graph/generate_entities_parameters.json | 20 +----- .../oa/graph/raw_all/oozie_app/workflow.xml | 12 ++-- .../oa/graph/raw_step2/oozie_app/workflow.xml | 17 ++--- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 27 +++++-- 11 files changed, 225 insertions(+), 130 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/Vocabulary.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyTerm.java diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 99334dd7f9..ab1e89187a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -38,6 +38,7 @@ import org.dom4j.DocumentFactory; import org.dom4j.DocumentHelper; import org.dom4j.Node; +import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import eu.dnetlib.dhp.schema.common.LicenseComparator; import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.Context; @@ -60,7 +61,7 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty; public abstract class AbstractMdRecordToOafMapper { - protected final Map code2name; + protected final VocabularyGroup vocs; protected static final String DATACITE_SCHEMA_KERNEL_4 = "http://datacite.org/schema/kernel-4"; protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3"; @@ -84,8 +85,8 @@ public abstract class AbstractMdRecordToOafMapper { protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier( "main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title"); - protected AbstractMdRecordToOafMapper(final Map code2name) { - this.code2name = code2name; + protected AbstractMdRecordToOafMapper(final VocabularyGroup vocs) { + this.vocs = vocs; } public List processMdRecord(final String xml) { @@ -421,14 +422,12 @@ public abstract class AbstractMdRecordToOafMapper { return null; } - protected Qualifier prepareQualifier( - final Node node, - final String xpath, - final String schemeId, - final String schemeName) { - final String classId = node.valueOf(xpath); - final String className = code2name.get(classId); - return qualifier(classId, className, schemeId, schemeName); + protected Qualifier prepareQualifier(final Node node, final String xpath, final String schemeId) { + return prepareQualifier(node.valueOf(xpath).trim(), schemeId); + } + + protected Qualifier prepareQualifier(final String classId, final String schemeId) { + return vocs.getTermAsQualifier(schemeId, classId); } protected List prepareListStructProps( @@ -436,14 +435,31 @@ public abstract class AbstractMdRecordToOafMapper { final String xpath, final String xpathClassId, final String schemeId, - final String schemeName, final DataInfo info) { final List res = new ArrayList<>(); + for (final Object o : node.selectNodes(xpath)) { final Node n = (Node) o; - final String classId = n.valueOf(xpathClassId); - final String className = code2name.get(classId); - res.add(structuredProperty(n.getText(), classId, className, schemeId, schemeName, info)); + final String classId = n.valueOf(xpathClassId).trim(); + res.add(structuredProperty(n.getText(), prepareQualifier(classId, schemeId), info)); + } + return res; + } + + protected List prepareListStructPropsWithValidQualifier( + final Node node, + final String xpath, + final String xpathClassId, + final String schemeId, + final DataInfo info) { + final List res = new ArrayList<>(); + + for (final Object o : node.selectNodes(xpath)) { + final Node n = (Node) o; + final String classId = n.valueOf(xpathClassId).trim(); + if (vocs.termExists(schemeId, classId)) { + res.add(structuredProperty(n.getText(), vocs.getTermAsQualifier(schemeId, classId), info)); + } } return res; } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java index 0d140e9adc..3becdec448 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java @@ -4,11 +4,8 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.IOException; -import java.sql.SQLException; import java.util.Arrays; -import java.util.HashMap; import java.util.List; -import java.util.Map; import java.util.Objects; import java.util.Optional; import java.util.stream.Collectors; @@ -29,8 +26,8 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.DbClient; import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.Dataset; import eu.dnetlib.dhp.schema.oaf.Datasource; @@ -71,25 +68,24 @@ public class GenerateEntitiesApplication { final String sourcePaths = parser.get("sourcePaths"); final String targetPath = parser.get("targetPath"); - final String dbUrl = parser.get("postgresUrl"); - final String dbUser = parser.get("postgresUser"); - final String dbPassword = parser.get("postgresPassword"); + // final String dbUrl = parser.get("postgresUrl"); + // final String dbUser = parser.get("postgresUser"); + // final String dbPassword = parser.get("postgresPassword"); + final String isLookupUrl = parser.get("isLookupUrl"); - final Map code2name = loadVocsFromDB(dbUrl, dbUser, dbPassword); - - code2name.putAll(loadVocsFromIS(isLookupUrl)); + final VocabularyGroup vocs = loadVocsFromIS(isLookupUrl); // MAP: vocId -> voc final SparkConf conf = new SparkConf(); runWithSparkSession(conf, isSparkSessionManaged, spark -> { removeOutputDir(spark, targetPath); - generateEntities(spark, code2name, sourcePaths, targetPath); + generateEntities(spark, vocs, sourcePaths, targetPath); }); } private static void generateEntities( final SparkSession spark, - final Map code2name, + final VocabularyGroup vocs, final String sourcePaths, final String targetPath) { @@ -110,7 +106,7 @@ public class GenerateEntitiesApplication { sc .sequenceFile(sp, Text.class, Text.class) .map(k -> new Tuple2<>(k._1().toString(), k._2().toString())) - .map(k -> convertToListOaf(k._1(), k._2(), code2name)) + .map(k -> convertToListOaf(k._1(), k._2(), vocs)) .filter(Objects::nonNull) .flatMap(list -> list.iterator())); } @@ -140,14 +136,14 @@ public class GenerateEntitiesApplication { private static List convertToListOaf( final String id, final String s, - final Map code2name) { + final VocabularyGroup vocs) { final String type = StringUtils.substringAfter(id, ":"); switch (type.toLowerCase()) { case "native_oaf": - return new OafToOafMapper(code2name).processMdRecord(s); + return new OafToOafMapper(vocs).processMdRecord(s); case "native_odf": - return new OdfToOafMapper(code2name).processMdRecord(s); + return new OdfToOafMapper(vocs).processMdRecord(s); case "datasource": return Arrays.asList(convertFromJson(s, Datasource.class)); case "organization": @@ -169,32 +165,7 @@ public class GenerateEntitiesApplication { } } - private static Map loadVocsFromDB( - final String dbUrl, - final String dbUser, - final String dbPassword) throws IOException { - - log.info("Loading vocabulary terms from db..."); - - final Map map = new HashMap<>(); - - try (DbClient dbClient = new DbClient(dbUrl, dbUser, dbPassword)) { - dbClient - .processResults("select code, name from class", rs -> { - try { - map.put(rs.getString("code"), rs.getString("name")); - } catch (final SQLException e) { - e.printStackTrace(); - } - }); - } - - log.info("Found " + map.size() + " terms."); - - return map; - } - - private static Map loadVocsFromIS(final String isLookupUrl) throws IOException, ISLookUpException { + private static VocabularyGroup loadVocsFromIS(final String isLookupUrl) throws IOException, ISLookUpException { final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookupUrl); final String xquery = IOUtils @@ -202,16 +173,25 @@ public class GenerateEntitiesApplication { GenerateEntitiesApplication.class .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery")); - final Map map = new HashMap<>(); + final VocabularyGroup vocs = new VocabularyGroup(); for (final String s : isLookUpService.quickSearchProfile(xquery)) { final String[] arr = s.split("@=@"); if (arr.length == 4) { - map.put(arr[2].trim(), arr[3].trim()); + final String vocId = arr[0].trim(); + final String vocName = arr[1].trim(); + final String termId = arr[2].trim(); + final String termName = arr[3].trim(); + + if (!vocs.vocabularyExists(vocId)) { + vocs.addVocabulary(vocId, vocName); + } + + vocs.addTerm(vocId, termId, termName); } } - return map; + return vocs; } private static Oaf convertFromJson(final String s, final Class clazz) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index 3b8f1170f7..50f5b73ac8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -6,6 +6,7 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES; import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_LANGUAGES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES; import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PUBLICATION_RESOURCE; import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_RELATED_TO; import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DATASET; @@ -13,7 +14,6 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT; import java.util.ArrayList; import java.util.List; -import java.util.Map; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; @@ -24,6 +24,7 @@ import org.dom4j.Node; import com.google.common.collect.Lists; import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson; +import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.Field; @@ -36,8 +37,8 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty; public class OafToOafMapper extends AbstractMdRecordToOafMapper { - public OafToOafMapper(final Map code2name) { - super(code2name); + public OafToOafMapper(final VocabularyGroup vocs) { + super(vocs); } @Override @@ -83,7 +84,7 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { @Override protected Qualifier prepareLanguages(final Document doc) { - return prepareQualifier(doc, "//dc:language", DNET_LANGUAGES, DNET_LANGUAGES); + return prepareQualifier(doc, "//dc:language", DNET_LANGUAGES); } @Override @@ -130,14 +131,13 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { final Instance instance = new Instance(); instance - .setInstancetype( - prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)); + .setInstancetype(prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE)); instance.setCollectedfrom(collectedfrom); instance.setHostedby(hostedby); instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation")); instance - .setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES, DNET_ACCESS_MODES)); + .setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES)); instance.setLicense(field(doc.valueOf("//oaf:license"), info)); instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info)); instance @@ -300,7 +300,7 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List prepareResultPids(final Document doc, final DataInfo info) { - return prepareListStructProps( - doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info); + return prepareListStructPropsWithValidQualifier( + doc, "//oaf:identifier", "@identifierType", DNET_PID_TYPES, info); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index f8d9b3cd92..2026927c17 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -8,6 +8,7 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES; import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_DATE; import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_RESOURCE; import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_LANGUAGES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES; import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PUBLICATION_RESOURCE; import static eu.dnetlib.dhp.schema.common.ModelConstants.HAS_PARTS; import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PART_OF; @@ -21,7 +22,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; import java.util.List; -import java.util.Map; import java.util.Set; import org.apache.commons.lang3.StringUtils; @@ -29,6 +29,7 @@ import org.dom4j.Document; import org.dom4j.Node; import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson; +import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.Field; @@ -43,8 +44,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { public static final String HTTP_DX_DOI_PREIFX = "http://dx.doi.org/"; - public OdfToOafMapper(final Map code2name) { - super(code2name); + public OdfToOafMapper(final VocabularyGroup vocs) { + super(vocs); } @Override @@ -120,14 +121,13 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final Instance instance = new Instance(); instance - .setInstancetype( - prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)); + .setInstancetype(prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE)); instance.setCollectedfrom(collectedfrom); instance.setHostedby(hostedby); instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation")); instance - .setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES, DNET_ACCESS_MODES)); + .setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES)); instance.setLicense(field(doc.valueOf("//oaf:license"), info)); instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info)); instance.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info)); @@ -211,7 +211,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected Qualifier prepareLanguages(final Document doc) { - return prepareQualifier(doc, "//datacite:language", DNET_LANGUAGES, DNET_LANGUAGES); + return prepareQualifier(doc, "//datacite:language", DNET_LANGUAGES); } @Override @@ -239,7 +239,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) { - return prepareQualifier(doc, "//datacite:format", "dnet:programming_languages", "dnet:programming_languages"); + return prepareQualifier(doc, "//datacite:format", "dnet:programming_languages"); } @Override @@ -366,8 +366,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected Qualifier prepareResourceType(final Document doc, final DataInfo info) { return prepareQualifier( - doc, "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", DNET_DATA_CITE_RESOURCE, - DNET_DATA_CITE_RESOURCE); + doc, "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", DNET_DATA_CITE_RESOURCE); } @Override @@ -375,18 +374,17 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final List res = new ArrayList<>(); res .addAll( - prepareListStructProps( - doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info)); + prepareListStructPropsWithValidQualifier( + doc, "//oaf:identifier", "@identifierType", DNET_PID_TYPES, info)); res .addAll( - prepareListStructProps( - doc, "//datacite:identifier[@identifierType != 'URL']", "@identifierType", "dnet:pid_types", - "dnet:pid_types", info)); + prepareListStructPropsWithValidQualifier( + doc, "//datacite:identifier[@identifierType != 'URL']", "@identifierType", DNET_PID_TYPES, info)); res .addAll( - prepareListStructProps( + prepareListStructPropsWithValidQualifier( doc, "//datacite:alternateIdentifier[@alternateIdentifierType != 'URL']", - "@alternateIdentifierType", "dnet:pid_types", "dnet:pid_types", info)); + "@alternateIdentifierType", DNET_PID_TYPES, info)); return res; } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/Vocabulary.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/Vocabulary.java new file mode 100644 index 0000000000..7714f6d90c --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/Vocabulary.java @@ -0,0 +1,42 @@ + +package eu.dnetlib.dhp.oa.graph.raw.common; + +import java.util.HashMap; +import java.util.Map; + +public class Vocabulary { + + private final String id; + private final String name; + + private final Map terms = new HashMap<>(); + + public Vocabulary(final String id, final String name) { + this.id = id; + this.name = name; + } + + public String getId() { + return id; + } + + public String getName() { + return name; + } + + protected Map getTerms() { + return terms; + } + + public VocabularyTerm getTerm(final String id) { + return terms.get(id.toLowerCase()); + } + + protected void addTerm(final String id, final String name) { + terms.put(id.toLowerCase(), new VocabularyTerm(id, name)); + } + + protected boolean termExists(final String id) { + return terms.containsKey(id.toLowerCase()); + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java new file mode 100644 index 0000000000..127f73e220 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java @@ -0,0 +1,49 @@ + +package eu.dnetlib.dhp.oa.graph.raw.common; + +import java.util.HashMap; +import java.util.Map; + +import eu.dnetlib.dhp.schema.oaf.Qualifier; + +public class VocabularyGroup { + + private final Map vocs = new HashMap<>(); + + public void addVocabulary(final String id, final String name) { + vocs.put(id.toLowerCase(), new Vocabulary(id, name)); + } + + public void addTerm(final String vocId, final String id, final String name) { + if (vocabularyExists(vocId)) { + vocs.get(vocId.toLowerCase()).addTerm(id, name); + } + } + + public VocabularyTerm getTerm(final String vocId, final String id) { + if (termExists(vocId, id)) { + return vocs.get(vocId.toLowerCase()).getTerm(id); + } else { + return new VocabularyTerm(id, id); + } + } + + public Qualifier getTermAsQualifier(final String vocId, final String id) { + if (termExists(vocId, id)) { + final Vocabulary v = vocs.get(vocId.toLowerCase()); + final VocabularyTerm t = v.getTerm(id); + return OafMapperUtils.qualifier(t.getId(), t.getName(), v.getId(), v.getName()); + } else { + return OafMapperUtils.qualifier(id, id, vocId, vocId); + } + } + + public boolean termExists(final String vocId, final String id) { + return vocabularyExists(vocId) && vocs.get(vocId.toLowerCase()).termExists(id); + } + + public boolean vocabularyExists(final String vocId) { + return vocs.containsKey(vocId.toLowerCase()); + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyTerm.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyTerm.java new file mode 100644 index 0000000000..b3c7859231 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyTerm.java @@ -0,0 +1,22 @@ + +package eu.dnetlib.dhp.oa.graph.raw.common; + +public class VocabularyTerm { + + private final String id; + private final String name; + + public VocabularyTerm(final String id, final String name) { + this.id = id; + this.name = name; + } + + public String getId() { + return id; + } + + public String getName() { + return name; + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json index d4d0935a0e..9e3992bcfb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json @@ -17,27 +17,9 @@ "paramDescription": "the path of the target file", "paramRequired": true }, - { - "paramName": "pgurl", - "paramLongName": "postgresUrl", - "paramDescription": "postgres url, example: jdbc:postgresql://localhost:5432/testdb", - "paramRequired": true - }, - { - "paramName": "pguser", - "paramLongName": "postgresUser", - "paramDescription": "postgres user", - "paramRequired": false - }, - { - "paramName": "pgpasswd", - "paramLongName": "postgresPassword", - "paramDescription": "postgres password", - "paramRequired": false - }, { "paramName": "islookup", - "paramLongName": "isLookupUrl", + "paramLongName": "islookup", "paramDescription": "the url of the ISLookupService", "paramRequired": true } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml index fa015499c9..f8426c35f2 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml @@ -34,6 +34,10 @@ mongoDb mongo database + + isLookupUrl + the address of the lookUp service + sparkDriverMemory @@ -233,9 +237,7 @@ --sourcePaths${contentPath}/db_claims,${contentPath}/oaf_claims,${contentPath}/odf_claims --targetPath${workingDir}/entities_claim - --postgresUrl${postgresURL} - --postgresUser${postgresUser} - --postgresPassword${postgresPassword} + --islookup${isLookupUrl} @@ -282,9 +284,7 @@ --sourcePaths${contentPath}/db_records,${contentPath}/oaf_records,${contentPath}/odf_records --targetPath${workingDir}/entities - --postgresUrl${postgresURL} - --postgresUser${postgresUser} - --postgresPassword${postgresPassword} + --islookup${isLookupUrl} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step2/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step2/oozie_app/workflow.xml index cd0a4025e8..f6485ea9c3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step2/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step2/oozie_app/workflow.xml @@ -9,17 +9,10 @@ the temporary path to store entities before dispatching - postgresURL - the postgres URL to access to the database - - - postgresUser - the user postgres - - - postgresPassword - the password postgres + isLookupUrl + the address of the lookUp service + sparkDriverMemory memory for driver process @@ -62,9 +55,7 @@ -mt yarn-cluster -s${migrationPathStep1}/db_records,${migrationPathStep1}/oaf_records,${migrationPathStep1}/odf_records -t${migrationPathStep2}/all_entities - -pgurl${postgresURL} - -pguser${postgresUser} - -pgpasswd${postgresPassword} + --islookup${isLookupUrl} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index b9da9fb297..dad427ce4b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -9,7 +9,6 @@ import static org.mockito.Mockito.when; import java.io.IOException; import java.util.List; -import java.util.Map; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -20,6 +19,8 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils; +import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.Dataset; @@ -34,18 +35,27 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty; public class MappersTest { @Mock - private Map code2name; + private VocabularyGroup vocs; @BeforeEach public void setUp() throws Exception { - when(code2name.get(anyString())).thenAnswer(invocation -> invocation.getArgument(0)); + when(vocs.getTermAsQualifier(anyString(), anyString())) + .thenAnswer( + invocation -> OafMapperUtils + .qualifier( + invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0), + invocation.getArgument(0))); + + when(vocs.termExists(anyString(), anyString())).thenReturn(true); + } @Test void testPublication() throws IOException { + final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_record.xml")); - final List list = new OafToOafMapper(code2name).processMdRecord(xml); + final List list = new OafToOafMapper(vocs).processMdRecord(xml); assertEquals(3, list.size()); assertTrue(list.get(0) instanceof Publication); @@ -86,6 +96,10 @@ public class MappersTest { assertTrue(StringUtils.isNotBlank(p.getJournal().getIssnOnline())); assertTrue(StringUtils.isNotBlank(p.getJournal().getName())); + assertTrue(p.getPid().size() > 0); + assertEquals(p.getPid().get(0).getValue(), "10.3897/oneeco.2.e13718"); + assertEquals(p.getPid().get(0).getQualifier().getClassid(), "doi"); + assertNotNull(p.getInstance()); assertTrue(p.getInstance().size() > 0); p @@ -115,6 +129,7 @@ public class MappersTest { assertTrue(StringUtils.isNotBlank(r1.getRelType())); assertTrue(StringUtils.isNotBlank(r2.getRelType())); + // System.out.println(new ObjectMapper().writeValueAsString(p)); // System.out.println(new ObjectMapper().writeValueAsString(r1)); // System.out.println(new ObjectMapper().writeValueAsString(r2)); } @@ -123,7 +138,7 @@ public class MappersTest { void testDataset() throws IOException { final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_dataset.xml")); - final List list = new OdfToOafMapper(code2name).processMdRecord(xml); + final List list = new OdfToOafMapper(vocs).processMdRecord(xml); assertEquals(3, list.size()); assertTrue(list.get(0) instanceof Dataset); @@ -205,7 +220,7 @@ public class MappersTest { void testSoftware() throws IOException { final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_software.xml")); - final List list = new OdfToOafMapper(code2name).processMdRecord(xml); + final List list = new OdfToOafMapper(vocs).processMdRecord(xml); assertEquals(1, list.size()); assertTrue(list.get(0) instanceof Software); From 7a7272d9ec0cfe6cbf95b9bf4797b2819db9535b Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Tue, 26 May 2020 13:06:55 +0200 Subject: [PATCH 23/25] result pids (new xpaths + IS vocabularies) --- .../raw/AbstractMdRecordToOafMapper.java | 167 ++++++++---------- .../raw/GenerateEntitiesApplication.java | 161 ++++++++++------- .../dhp/oa/graph/raw/OafToOafMapper.java | 33 ++-- .../dhp/oa/graph/raw/OdfToOafMapper.java | 54 +++--- .../graph/generate_entities_parameters.json | 7 +- 5 files changed, 217 insertions(+), 205 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 5c89d5096f..278544e0b0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -10,10 +10,27 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listFields; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.oaiIProvenance; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; -import static eu.dnetlib.dhp.schema.common.ModelConstants.*; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASET_DEFAULT_RESULTTYPE; import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY; +import static eu.dnetlib.dhp.schema.common.ModelConstants.NOT_AVAILABLE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.ORP_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.REPOSITORY_PROVENANCE_ACTIONS; +import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.UNKNOWN; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; @@ -47,10 +64,8 @@ public abstract class AbstractMdRecordToOafMapper { protected static final String DATACITE_SCHEMA_KERNEL_4 = "http://datacite.org/schema/kernel-4"; protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3"; - protected static final Qualifier ORCID_PID_TYPE = qualifier( - "ORCID", "Open Researcher and Contributor ID", DNET_PID_TYPES, DNET_PID_TYPES); - protected static final Qualifier MAG_PID_TYPE = qualifier( - "MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES); + protected static final Qualifier ORCID_PID_TYPE = qualifier("ORCID", "Open Researcher and Contributor ID", DNET_PID_TYPES, DNET_PID_TYPES); + protected static final Qualifier MAG_PID_TYPE = qualifier("MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES); protected static final Map nsContext = new HashMap<>(); @@ -64,8 +79,7 @@ public abstract class AbstractMdRecordToOafMapper { nsContext.put("datacite", DATACITE_SCHEMA_KERNEL_3); } - protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier( - "main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title"); + protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier("main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title"); protected AbstractMdRecordToOafMapper(final Map code2name) { this.code2name = code2name; @@ -79,20 +93,15 @@ public abstract class AbstractMdRecordToOafMapper { .parseText(xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)); final String type = doc.valueOf("//dr:CobjCategory/@type"); - final KeyValue collectedFrom = getProvenanceDatasource( - doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); + final KeyValue collectedFrom = getProvenanceDatasource(doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); - if (collectedFrom == null) { - return null; - } + if (collectedFrom == null) { return null; } final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) ? collectedFrom : getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name"); - if (hostedBy == null) { - return null; - } + if (hostedBy == null) { return null; } final DataInfo info = prepareDataInfo(doc); final long lastUpdateTimestamp = new Date().getTime(); @@ -107,9 +116,7 @@ public abstract class AbstractMdRecordToOafMapper { final String dsId = doc.valueOf(xpathId); final String dsName = doc.valueOf(xpathName); - if (StringUtils.isBlank(dsId) | StringUtils.isBlank(dsName)) { - return null; - } + if (StringUtils.isBlank(dsId) | StringUtils.isBlank(dsName)) { return null; } return keyValue(createOpenaireId(10, dsId, true), dsName); } @@ -125,47 +132,47 @@ public abstract class AbstractMdRecordToOafMapper { final List oafs = new ArrayList<>(); switch (type.toLowerCase()) { - case "publication": - final Publication p = new Publication(); - populateResultFields(p, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); - p.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE); - p.setJournal(prepareJournal(doc, info)); - oafs.add(p); - break; - case "dataset": - final Dataset d = new Dataset(); - populateResultFields(d, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); - d.setResulttype(DATASET_DEFAULT_RESULTTYPE); - d.setStoragedate(prepareDatasetStorageDate(doc, info)); - d.setDevice(prepareDatasetDevice(doc, info)); - d.setSize(prepareDatasetSize(doc, info)); - d.setVersion(prepareDatasetVersion(doc, info)); - d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc, info)); - d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc, info)); - d.setGeolocation(prepareDatasetGeoLocations(doc, info)); - oafs.add(d); - break; - case "software": - final Software s = new Software(); - populateResultFields(s, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); - s.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE); - s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info)); - s.setLicense(prepareSoftwareLicenses(doc, info)); - s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info)); - s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info)); - oafs.add(s); - break; - case "": - case "otherresearchproducts": - default: - final OtherResearchProduct o = new OtherResearchProduct(); - populateResultFields(o, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); - o.setResulttype(ORP_DEFAULT_RESULTTYPE); - o.setContactperson(prepareOtherResearchProductContactPersons(doc, info)); - o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info)); - o.setTool(prepareOtherResearchProductTools(doc, info)); - oafs.add(o); - break; + case "publication": + final Publication p = new Publication(); + populateResultFields(p, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); + p.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE); + p.setJournal(prepareJournal(doc, info)); + oafs.add(p); + break; + case "dataset": + final Dataset d = new Dataset(); + populateResultFields(d, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); + d.setResulttype(DATASET_DEFAULT_RESULTTYPE); + d.setStoragedate(prepareDatasetStorageDate(doc, info)); + d.setDevice(prepareDatasetDevice(doc, info)); + d.setSize(prepareDatasetSize(doc, info)); + d.setVersion(prepareDatasetVersion(doc, info)); + d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc, info)); + d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc, info)); + d.setGeolocation(prepareDatasetGeoLocations(doc, info)); + oafs.add(d); + break; + case "software": + final Software s = new Software(); + populateResultFields(s, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); + s.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE); + s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info)); + s.setLicense(prepareSoftwareLicenses(doc, info)); + s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info)); + s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info)); + oafs.add(s); + break; + case "": + case "otherresearchproducts": + default: + final OtherResearchProduct o = new OtherResearchProduct(); + populateResultFields(o, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); + o.setResulttype(ORP_DEFAULT_RESULTTYPE); + o.setContactperson(prepareOtherResearchProductContactPersons(doc, info)); + o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info)); + o.setTool(prepareOtherResearchProductTools(doc, info)); + oafs.add(o); + break; } if (!oafs.isEmpty()) { @@ -194,15 +201,9 @@ public abstract class AbstractMdRecordToOafMapper { final String projectId = createOpenaireId(40, originalId, true); res - .add( - getRelation( - docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, collectedFrom, info, - lastUpdateTimestamp)); + .add(getRelation(docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, collectedFrom, info, lastUpdateTimestamp)); res - .add( - getRelation( - projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, collectedFrom, info, - lastUpdateTimestamp)); + .add(getRelation(projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, collectedFrom, info, lastUpdateTimestamp)); } } @@ -247,10 +248,7 @@ public abstract class AbstractMdRecordToOafMapper { r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false)); r.setOriginalId(Arrays.asList(doc.valueOf("//dri:objIdentifier"))); r.setCollectedfrom(Arrays.asList(collectedFrom)); - r - .setPid( - prepareListStructProps( - doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info)); + r.setPid(prepareResultPids(doc, info)); r.setDateofcollection(doc.valueOf("//dr:dateOfCollection")); r.setDateoftransformation(doc.valueOf("//dr:dateOfTransformation")); r.setExtraInfo(new ArrayList<>()); // NOT PRESENT IN MDSTORES @@ -278,6 +276,8 @@ public abstract class AbstractMdRecordToOafMapper { r.setBestaccessright(getBestAccessRights(instances)); } + protected abstract List prepareResultPids(Document doc, DataInfo info); + private List prepareContexts(final Document doc, final DataInfo info) { final List list = new ArrayList<>(); for (final Object o : doc.selectNodes("//oaf:concept")) { @@ -358,7 +358,7 @@ public abstract class AbstractMdRecordToOafMapper { protected abstract Field prepareDatasetStorageDate(Document doc, DataInfo info); - protected static Qualifier getBestAccessRights(List instanceList) { + protected static Qualifier getBestAccessRights(final List instanceList) { if (instanceList != null) { final Optional min = instanceList .stream() @@ -398,9 +398,7 @@ public abstract class AbstractMdRecordToOafMapper { final String sp = n.valueOf("@sp"); final String vol = n.valueOf("@vol"); final String edition = n.valueOf("@edition"); - if (StringUtils.isNotBlank(name)) { - return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info); - } + if (StringUtils.isNotBlank(name)) { return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info); } } return null; } @@ -453,10 +451,7 @@ public abstract class AbstractMdRecordToOafMapper { for (final Object o : node.selectNodes(xpath)) { final Node n = (Node) o; res - .add( - structuredProperty( - n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"), - n.valueOf("@schemename"), info)); + .add(structuredProperty(n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"), n.valueOf("@schemename"), info)); } return res; } @@ -464,9 +459,7 @@ public abstract class AbstractMdRecordToOafMapper { protected OAIProvenance prepareOAIprovenance(final Document doc) { final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']"); - if (n == null) { - return null; - } + if (n == null) { return null; } final String identifier = n.valueOf("./*[local-name()='identifier']"); final String baseURL = n.valueOf("./*[local-name()='baseURL']"); @@ -481,9 +474,7 @@ public abstract class AbstractMdRecordToOafMapper { protected DataInfo prepareDataInfo(final Document doc) { final Node n = doc.selectSingleNode("//oaf:datainfo"); - if (n == null) { - return dataInfo(false, null, false, false, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); - } + if (n == null) { return dataInfo(false, null, false, false, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); } final String paClassId = n.valueOf("./oaf:provenanceaction/@classid"); final String paClassName = n.valueOf("./oaf:provenanceaction/@classname"); @@ -495,9 +486,7 @@ public abstract class AbstractMdRecordToOafMapper { final Boolean inferred = Boolean.parseBoolean(n.valueOf("./oaf:inferred")); final String trust = n.valueOf("./oaf:trust"); - return dataInfo( - deletedbyinference, inferenceprovenance, inferred, false, - qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust); + return dataInfo(deletedbyinference, inferenceprovenance, inferred, false, qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust); } protected Field prepareField(final Node node, final String xpath, final DataInfo info) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java index 739c7a4629..d184364174 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java @@ -5,7 +5,12 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.IOException; import java.sql.SQLException; -import java.util.*; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; @@ -27,7 +32,19 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.DbClient; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Datasource; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; +import eu.dnetlib.dhp.schema.oaf.Project; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Software; +import eu.dnetlib.dhp.utils.ISLookupClientFactory; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import scala.Tuple2; public class GenerateEntitiesApplication { @@ -39,14 +56,12 @@ public class GenerateEntitiesApplication { public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils - .toString( - MigrateMongoMdstoresApplication.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json"))); + .toString(GenerateEntitiesApplication.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json"))); parser.parseArgument(args); - Boolean isSparkSessionManaged = Optional + final Boolean isSparkSessionManaged = Optional .ofNullable(parser.get("isSparkSessionManaged")) .map(Boolean::valueOf) .orElse(Boolean.TRUE); @@ -58,17 +73,17 @@ public class GenerateEntitiesApplication { final String dbUrl = parser.get("postgresUrl"); final String dbUser = parser.get("postgresUser"); final String dbPassword = parser.get("postgresPassword"); + final String isLookupUrl = parser.get("isLookupUrl"); - final Map code2name = loadClassNames(dbUrl, dbUser, dbPassword); + final Map code2name = loadVocsFromDB(dbUrl, dbUser, dbPassword); - SparkConf conf = new SparkConf(); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - removeOutputDir(spark, targetPath); - generateEntities(spark, code2name, sourcePaths, targetPath); - }); + code2name.putAll(loadVocsFromIS(isLookupUrl)); + + final SparkConf conf = new SparkConf(); + runWithSparkSession(conf, isSparkSessionManaged, spark -> { + removeOutputDir(spark, targetPath); + generateEntities(spark, code2name, sourcePaths, targetPath); + }); } private static void generateEntities( @@ -77,7 +92,7 @@ public class GenerateEntitiesApplication { final String sourcePaths, final String targetPath) { - JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); final List existingSourcePaths = Arrays .stream(sourcePaths.split(",")) .filter(p -> exists(sc, p)) @@ -90,27 +105,25 @@ public class GenerateEntitiesApplication { for (final String sp : existingSourcePaths) { inputRdd = inputRdd - .union( - sc - .sequenceFile(sp, Text.class, Text.class) - .map(k -> new Tuple2<>(k._1().toString(), k._2().toString())) - .map(k -> convertToListOaf(k._1(), k._2(), code2name)) - .filter(Objects::nonNull) - .flatMap(list -> list.iterator())); + .union(sc + .sequenceFile(sp, Text.class, Text.class) + .map(k -> new Tuple2<>(k._1().toString(), k._2().toString())) + .map(k -> convertToListOaf(k._1(), k._2(), code2name)) + .filter(Objects::nonNull) + .flatMap(list -> list.iterator())); } inputRdd .mapToPair(oaf -> new Tuple2<>(ModelSupport.idFn().apply(oaf), oaf)) .reduceByKey((o1, o2) -> merge(o1, o2)) .map(Tuple2::_2) - .map( - oaf -> oaf.getClass().getSimpleName().toLowerCase() - + "|" - + OBJECT_MAPPER.writeValueAsString(oaf)) + .map(oaf -> oaf.getClass().getSimpleName().toLowerCase() + + "|" + + OBJECT_MAPPER.writeValueAsString(oaf)) .saveAsTextFile(targetPath, GzipCodec.class); } - private static Oaf merge(Oaf o1, Oaf o2) { + private static Oaf merge(final Oaf o1, final Oaf o2) { if (ModelSupport.isSubClass(o1, OafEntity.class)) { ((OafEntity) o1).mergeFrom((OafEntity) o2); } else if (ModelSupport.isSubClass(o1, Relation.class)) { @@ -122,37 +135,41 @@ public class GenerateEntitiesApplication { } private static List convertToListOaf( - final String id, final String s, final Map code2name) { + final String id, + final String s, + final Map code2name) { final String type = StringUtils.substringAfter(id, ":"); switch (type.toLowerCase()) { - case "native_oaf": - return new OafToOafMapper(code2name).processMdRecord(s); - case "native_odf": - return new OdfToOafMapper(code2name).processMdRecord(s); - case "datasource": - return Arrays.asList(convertFromJson(s, Datasource.class)); - case "organization": - return Arrays.asList(convertFromJson(s, Organization.class)); - case "project": - return Arrays.asList(convertFromJson(s, Project.class)); - case "relation": - return Arrays.asList(convertFromJson(s, Relation.class)); - case "publication": - return Arrays.asList(convertFromJson(s, Publication.class)); - case "dataset": - return Arrays.asList(convertFromJson(s, Dataset.class)); - case "software": - return Arrays.asList(convertFromJson(s, Software.class)); - case "otherresearchproduct": - return Arrays.asList(convertFromJson(s, OtherResearchProduct.class)); - default: - throw new RuntimeException("type not managed: " + type.toLowerCase()); + case "native_oaf": + return new OafToOafMapper(code2name).processMdRecord(s); + case "native_odf": + return new OdfToOafMapper(code2name).processMdRecord(s); + case "datasource": + return Arrays.asList(convertFromJson(s, Datasource.class)); + case "organization": + return Arrays.asList(convertFromJson(s, Organization.class)); + case "project": + return Arrays.asList(convertFromJson(s, Project.class)); + case "relation": + return Arrays.asList(convertFromJson(s, Relation.class)); + case "publication": + return Arrays.asList(convertFromJson(s, Publication.class)); + case "dataset": + return Arrays.asList(convertFromJson(s, Dataset.class)); + case "software": + return Arrays.asList(convertFromJson(s, Software.class)); + case "otherresearchproduct": + return Arrays.asList(convertFromJson(s, OtherResearchProduct.class)); + default: + throw new RuntimeException("type not managed: " + type.toLowerCase()); } } - private static Map loadClassNames( - final String dbUrl, final String dbUser, final String dbPassword) throws IOException { + private static Map loadVocsFromDB( + final String dbUrl, + final String dbUser, + final String dbPassword) throws IOException { log.info("Loading vocabulary terms from db..."); @@ -160,15 +177,13 @@ public class GenerateEntitiesApplication { try (DbClient dbClient = new DbClient(dbUrl, dbUser, dbPassword)) { dbClient - .processResults( - "select code, name from class", - rs -> { - try { - map.put(rs.getString("code"), rs.getString("name")); - } catch (final SQLException e) { - e.printStackTrace(); - } - }); + .processResults("select code, name from class", rs -> { + try { + map.put(rs.getString("code"), rs.getString("name")); + } catch (final SQLException e) { + e.printStackTrace(); + } + }); } log.info("Found " + map.size() + " terms."); @@ -176,6 +191,24 @@ public class GenerateEntitiesApplication { return map; } + private static Map loadVocsFromIS(final String isLookupUrl) throws IOException, ISLookUpException { + final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookupUrl); + + final String xquery = + IOUtils.toString(GenerateEntitiesApplication.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery")); + + final Map map = new HashMap<>(); + + for (final String s : isLookUpService.quickSearchProfile(xquery)) { + final String[] arr = s.split("@=@"); + if (arr.length == 4) { + map.put(arr[2].trim(), arr[3].trim()); + } + } + + return map; + } + private static Oaf convertFromJson(final String s, final Class clazz) { try { return OBJECT_MAPPER.readValue(s, clazz); @@ -196,7 +229,7 @@ public class GenerateEntitiesApplication { } } - private static void removeOutputDir(SparkSession spark, String path) { + private static void removeOutputDir(final SparkSession spark, final String path) { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index 53c0913c23..a0984eed42 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -130,8 +130,7 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { final Instance instance = new Instance(); instance - .setInstancetype( - prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)); + .setInstancetype(prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)); instance.setCollectedfrom(collectedfrom); instance.setHostedby(hostedby); instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); @@ -147,14 +146,13 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { final List nodes = Lists.newArrayList(doc.selectNodes("//dc:identifier")); instance - .setUrl( - nodes - .stream() - .filter(n -> StringUtils.isNotBlank(n.getText())) - .map(n -> n.getText().trim()) - .filter(u -> u.startsWith("http")) - .distinct() - .collect(Collectors.toCollection(ArrayList::new))); + .setUrl(nodes + .stream() + .filter(n -> StringUtils.isNotBlank(n.getText())) + .map(n -> n.getText().trim()) + .filter(u -> u.startsWith("http")) + .distinct() + .collect(Collectors.toCollection(ArrayList::new))); return Lists.newArrayList(instance); } @@ -279,15 +277,9 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { final String otherId = createOpenaireId(50, originalId, false); res - .add( - getRelation( - docId, otherId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info, - lastUpdateTimestamp)); + .add(getRelation(docId, otherId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info, lastUpdateTimestamp)); res - .add( - getRelation( - otherId, docId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info, - lastUpdateTimestamp)); + .add(getRelation(otherId, docId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info, lastUpdateTimestamp)); } } return res; @@ -297,4 +289,9 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { protected Qualifier prepareResourceType(final Document doc, final DataInfo info) { return null; // NOT PRESENT IN OAF } + + @Override + protected List prepareResultPids(final Document doc, final DataInfo info) { + return prepareListStructProps(doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info); + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index b9a1599174..64af8a7512 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -120,8 +120,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final Instance instance = new Instance(); instance - .setInstancetype( - prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)); + .setInstancetype(prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)); instance.setCollectedfrom(collectedfrom); instance.setHostedby(hostedby); instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); @@ -170,10 +169,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { && !dateType.equalsIgnoreCase("Updated") && !dateType.equalsIgnoreCase("Available")) { res - .add( - structuredProperty( - ((Node) o).getText(), "UNKNOWN", "UNKNOWN", DNET_DATA_CITE_DATE, DNET_DATA_CITE_DATE, - info)); + .add(structuredProperty(((Node) o).getText(), "UNKNOWN", "UNKNOWN", DNET_DATA_CITE_DATE, DNET_DATA_CITE_DATE, info)); } } return res; @@ -225,16 +221,14 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { protected List> prepareOtherResearchProductContactGroups( final Document doc, final DataInfo info) { - return prepareListFields( - doc, "//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName", info); + return prepareListFields(doc, "//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName", info); } @Override protected List> prepareOtherResearchProductContactPersons( final Document doc, final DataInfo info) { - return prepareListFields( - doc, "//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName", info); + return prepareListFields(doc, "//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName", info); } @Override @@ -260,8 +254,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { protected List> prepareSoftwareDocumentationUrls( final Document doc, final DataInfo info) { - return prepareListFields( - doc, "//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", info); + return prepareListFields(doc, "//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", info); } // DATASETS @@ -335,29 +328,16 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { if (type.equalsIgnoreCase("IsSupplementTo")) { res - .add( - getRelation( - docId, otherId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENT_TO, collectedFrom, info, - lastUpdateTimestamp)); + .add(getRelation(docId, otherId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENT_TO, collectedFrom, info, lastUpdateTimestamp)); res - .add( - getRelation( - otherId, docId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENTED_BY, collectedFrom, info, - lastUpdateTimestamp)); + .add(getRelation(otherId, docId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENTED_BY, collectedFrom, info, lastUpdateTimestamp)); } else if (type.equals("IsPartOf")) { res - .add( - getRelation( - docId, otherId, RESULT_RESULT, PART, IS_PART_OF, collectedFrom, info, - lastUpdateTimestamp)); + .add(getRelation(docId, otherId, RESULT_RESULT, PART, IS_PART_OF, collectedFrom, info, lastUpdateTimestamp)); res - .add( - getRelation( - otherId, docId, RESULT_RESULT, PART, HAS_PARTS, collectedFrom, info, - lastUpdateTimestamp)); - } else { - } + .add(getRelation(otherId, docId, RESULT_RESULT, PART, HAS_PARTS, collectedFrom, info, lastUpdateTimestamp)); + } else {} } } return res; @@ -365,8 +345,16 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected Qualifier prepareResourceType(final Document doc, final DataInfo info) { - return prepareQualifier( - doc, "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", DNET_DATA_CITE_RESOURCE, - DNET_DATA_CITE_RESOURCE); + return prepareQualifier(doc, "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", DNET_DATA_CITE_RESOURCE, DNET_DATA_CITE_RESOURCE); } + + @Override + protected List prepareResultPids(final Document doc, final DataInfo info) { + final List res = new ArrayList<>(); + res.addAll(prepareListStructProps(doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info)); + res.addAll(prepareListStructProps(doc, "//datacite:identifier[@identifierType != 'URL']", "@identifierType", "dnet:pid_types", "dnet:pid_types", info)); + res.addAll(prepareListStructProps(doc, "//datacite:alternateIdentifier[@alternateIdentifierType != 'URL']", "@alternateIdentifierType", "dnet:pid_types", "dnet:pid_types", info)); + return res; + } + } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json index 293bf041b9..d4d0935a0e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json @@ -34,6 +34,11 @@ "paramLongName": "postgresPassword", "paramDescription": "postgres password", "paramRequired": false + }, + { + "paramName": "islookup", + "paramLongName": "isLookupUrl", + "paramDescription": "the url of the ISLookupService", + "paramRequired": true } - ] \ No newline at end of file From a25598140a978fb781f570aaa84db5b957a0caa9 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Tue, 26 May 2020 13:11:09 +0200 Subject: [PATCH 24/25] result pids (new xpaths + IS vocabularies) --- .../raw/AbstractMdRecordToOafMapper.java | 137 +++++++++++------- .../raw/GenerateEntitiesApplication.java | 75 +++++----- .../dhp/oa/graph/raw/OafToOafMapper.java | 31 ++-- .../dhp/oa/graph/raw/OdfToOafMapper.java | 61 ++++++-- 4 files changed, 189 insertions(+), 115 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 278544e0b0..99334dd7f9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -64,8 +64,10 @@ public abstract class AbstractMdRecordToOafMapper { protected static final String DATACITE_SCHEMA_KERNEL_4 = "http://datacite.org/schema/kernel-4"; protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3"; - protected static final Qualifier ORCID_PID_TYPE = qualifier("ORCID", "Open Researcher and Contributor ID", DNET_PID_TYPES, DNET_PID_TYPES); - protected static final Qualifier MAG_PID_TYPE = qualifier("MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES); + protected static final Qualifier ORCID_PID_TYPE = qualifier( + "ORCID", "Open Researcher and Contributor ID", DNET_PID_TYPES, DNET_PID_TYPES); + protected static final Qualifier MAG_PID_TYPE = qualifier( + "MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES); protected static final Map nsContext = new HashMap<>(); @@ -79,7 +81,8 @@ public abstract class AbstractMdRecordToOafMapper { nsContext.put("datacite", DATACITE_SCHEMA_KERNEL_3); } - protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier("main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title"); + protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier( + "main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title"); protected AbstractMdRecordToOafMapper(final Map code2name) { this.code2name = code2name; @@ -93,15 +96,20 @@ public abstract class AbstractMdRecordToOafMapper { .parseText(xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)); final String type = doc.valueOf("//dr:CobjCategory/@type"); - final KeyValue collectedFrom = getProvenanceDatasource(doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); + final KeyValue collectedFrom = getProvenanceDatasource( + doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); - if (collectedFrom == null) { return null; } + if (collectedFrom == null) { + return null; + } final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) ? collectedFrom : getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name"); - if (hostedBy == null) { return null; } + if (hostedBy == null) { + return null; + } final DataInfo info = prepareDataInfo(doc); final long lastUpdateTimestamp = new Date().getTime(); @@ -116,7 +124,9 @@ public abstract class AbstractMdRecordToOafMapper { final String dsId = doc.valueOf(xpathId); final String dsName = doc.valueOf(xpathName); - if (StringUtils.isBlank(dsId) | StringUtils.isBlank(dsName)) { return null; } + if (StringUtils.isBlank(dsId) | StringUtils.isBlank(dsName)) { + return null; + } return keyValue(createOpenaireId(10, dsId, true), dsName); } @@ -132,47 +142,47 @@ public abstract class AbstractMdRecordToOafMapper { final List oafs = new ArrayList<>(); switch (type.toLowerCase()) { - case "publication": - final Publication p = new Publication(); - populateResultFields(p, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); - p.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE); - p.setJournal(prepareJournal(doc, info)); - oafs.add(p); - break; - case "dataset": - final Dataset d = new Dataset(); - populateResultFields(d, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); - d.setResulttype(DATASET_DEFAULT_RESULTTYPE); - d.setStoragedate(prepareDatasetStorageDate(doc, info)); - d.setDevice(prepareDatasetDevice(doc, info)); - d.setSize(prepareDatasetSize(doc, info)); - d.setVersion(prepareDatasetVersion(doc, info)); - d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc, info)); - d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc, info)); - d.setGeolocation(prepareDatasetGeoLocations(doc, info)); - oafs.add(d); - break; - case "software": - final Software s = new Software(); - populateResultFields(s, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); - s.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE); - s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info)); - s.setLicense(prepareSoftwareLicenses(doc, info)); - s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info)); - s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info)); - oafs.add(s); - break; - case "": - case "otherresearchproducts": - default: - final OtherResearchProduct o = new OtherResearchProduct(); - populateResultFields(o, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); - o.setResulttype(ORP_DEFAULT_RESULTTYPE); - o.setContactperson(prepareOtherResearchProductContactPersons(doc, info)); - o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info)); - o.setTool(prepareOtherResearchProductTools(doc, info)); - oafs.add(o); - break; + case "publication": + final Publication p = new Publication(); + populateResultFields(p, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); + p.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE); + p.setJournal(prepareJournal(doc, info)); + oafs.add(p); + break; + case "dataset": + final Dataset d = new Dataset(); + populateResultFields(d, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); + d.setResulttype(DATASET_DEFAULT_RESULTTYPE); + d.setStoragedate(prepareDatasetStorageDate(doc, info)); + d.setDevice(prepareDatasetDevice(doc, info)); + d.setSize(prepareDatasetSize(doc, info)); + d.setVersion(prepareDatasetVersion(doc, info)); + d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc, info)); + d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc, info)); + d.setGeolocation(prepareDatasetGeoLocations(doc, info)); + oafs.add(d); + break; + case "software": + final Software s = new Software(); + populateResultFields(s, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); + s.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE); + s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info)); + s.setLicense(prepareSoftwareLicenses(doc, info)); + s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info)); + s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info)); + oafs.add(s); + break; + case "": + case "otherresearchproducts": + default: + final OtherResearchProduct o = new OtherResearchProduct(); + populateResultFields(o, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); + o.setResulttype(ORP_DEFAULT_RESULTTYPE); + o.setContactperson(prepareOtherResearchProductContactPersons(doc, info)); + o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info)); + o.setTool(prepareOtherResearchProductTools(doc, info)); + oafs.add(o); + break; } if (!oafs.isEmpty()) { @@ -201,9 +211,15 @@ public abstract class AbstractMdRecordToOafMapper { final String projectId = createOpenaireId(40, originalId, true); res - .add(getRelation(docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, collectedFrom, info, lastUpdateTimestamp)); + .add( + getRelation( + docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, collectedFrom, info, + lastUpdateTimestamp)); res - .add(getRelation(projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, collectedFrom, info, lastUpdateTimestamp)); + .add( + getRelation( + projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, collectedFrom, info, + lastUpdateTimestamp)); } } @@ -398,7 +414,9 @@ public abstract class AbstractMdRecordToOafMapper { final String sp = n.valueOf("@sp"); final String vol = n.valueOf("@vol"); final String edition = n.valueOf("@edition"); - if (StringUtils.isNotBlank(name)) { return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info); } + if (StringUtils.isNotBlank(name)) { + return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info); + } } return null; } @@ -451,7 +469,10 @@ public abstract class AbstractMdRecordToOafMapper { for (final Object o : node.selectNodes(xpath)) { final Node n = (Node) o; res - .add(structuredProperty(n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"), n.valueOf("@schemename"), info)); + .add( + structuredProperty( + n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"), + n.valueOf("@schemename"), info)); } return res; } @@ -459,7 +480,9 @@ public abstract class AbstractMdRecordToOafMapper { protected OAIProvenance prepareOAIprovenance(final Document doc) { final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']"); - if (n == null) { return null; } + if (n == null) { + return null; + } final String identifier = n.valueOf("./*[local-name()='identifier']"); final String baseURL = n.valueOf("./*[local-name()='baseURL']"); @@ -474,7 +497,9 @@ public abstract class AbstractMdRecordToOafMapper { protected DataInfo prepareDataInfo(final Document doc) { final Node n = doc.selectSingleNode("//oaf:datainfo"); - if (n == null) { return dataInfo(false, null, false, false, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); } + if (n == null) { + return dataInfo(false, null, false, false, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); + } final String paClassId = n.valueOf("./oaf:provenanceaction/@classid"); final String paClassName = n.valueOf("./oaf:provenanceaction/@classname"); @@ -486,7 +511,9 @@ public abstract class AbstractMdRecordToOafMapper { final Boolean inferred = Boolean.parseBoolean(n.valueOf("./oaf:inferred")); final String trust = n.valueOf("./oaf:trust"); - return dataInfo(deletedbyinference, inferenceprovenance, inferred, false, qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust); + return dataInfo( + deletedbyinference, inferenceprovenance, inferred, false, + qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust); } protected Field prepareField(final Node node, final String xpath, final DataInfo info) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java index d184364174..0d140e9adc 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java @@ -56,8 +56,9 @@ public class GenerateEntitiesApplication { public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils - .toString(GenerateEntitiesApplication.class - .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json"))); + .toString( + GenerateEntitiesApplication.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json"))); parser.parseArgument(args); @@ -105,21 +106,23 @@ public class GenerateEntitiesApplication { for (final String sp : existingSourcePaths) { inputRdd = inputRdd - .union(sc - .sequenceFile(sp, Text.class, Text.class) - .map(k -> new Tuple2<>(k._1().toString(), k._2().toString())) - .map(k -> convertToListOaf(k._1(), k._2(), code2name)) - .filter(Objects::nonNull) - .flatMap(list -> list.iterator())); + .union( + sc + .sequenceFile(sp, Text.class, Text.class) + .map(k -> new Tuple2<>(k._1().toString(), k._2().toString())) + .map(k -> convertToListOaf(k._1(), k._2(), code2name)) + .filter(Objects::nonNull) + .flatMap(list -> list.iterator())); } inputRdd .mapToPair(oaf -> new Tuple2<>(ModelSupport.idFn().apply(oaf), oaf)) .reduceByKey((o1, o2) -> merge(o1, o2)) .map(Tuple2::_2) - .map(oaf -> oaf.getClass().getSimpleName().toLowerCase() - + "|" - + OBJECT_MAPPER.writeValueAsString(oaf)) + .map( + oaf -> oaf.getClass().getSimpleName().toLowerCase() + + "|" + + OBJECT_MAPPER.writeValueAsString(oaf)) .saveAsTextFile(targetPath, GzipCodec.class); } @@ -141,28 +144,28 @@ public class GenerateEntitiesApplication { final String type = StringUtils.substringAfter(id, ":"); switch (type.toLowerCase()) { - case "native_oaf": - return new OafToOafMapper(code2name).processMdRecord(s); - case "native_odf": - return new OdfToOafMapper(code2name).processMdRecord(s); - case "datasource": - return Arrays.asList(convertFromJson(s, Datasource.class)); - case "organization": - return Arrays.asList(convertFromJson(s, Organization.class)); - case "project": - return Arrays.asList(convertFromJson(s, Project.class)); - case "relation": - return Arrays.asList(convertFromJson(s, Relation.class)); - case "publication": - return Arrays.asList(convertFromJson(s, Publication.class)); - case "dataset": - return Arrays.asList(convertFromJson(s, Dataset.class)); - case "software": - return Arrays.asList(convertFromJson(s, Software.class)); - case "otherresearchproduct": - return Arrays.asList(convertFromJson(s, OtherResearchProduct.class)); - default: - throw new RuntimeException("type not managed: " + type.toLowerCase()); + case "native_oaf": + return new OafToOafMapper(code2name).processMdRecord(s); + case "native_odf": + return new OdfToOafMapper(code2name).processMdRecord(s); + case "datasource": + return Arrays.asList(convertFromJson(s, Datasource.class)); + case "organization": + return Arrays.asList(convertFromJson(s, Organization.class)); + case "project": + return Arrays.asList(convertFromJson(s, Project.class)); + case "relation": + return Arrays.asList(convertFromJson(s, Relation.class)); + case "publication": + return Arrays.asList(convertFromJson(s, Publication.class)); + case "dataset": + return Arrays.asList(convertFromJson(s, Dataset.class)); + case "software": + return Arrays.asList(convertFromJson(s, Software.class)); + case "otherresearchproduct": + return Arrays.asList(convertFromJson(s, OtherResearchProduct.class)); + default: + throw new RuntimeException("type not managed: " + type.toLowerCase()); } } @@ -194,8 +197,10 @@ public class GenerateEntitiesApplication { private static Map loadVocsFromIS(final String isLookupUrl) throws IOException, ISLookUpException { final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookupUrl); - final String xquery = - IOUtils.toString(GenerateEntitiesApplication.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery")); + final String xquery = IOUtils + .toString( + GenerateEntitiesApplication.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery")); final Map map = new HashMap<>(); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index a0984eed42..7231332074 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -130,7 +130,8 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { final Instance instance = new Instance(); instance - .setInstancetype(prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)); + .setInstancetype( + prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)); instance.setCollectedfrom(collectedfrom); instance.setHostedby(hostedby); instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); @@ -146,13 +147,14 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { final List nodes = Lists.newArrayList(doc.selectNodes("//dc:identifier")); instance - .setUrl(nodes - .stream() - .filter(n -> StringUtils.isNotBlank(n.getText())) - .map(n -> n.getText().trim()) - .filter(u -> u.startsWith("http")) - .distinct() - .collect(Collectors.toCollection(ArrayList::new))); + .setUrl( + nodes + .stream() + .filter(n -> StringUtils.isNotBlank(n.getText())) + .map(n -> n.getText().trim()) + .filter(u -> u.startsWith("http")) + .distinct() + .collect(Collectors.toCollection(ArrayList::new))); return Lists.newArrayList(instance); } @@ -277,9 +279,15 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { final String otherId = createOpenaireId(50, originalId, false); res - .add(getRelation(docId, otherId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info, lastUpdateTimestamp)); + .add( + getRelation( + docId, otherId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info, + lastUpdateTimestamp)); res - .add(getRelation(otherId, docId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info, lastUpdateTimestamp)); + .add( + getRelation( + otherId, docId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info, + lastUpdateTimestamp)); } } return res; @@ -292,6 +300,7 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List prepareResultPids(final Document doc, final DataInfo info) { - return prepareListStructProps(doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info); + return prepareListStructProps( + doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 64af8a7512..bd60a658f7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -120,7 +120,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final Instance instance = new Instance(); instance - .setInstancetype(prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)); + .setInstancetype( + prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)); instance.setCollectedfrom(collectedfrom); instance.setHostedby(hostedby); instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); @@ -169,7 +170,10 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { && !dateType.equalsIgnoreCase("Updated") && !dateType.equalsIgnoreCase("Available")) { res - .add(structuredProperty(((Node) o).getText(), "UNKNOWN", "UNKNOWN", DNET_DATA_CITE_DATE, DNET_DATA_CITE_DATE, info)); + .add( + structuredProperty( + ((Node) o).getText(), "UNKNOWN", "UNKNOWN", DNET_DATA_CITE_DATE, DNET_DATA_CITE_DATE, + info)); } } return res; @@ -221,14 +225,16 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { protected List> prepareOtherResearchProductContactGroups( final Document doc, final DataInfo info) { - return prepareListFields(doc, "//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName", info); + return prepareListFields( + doc, "//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName", info); } @Override protected List> prepareOtherResearchProductContactPersons( final Document doc, final DataInfo info) { - return prepareListFields(doc, "//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName", info); + return prepareListFields( + doc, "//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName", info); } @Override @@ -254,7 +260,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { protected List> prepareSoftwareDocumentationUrls( final Document doc, final DataInfo info) { - return prepareListFields(doc, "//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", info); + return prepareListFields( + doc, "//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", info); } // DATASETS @@ -328,16 +335,29 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { if (type.equalsIgnoreCase("IsSupplementTo")) { res - .add(getRelation(docId, otherId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENT_TO, collectedFrom, info, lastUpdateTimestamp)); + .add( + getRelation( + docId, otherId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENT_TO, collectedFrom, info, + lastUpdateTimestamp)); res - .add(getRelation(otherId, docId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENTED_BY, collectedFrom, info, lastUpdateTimestamp)); + .add( + getRelation( + otherId, docId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENTED_BY, collectedFrom, info, + lastUpdateTimestamp)); } else if (type.equals("IsPartOf")) { res - .add(getRelation(docId, otherId, RESULT_RESULT, PART, IS_PART_OF, collectedFrom, info, lastUpdateTimestamp)); + .add( + getRelation( + docId, otherId, RESULT_RESULT, PART, IS_PART_OF, collectedFrom, info, + lastUpdateTimestamp)); res - .add(getRelation(otherId, docId, RESULT_RESULT, PART, HAS_PARTS, collectedFrom, info, lastUpdateTimestamp)); - } else {} + .add( + getRelation( + otherId, docId, RESULT_RESULT, PART, HAS_PARTS, collectedFrom, info, + lastUpdateTimestamp)); + } else { + } } } return res; @@ -345,15 +365,28 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected Qualifier prepareResourceType(final Document doc, final DataInfo info) { - return prepareQualifier(doc, "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", DNET_DATA_CITE_RESOURCE, DNET_DATA_CITE_RESOURCE); + return prepareQualifier( + doc, "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", DNET_DATA_CITE_RESOURCE, + DNET_DATA_CITE_RESOURCE); } @Override protected List prepareResultPids(final Document doc, final DataInfo info) { final List res = new ArrayList<>(); - res.addAll(prepareListStructProps(doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info)); - res.addAll(prepareListStructProps(doc, "//datacite:identifier[@identifierType != 'URL']", "@identifierType", "dnet:pid_types", "dnet:pid_types", info)); - res.addAll(prepareListStructProps(doc, "//datacite:alternateIdentifier[@alternateIdentifierType != 'URL']", "@alternateIdentifierType", "dnet:pid_types", "dnet:pid_types", info)); + res + .addAll( + prepareListStructProps( + doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info)); + res + .addAll( + prepareListStructProps( + doc, "//datacite:identifier[@identifierType != 'URL']", "@identifierType", "dnet:pid_types", + "dnet:pid_types", info)); + res + .addAll( + prepareListStructProps( + doc, "//datacite:alternateIdentifier[@alternateIdentifierType != 'URL']", + "@alternateIdentifierType", "dnet:pid_types", "dnet:pid_types", info)); return res; } From b81f2741d24971175f5a2be5f124b54b1bcc5958 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Tue, 26 May 2020 13:13:17 +0200 Subject: [PATCH 25/25] xquery --- .../eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery new file mode 100644 index 0000000000..4938c0aa42 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery @@ -0,0 +1,5 @@ +for $x in collection(' /db/DRIVER/VocabularyDSResources/VocabularyDSResourceType') + let $vocid := $x//VOCABULARY_NAME/@code + let $vocname := $x//VOCABULARY_NAME/text() + for $term in ($x//TERM) + return concat($vocid,' @=@ ',$vocname,' @=@ ',$term/@code,' @=@ ',$term/@english_name)