diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked/alreadyLinked_20.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked/alreadyLinked_20.json new file mode 100644 index 000000000..ee737ce26 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked/alreadyLinked_20.json @@ -0,0 +1,20 @@ +{"key":"50|acm_________::3133635707788d2180bcef09e01a903c","valueSet":["20|dedup_wf_001::5ab59ffa94c31a140d4a56c594ea5865"]} +{"key":"50|core________::0308a76f6f8bc4db75a817d53a7e76a4","valueSet":["20|wt__________::a72760363ca885e6bef165804770e00c","20|nih_________::5c7f089c177ba49f92033f72e2aff724","20|dedup_wf_001::c88bf88e0a4dea271a3e2f832d952238"]} +{"key":"50|core________::04c8f896aef9e54867f2bf4236e9c810","valueSet":["20|snsf________::1496b1b4fc4d5509b16f2c217be480dc","20|dedup_wf_001::06731b587a9ea654103a6b0ebcb234ff","20|nih_________::c5722b087a5e707a50aa8f9f2ebf785d","20|snsf________::71d0a944b61b1a94068595f840005a2f","20|nih_________::dd3428794aef214a3bc2cad6dd548ba6","20|rcuk________::45aac2108e54b6503d1e611aa5872c03","20|nih_________::e1d47fdb7bba9eaeed82a95c578d6e90","20|dedup_wf_001::e3b52200d2fd4ff883478f5bef312efe","20|snsf________::c5c565d3422a7eb22886f3a4c93c32ea","20|nih_________::91154321f75ba26021efa18f7eeaa541","20|wt__________::38013971ca0c021fd65abce2171b82eb","20|wt__________::a6114989a56a1dfae6cbb201d14823f0","20|snsf________::b7af2f99e1e06750a4664ae401802734","20|wt__________::757c54e33d4e925c8c17edf032cdfacc","20|wt__________::1d57a87af1bbc2b7e211305fc747c9ad","20|wt__________::7cbb8c06f702b8871948acd370df892f","20|dedup_wf_001::53a8606f32787c4b3c663fd90ee97b17","20|wt__________::8473a929b793e56d2299a1a5aa08f617","20|nih_________::5e0fc2ef31bc189207f250db818fea0e","20|nih_________::1cd0
8fd26ef03fd2f51e9aeb34ed9486","20|nih_________::1c270e0dd2552b4e3cf295cdb7db1cc9","20|wt__________::04abd842647bdbc751b1eebe2f142013","20|nsf_________::3eaa8be3f16b0f0d7563b9117cd1f660","20|dedup_wf_001::c1b81dadf1e4cbf23a61833ff9ae8a31","20|nih_________::3716e1c14ab7ca14161278c9bbb8bdbb","20|dedup_wf_001::b7b403a764ea4e3acb12d999675aa73c","20|nih_________::300049f12fa0f5bc37db3a5636869743","20|wt__________::ed1e2be693353d370066fddbf862f23a","20|nsf_________::72a3747a18c56f3701494a0c0eadc5c9","20|rcuk________::e8877abcab4bc187339a242aa5bc2e09","20|microsoft___::119a535bfd240d7560fe4efec416bcd2","20|wt__________::be4e939abf9617557a35862e255493da","20|dedup_wf_001::3f6f17996747467f6047dfe019c8e4c7","20|snsf________::1f4e34433767faf965f33849bb0f7fb1","20|dedup_wf_001::9f1647eae28911113d1dcbe49c42275b","20|opendoar____::6835febcf408fe892504bdfd2ebe669c","20|dedup_wf_001::528bd21573101493c6c7d120b17a67e9"]} +{"key":"50|core________::061dc912098a6a52e85a824161bd32a2","valueSet":["20|dedup_wf_001::67f983a98e2c0cc0121e2db46d2bd00a","20|wt__________::59b5d99b2dde58df1655e0e5bb236c0a","20|wt__________::e84b06dbc1b26f413791c1304ca8d6a3","20|dedup_wf_001::7b118dab509f49b4fbd6dccfdbc479af","20|wt__________::53c2c2895613ff9b759f64c24b6cb17c","20|nih_________::43255cf9c16732bc4ec1d5f580f44928","20|gsrt________::455b984b47945e1fd04e92c9c0eeca04","20|dedup_wf_001::e712c08f721e8f167f93888f590314ea","20|rcuk________::8b0aee2a7026dc92d4c05683ae45c894","20|dedup_wf_001::3c19a02cea18f8eeb2034b6acc544b7e","20|wt__________::26a9d64d00b8e1005cb6bbad2b7364cf","20|rcuk________::e57ab9be7acd7b93ba34aafab1a5b96c","20|nih_________::ca09277064edbd89f71e1602d98b4dd8","20|gsrt________::7c309ee758e6c6e0dce43c67b9343e82","20|nsf_________::53c2c2895613ff9b759f64c24b6cb17c","20|nih_________::c57387345f51a40ad2284089b120be3f"]} 
+{"key":"50|core________::088190cf9dc9632e8d9ba5f5f1de1303","valueSet":["20|dedup_wf_001::286621caef868bbdc52918699e3cdc79","20|dedup_wf_001::84707a912d45d666fef35c5cd77fc203","20|dedup_wf_001::30fd03f4977438b0471a1d4db6317e71","20|dedup_wf_001::83d9dc4a0eba8737819b87ce8e737a49","20|dedup_wf_001::aeb9e738d873acebdca52c9ccd9559bd","20|dedup_wf_001::427a8bfe68648f9e30cb47434144da68","20|nih_________::ffa0ea174845f1548e520a047cf53165"]} +{"key":"50|core________::117d295998199f498fa561e9c26e7ae3","valueSet":["20|dedup_wf_001::d9f82e62c3114dc7f42b9da9b7f6fc64"]} +{"key":"50|core________::168a8f2e4ffe9b0e7c6bc100f34f2de5","valueSet":["20|wt__________::63fe73f079c5ff4e925c6cfc1758a98b","20|dedup_wf_001::b3c2f5e700cee15ad9752ab961df5930"]} +{"key":"50|core________::16a3b520030d82ad16a30992b124e69d","valueSet":["20|wt__________::a72760363ca885e6bef165804770e00c","20|nih_________::5c7f089c177ba49f92033f72e2aff724","20|dedup_wf_001::c88bf88e0a4dea271a3e2f832d952238"]} +{"key":"50|core________::172e3da668d18f41ea4ccdf7f2f39e53","valueSet":["20|nih_________::126cbf4b13249e65098ddb4835f47456","20|aka_________::506e3d2f7507a66584b8b3430ade20cb","20|dedup_wf_001::4746df4ff8bbb7e991ad343ccff8bbc7","20|wt__________::aff5133ca9cf0b810cc331d498bac9b0","20|wt__________::0c3bf892603817e5eff6e4f08a530ea2"]} +{"key":"50|core________::19f2fc91fe1db2ad62db598aa9aa8ab3","valueSet":["20|dedup_wf_001::dbbd988f8d57a9d11286caefdf35acaa"]} +{"key":"50|core________::1dceb5a29cd42728e410474fe0fda191","valueSet":["20|wt__________::b1ef2f643c948a2ef49005f9145ed556","20|dedup_wf_001::866fa622e3c0ab6227cd462f40cdcac8","20|rcuk________::63ecf5736189d299fc3e043e14428b8d","20|nsf_________::fcf880eab7315e0a5f3937c5a16c04b0","20|dedup_wf_001::65862ec7d57f700a130dee916bea66de"]} +{"key":"50|core________::2580c0b59b7457f571acdc829d1765a3","valueSet":["20|doajarticles::0f6e2c32a27c307b06edf7862c591973","20|opendoar____::4f10fb61c457cf124e5917391baaa3c2"]} 
+{"key":"50|core________::2624b8248a9febdad9bc456d358b30ed","valueSet":["20|dedup_wf_001::fb4eba3cea53264bddd59a4ade9973b3","20|rcuk________::b00968d2100a4b62447841aef5bdff62"]} +{"key":"50|core________::26820a282ef54882f7a5be74767fc02b","valueSet":["20|rcuk________::8ad6d06f3b4d09dc67142c158c7cf5b9","20|rcuk________::01ad471b66687b1213ceb08b5d7aa6c2"]} +{"key":"50|core________::2a8de3e0bbcab49066aa9de4bbb89bfa","valueSet":["20|dedup_wf_001::2ea78875d19c8cea63f7e958e5204136","20|corda_______::6821a8e260b8b97f5fb5e80168329d5b","20|dedup_wf_001::9d0ba437d73b19f55b53c578ac970ea2"]} +{"key":"50|core________::2c7d139419d2895d3bf0112b50108f75","valueSet":["20|dedup_wf_001::96ada508ea5d85a1e516bf9799413906","20|dedup_wf_001::d0ea749da6988bcdb2f30d77c64e2f1e","20|wt__________::f1ba5bd552edf15db494dc3020f27470","20|nih_________::ceeae4f78a5666daf4c45acdbbedde99","20|wt__________::84ef588eeeb4ef77e45ccfbbf3aef69c","20|wt__________::8eef7e1370ea81c2aa3dbc239b2bf5d8"]} +{"key":"50|core________::2cf1f6282498fa37aeaf678f8c6e5843","valueSet":["20|snsf________::73999c828ca67fd2d006100a8369c1eb"]} +{"key":"50|core________::2dffff00500c2354b506814f6a1ec148","valueSet":["20|wt__________::c6d89e908582fddf3e4c658a458807c3","20|wt__________::e7b2c9f3d3f3f1503092bf1ba2b163db","20|gsrt________::ab510bb43d6c654ed3d37b9c5ed5c971","20|dedup_wf_001::179d0313fa7d5fb2bef5f312ecdd16fe","20|gsrt________::cbffb510b01e81cc055fe61105c86154","20|opendoar____::5d462d78d512c1184dd384ef2dc35b7e","20|dedup_wf_001::646f14555ea42b260499239a7231b285","20|wt__________::5d462d78d512c1184dd384ef2dc35b7e","20|nih_________::a32a254b024265db2e24a66291c7c1e0","20|dedup_wf_001::5490ec18da9721e2c8d974fb73c62467","20|dedup_wf_001::3bc91ed90f44d0908258e132659bc754"]} 
+{"key":"50|core________::3031a50bf5c80865af4841ab42aaf57e","valueSet":["20|nih_________::1b46e3665d8be2b524c285a27ca952b8","20|nsf_________::71450a4b98015592ee3f525a51584608","20|snsf________::fc921725875adb56f2275579b31f805c","20|aka_________::fa5b7357f86c71ea15734282054f1183","20|wt__________::18fdb5b42b22fdcc45e323eb4d20c91b","20|wt__________::71450a4b98015592ee3f525a51584608","20|dedup_wf_001::8aaf46d4e4919dc55b8a5cac7a15399f"]} +{"key":"50|core________::31116372ae189ee456fc06dfa0f6cf7a","valueSet":["20|aka_________::c5b9aa0a905f89c51221f9f4fda22b20","20|aka_________::d9d3242062a7a3c483a7926fdba17bb6","20|nih_________::ede5c9e31cfb37a397d6cfe1940d045e","20|wt__________::8adcc12ffee195ae46679e8cf332a364","20|wt__________::5e954c57b0ac7aaf3fc16deeaf442389","20|snsf________::ddd964d550bfc6e1ce18f83655ba6901","20|rcuk________::a705d2ee7bf0bd225264b4a5794795ce","20|nih_________::8adcc12ffee195ae46679e8cf332a364","20|microsoft___::53732c6c7bb9daf5953fdb61fc0cd5bd"]} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz deleted file mode 100644 index 0c12568e0..000000000 Binary files a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz and /dev/null differ diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked/alreadyLinked_20.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked/alreadyLinked_20.json new file mode 100644 index 000000000..a3fcffe92 --- /dev/null +++ 
b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked/alreadyLinked_20.json @@ -0,0 +1,20 @@ +{"key":"50|dedup_wf_001::40ea2f24181f6ae77b866ebcbffba523","valueSet":["20|dedup_wf_001::5ab59ffa94c31a140d4a56c594ea5865"]} +{"key":"50|dedup_wf_001::b67bc915603fc01e445f2b5888ba7218","valueSet":["20|wt__________::a72760363ca885e6bef165804770e00c","20|nih_________::5c7f089c177ba49f92033f72e2aff724","20|dedup_wf_001::c88bf88e0a4dea271a3e2f832d952238"]} +{"key":"50|od______1582::6e7a9b21a2feef45673890432af34244","valueSet":["20|snsf________::1496b1b4fc4d5509b16f2c217be480dc","20|dedup_wf_001::06731b587a9ea654103a6b0ebcb234ff","20|nih_________::c5722b087a5e707a50aa8f9f2ebf785d","20|snsf________::71d0a944b61b1a94068595f840005a2f","20|nih_________::dd3428794aef214a3bc2cad6dd548ba6","20|rcuk________::45aac2108e54b6503d1e611aa5872c03","20|nih_________::e1d47fdb7bba9eaeed82a95c578d6e90","20|dedup_wf_001::e3b52200d2fd4ff883478f5bef312efe","20|snsf________::c5c565d3422a7eb22886f3a4c93c32ea","20|nih_________::91154321f75ba26021efa18f7eeaa541","20|wt__________::38013971ca0c021fd65abce2171b82eb","20|wt__________::a6114989a56a1dfae6cbb201d14823f0","20|snsf________::b7af2f99e1e06750a4664ae401802734","20|wt__________::757c54e33d4e925c8c17edf032cdfacc","20|wt__________::1d57a87af1bbc2b7e211305fc747c9ad","20|wt__________::7cbb8c06f702b8871948acd370df892f","20|dedup_wf_001::53a8606f32787c4b3c663fd90ee97b17","20|wt__________::8473a929b793e56d2299a1a5aa08f617","20|nih_________::5e0fc2ef31bc189207f250db818fea0e","20|nih_________::1cd08fd26ef03fd2f51e9aeb34ed9486","20|nih_________::1c270e0dd2552b4e3cf295cdb7db1cc9","20|wt__________::04abd842647bdbc751b1eebe2f142013","20|nsf_________::3eaa8be3f16b0f0d7563b9117cd1f660","20|dedup_wf_001::c1b81dadf1e4cbf23a61833ff9ae8a31","20|nih_________::3716e1c14ab7ca14161278c9bbb8bdbb","20|dedup_wf_001::b7b403a764ea4e3acb12d999675aa73c","20|nih_________::300049f12fa0f5bc37db3a563686
9743","20|wt__________::ed1e2be693353d370066fddbf862f23a","20|nsf_________::72a3747a18c56f3701494a0c0eadc5c9","20|rcuk________::e8877abcab4bc187339a242aa5bc2e09","20|microsoft___::119a535bfd240d7560fe4efec416bcd2","20|wt__________::be4e939abf9617557a35862e255493da","20|dedup_wf_001::3f6f17996747467f6047dfe019c8e4c7","20|snsf________::1f4e34433767faf965f33849bb0f7fb1","20|dedup_wf_001::9f1647eae28911113d1dcbe49c42275b","20|opendoar____::6835febcf408fe892504bdfd2ebe669c","20|dedup_wf_001::528bd21573101493c6c7d120b17a67e9"]} +{"key":"50|core________::061dc912098a6a52e85a824161bd32a2","valueSet":["20|dedup_wf_001::67f983a98e2c0cc0121e2db46d2bd00a","20|wt__________::59b5d99b2dde58df1655e0e5bb236c0a","20|wt__________::e84b06dbc1b26f413791c1304ca8d6a3","20|dedup_wf_001::7b118dab509f49b4fbd6dccfdbc479af","20|wt__________::53c2c2895613ff9b759f64c24b6cb17c","20|nih_________::43255cf9c16732bc4ec1d5f580f44928","20|gsrt________::455b984b47945e1fd04e92c9c0eeca04","20|dedup_wf_001::e712c08f721e8f167f93888f590314ea","20|rcuk________::8b0aee2a7026dc92d4c05683ae45c894","20|dedup_wf_001::3c19a02cea18f8eeb2034b6acc544b7e","20|wt__________::26a9d64d00b8e1005cb6bbad2b7364cf","20|rcuk________::e57ab9be7acd7b93ba34aafab1a5b96c","20|nih_________::ca09277064edbd89f71e1602d98b4dd8","20|gsrt________::7c309ee758e6c6e0dce43c67b9343e82","20|nsf_________::53c2c2895613ff9b759f64c24b6cb17c","20|nih_________::c57387345f51a40ad2284089b120be3f"]} +{"key":"50|core________::088190cf9dc9632e8d9ba5f5f1de1303","valueSet":["20|dedup_wf_001::286621caef868bbdc52918699e3cdc79","20|dedup_wf_001::84707a912d45d666fef35c5cd77fc203","20|dedup_wf_001::30fd03f4977438b0471a1d4db6317e71","20|dedup_wf_001::83d9dc4a0eba8737819b87ce8e737a49","20|dedup_wf_001::aeb9e738d873acebdca52c9ccd9559bd","20|dedup_wf_001::427a8bfe68648f9e30cb47434144da68","20|nih_________::ffa0ea174845f1548e520a047cf53165"]} +{"key":"50|od_______109::f375befa62a741e9250e55bcfa88f9a6","valueSet":["20|dedup_wf_001::d9f82e62c3114dc7f42b9da9b7f6fc64"]} 
+{"key":"50|core________::168a8f2e4ffe9b0e7c6bc100f34f2de5","valueSet":["20|wt__________::63fe73f079c5ff4e925c6cfc1758a98b","20|dedup_wf_001::b3c2f5e700cee15ad9752ab961df5930"]} +{"key":"50|core________::16a3b520030d82ad16a30992b124e69d","valueSet":["20|wt__________::a72760363ca885e6bef165804770e00c","20|nih_________::5c7f089c177ba49f92033f72e2aff724","20|dedup_wf_001::c88bf88e0a4dea271a3e2f832d952238"]} +{"key":"50|core________::172e3da668d18f41ea4ccdf7f2f39e53","valueSet":["20|nih_________::126cbf4b13249e65098ddb4835f47456","20|aka_________::506e3d2f7507a66584b8b3430ade20cb","20|dedup_wf_001::4746df4ff8bbb7e991ad343ccff8bbc7","20|wt__________::aff5133ca9cf0b810cc331d498bac9b0","20|wt__________::0c3bf892603817e5eff6e4f08a530ea2"]} +{"key":"50|core________::19f2fc91fe1db2ad62db598aa9aa8ab3","valueSet":["20|dedup_wf_001::dbbd988f8d57a9d11286caefdf35acaa"]} +{"key":"50|core________::1dceb5a29cd42728e410474fe0fda191","valueSet":["20|wt__________::b1ef2f643c948a2ef49005f9145ed556","20|dedup_wf_001::866fa622e3c0ab6227cd462f40cdcac8","20|rcuk________::63ecf5736189d299fc3e043e14428b8d","20|nsf_________::fcf880eab7315e0a5f3937c5a16c04b0","20|dedup_wf_001::65862ec7d57f700a130dee916bea66de"]} +{"key":"50|core________::2580c0b59b7457f571acdc829d1765a3","valueSet":["20|doajarticles::0f6e2c32a27c307b06edf7862c591973","20|opendoar____::4f10fb61c457cf124e5917391baaa3c2"]} +{"key":"50|core________::2624b8248a9febdad9bc456d358b30ed","valueSet":["20|dedup_wf_001::fb4eba3cea53264bddd59a4ade9973b3","20|rcuk________::b00968d2100a4b62447841aef5bdff62"]} +{"key":"50|core________::26820a282ef54882f7a5be74767fc02b","valueSet":["20|rcuk________::8ad6d06f3b4d09dc67142c158c7cf5b9","20|rcuk________::01ad471b66687b1213ceb08b5d7aa6c2"]} +{"key":"50|core________::2a8de3e0bbcab49066aa9de4bbb89bfa","valueSet":["20|dedup_wf_001::2ea78875d19c8cea63f7e958e5204136","20|corda_______::6821a8e260b8b97f5fb5e80168329d5b","20|dedup_wf_001::9d0ba437d73b19f55b53c578ac970ea2"]} 
+{"key":"50|core________::2c7d139419d2895d3bf0112b50108f75","valueSet":["20|dedup_wf_001::96ada508ea5d85a1e516bf9799413906","20|dedup_wf_001::d0ea749da6988bcdb2f30d77c64e2f1e","20|wt__________::f1ba5bd552edf15db494dc3020f27470","20|nih_________::ceeae4f78a5666daf4c45acdbbedde99","20|wt__________::84ef588eeeb4ef77e45ccfbbf3aef69c","20|wt__________::8eef7e1370ea81c2aa3dbc239b2bf5d8"]} +{"key":"50|core________::2cf1f6282498fa37aeaf678f8c6e5843","valueSet":["20|snsf________::73999c828ca67fd2d006100a8369c1eb"]} +{"key":"50|core________::2dffff00500c2354b506814f6a1ec148","valueSet":["20|wt__________::c6d89e908582fddf3e4c658a458807c3","20|wt__________::e7b2c9f3d3f3f1503092bf1ba2b163db","20|gsrt________::ab510bb43d6c654ed3d37b9c5ed5c971","20|dedup_wf_001::179d0313fa7d5fb2bef5f312ecdd16fe","20|gsrt________::cbffb510b01e81cc055fe61105c86154","20|opendoar____::5d462d78d512c1184dd384ef2dc35b7e","20|dedup_wf_001::646f14555ea42b260499239a7231b285","20|wt__________::5d462d78d512c1184dd384ef2dc35b7e","20|nih_________::a32a254b024265db2e24a66291c7c1e0","20|dedup_wf_001::5490ec18da9721e2c8d974fb73c62467","20|dedup_wf_001::3bc91ed90f44d0908258e132659bc754"]} +{"key":"50|core________::3031a50bf5c80865af4841ab42aaf57e","valueSet":["20|nih_________::1b46e3665d8be2b524c285a27ca952b8","20|nsf_________::71450a4b98015592ee3f525a51584608","20|snsf________::fc921725875adb56f2275579b31f805c","20|aka_________::fa5b7357f86c71ea15734282054f1183","20|wt__________::18fdb5b42b22fdcc45e323eb4d20c91b","20|wt__________::71450a4b98015592ee3f525a51584608","20|dedup_wf_001::8aaf46d4e4919dc55b8a5cac7a15399f"]} 
+{"key":"50|core________::31116372ae189ee456fc06dfa0f6cf7a","valueSet":["20|aka_________::c5b9aa0a905f89c51221f9f4fda22b20","20|aka_________::d9d3242062a7a3c483a7926fdba17bb6","20|nih_________::ede5c9e31cfb37a397d6cfe1940d045e","20|wt__________::8adcc12ffee195ae46679e8cf332a364","20|wt__________::5e954c57b0ac7aaf3fc16deeaf442389","20|snsf________::ddd964d550bfc6e1ce18f83655ba6901","20|rcuk________::a705d2ee7bf0bd225264b4a5794795ce","20|nih_________::8adcc12ffee195ae46679e8cf332a364","20|microsoft___::53732c6c7bb9daf5953fdb61fc0cd5bd"]} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz deleted file mode 100644 index 0f8d6fdeb..000000000 Binary files a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz and /dev/null differ diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/alreadyLinked/alreadyLinked_20.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/alreadyLinked/alreadyLinked_20.json new file mode 100644 index 000000000..ee737ce26 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/alreadyLinked/alreadyLinked_20.json @@ -0,0 +1,20 @@ +{"key":"50|acm_________::3133635707788d2180bcef09e01a903c","valueSet":["20|dedup_wf_001::5ab59ffa94c31a140d4a56c594ea5865"]} 
+{"key":"50|core________::0308a76f6f8bc4db75a817d53a7e76a4","valueSet":["20|wt__________::a72760363ca885e6bef165804770e00c","20|nih_________::5c7f089c177ba49f92033f72e2aff724","20|dedup_wf_001::c88bf88e0a4dea271a3e2f832d952238"]} +{"key":"50|core________::04c8f896aef9e54867f2bf4236e9c810","valueSet":["20|snsf________::1496b1b4fc4d5509b16f2c217be480dc","20|dedup_wf_001::06731b587a9ea654103a6b0ebcb234ff","20|nih_________::c5722b087a5e707a50aa8f9f2ebf785d","20|snsf________::71d0a944b61b1a94068595f840005a2f","20|nih_________::dd3428794aef214a3bc2cad6dd548ba6","20|rcuk________::45aac2108e54b6503d1e611aa5872c03","20|nih_________::e1d47fdb7bba9eaeed82a95c578d6e90","20|dedup_wf_001::e3b52200d2fd4ff883478f5bef312efe","20|snsf________::c5c565d3422a7eb22886f3a4c93c32ea","20|nih_________::91154321f75ba26021efa18f7eeaa541","20|wt__________::38013971ca0c021fd65abce2171b82eb","20|wt__________::a6114989a56a1dfae6cbb201d14823f0","20|snsf________::b7af2f99e1e06750a4664ae401802734","20|wt__________::757c54e33d4e925c8c17edf032cdfacc","20|wt__________::1d57a87af1bbc2b7e211305fc747c9ad","20|wt__________::7cbb8c06f702b8871948acd370df892f","20|dedup_wf_001::53a8606f32787c4b3c663fd90ee97b17","20|wt__________::8473a929b793e56d2299a1a5aa08f617","20|nih_________::5e0fc2ef31bc189207f250db818fea0e","20|nih_________::1cd08fd26ef03fd2f51e9aeb34ed9486","20|nih_________::1c270e0dd2552b4e3cf295cdb7db1cc9","20|wt__________::04abd842647bdbc751b1eebe2f142013","20|nsf_________::3eaa8be3f16b0f0d7563b9117cd1f660","20|dedup_wf_001::c1b81dadf1e4cbf23a61833ff9ae8a31","20|nih_________::3716e1c14ab7ca14161278c9bbb8bdbb","20|dedup_wf_001::b7b403a764ea4e3acb12d999675aa73c","20|nih_________::300049f12fa0f5bc37db3a5636869743","20|wt__________::ed1e2be693353d370066fddbf862f23a","20|nsf_________::72a3747a18c56f3701494a0c0eadc5c9","20|rcuk________::e8877abcab4bc187339a242aa5bc2e09","20|microsoft___::119a535bfd240d7560fe4efec416bcd2","20|wt__________::be4e939abf9617557a35862e255493da","20|dedup_wf_001::3f6f17996747467f
6047dfe019c8e4c7","20|snsf________::1f4e34433767faf965f33849bb0f7fb1","20|dedup_wf_001::9f1647eae28911113d1dcbe49c42275b","20|opendoar____::6835febcf408fe892504bdfd2ebe669c","20|dedup_wf_001::528bd21573101493c6c7d120b17a67e9"]} +{"key":"50|core________::061dc912098a6a52e85a824161bd32a2","valueSet":["20|dedup_wf_001::67f983a98e2c0cc0121e2db46d2bd00a","20|wt__________::59b5d99b2dde58df1655e0e5bb236c0a","20|wt__________::e84b06dbc1b26f413791c1304ca8d6a3","20|dedup_wf_001::7b118dab509f49b4fbd6dccfdbc479af","20|wt__________::53c2c2895613ff9b759f64c24b6cb17c","20|nih_________::43255cf9c16732bc4ec1d5f580f44928","20|gsrt________::455b984b47945e1fd04e92c9c0eeca04","20|dedup_wf_001::e712c08f721e8f167f93888f590314ea","20|rcuk________::8b0aee2a7026dc92d4c05683ae45c894","20|dedup_wf_001::3c19a02cea18f8eeb2034b6acc544b7e","20|wt__________::26a9d64d00b8e1005cb6bbad2b7364cf","20|rcuk________::e57ab9be7acd7b93ba34aafab1a5b96c","20|nih_________::ca09277064edbd89f71e1602d98b4dd8","20|gsrt________::7c309ee758e6c6e0dce43c67b9343e82","20|nsf_________::53c2c2895613ff9b759f64c24b6cb17c","20|nih_________::c57387345f51a40ad2284089b120be3f"]} +{"key":"50|core________::088190cf9dc9632e8d9ba5f5f1de1303","valueSet":["20|dedup_wf_001::286621caef868bbdc52918699e3cdc79","20|dedup_wf_001::84707a912d45d666fef35c5cd77fc203","20|dedup_wf_001::30fd03f4977438b0471a1d4db6317e71","20|dedup_wf_001::83d9dc4a0eba8737819b87ce8e737a49","20|dedup_wf_001::aeb9e738d873acebdca52c9ccd9559bd","20|dedup_wf_001::427a8bfe68648f9e30cb47434144da68","20|nih_________::ffa0ea174845f1548e520a047cf53165"]} +{"key":"50|core________::117d295998199f498fa561e9c26e7ae3","valueSet":["20|dedup_wf_001::d9f82e62c3114dc7f42b9da9b7f6fc64"]} +{"key":"50|core________::168a8f2e4ffe9b0e7c6bc100f34f2de5","valueSet":["20|wt__________::63fe73f079c5ff4e925c6cfc1758a98b","20|dedup_wf_001::b3c2f5e700cee15ad9752ab961df5930"]} 
+{"key":"50|core________::16a3b520030d82ad16a30992b124e69d","valueSet":["20|wt__________::a72760363ca885e6bef165804770e00c","20|nih_________::5c7f089c177ba49f92033f72e2aff724","20|dedup_wf_001::c88bf88e0a4dea271a3e2f832d952238"]} +{"key":"50|core________::172e3da668d18f41ea4ccdf7f2f39e53","valueSet":["20|nih_________::126cbf4b13249e65098ddb4835f47456","20|aka_________::506e3d2f7507a66584b8b3430ade20cb","20|dedup_wf_001::4746df4ff8bbb7e991ad343ccff8bbc7","20|wt__________::aff5133ca9cf0b810cc331d498bac9b0","20|wt__________::0c3bf892603817e5eff6e4f08a530ea2"]} +{"key":"50|core________::19f2fc91fe1db2ad62db598aa9aa8ab3","valueSet":["20|dedup_wf_001::dbbd988f8d57a9d11286caefdf35acaa"]} +{"key":"50|core________::1dceb5a29cd42728e410474fe0fda191","valueSet":["20|wt__________::b1ef2f643c948a2ef49005f9145ed556","20|dedup_wf_001::866fa622e3c0ab6227cd462f40cdcac8","20|rcuk________::63ecf5736189d299fc3e043e14428b8d","20|nsf_________::fcf880eab7315e0a5f3937c5a16c04b0","20|dedup_wf_001::65862ec7d57f700a130dee916bea66de"]} +{"key":"50|core________::2580c0b59b7457f571acdc829d1765a3","valueSet":["20|doajarticles::0f6e2c32a27c307b06edf7862c591973","20|opendoar____::4f10fb61c457cf124e5917391baaa3c2"]} +{"key":"50|core________::2624b8248a9febdad9bc456d358b30ed","valueSet":["20|dedup_wf_001::fb4eba3cea53264bddd59a4ade9973b3","20|rcuk________::b00968d2100a4b62447841aef5bdff62"]} +{"key":"50|core________::26820a282ef54882f7a5be74767fc02b","valueSet":["20|rcuk________::8ad6d06f3b4d09dc67142c158c7cf5b9","20|rcuk________::01ad471b66687b1213ceb08b5d7aa6c2"]} +{"key":"50|core________::2a8de3e0bbcab49066aa9de4bbb89bfa","valueSet":["20|dedup_wf_001::2ea78875d19c8cea63f7e958e5204136","20|corda_______::6821a8e260b8b97f5fb5e80168329d5b","20|dedup_wf_001::9d0ba437d73b19f55b53c578ac970ea2"]} 
+{"key":"50|core________::2c7d139419d2895d3bf0112b50108f75","valueSet":["20|dedup_wf_001::96ada508ea5d85a1e516bf9799413906","20|dedup_wf_001::d0ea749da6988bcdb2f30d77c64e2f1e","20|wt__________::f1ba5bd552edf15db494dc3020f27470","20|nih_________::ceeae4f78a5666daf4c45acdbbedde99","20|wt__________::84ef588eeeb4ef77e45ccfbbf3aef69c","20|wt__________::8eef7e1370ea81c2aa3dbc239b2bf5d8"]} +{"key":"50|core________::2cf1f6282498fa37aeaf678f8c6e5843","valueSet":["20|snsf________::73999c828ca67fd2d006100a8369c1eb"]} +{"key":"50|core________::2dffff00500c2354b506814f6a1ec148","valueSet":["20|wt__________::c6d89e908582fddf3e4c658a458807c3","20|wt__________::e7b2c9f3d3f3f1503092bf1ba2b163db","20|gsrt________::ab510bb43d6c654ed3d37b9c5ed5c971","20|dedup_wf_001::179d0313fa7d5fb2bef5f312ecdd16fe","20|gsrt________::cbffb510b01e81cc055fe61105c86154","20|opendoar____::5d462d78d512c1184dd384ef2dc35b7e","20|dedup_wf_001::646f14555ea42b260499239a7231b285","20|wt__________::5d462d78d512c1184dd384ef2dc35b7e","20|nih_________::a32a254b024265db2e24a66291c7c1e0","20|dedup_wf_001::5490ec18da9721e2c8d974fb73c62467","20|dedup_wf_001::3bc91ed90f44d0908258e132659bc754"]} +{"key":"50|core________::3031a50bf5c80865af4841ab42aaf57e","valueSet":["20|nih_________::1b46e3665d8be2b524c285a27ca952b8","20|nsf_________::71450a4b98015592ee3f525a51584608","20|snsf________::fc921725875adb56f2275579b31f805c","20|aka_________::fa5b7357f86c71ea15734282054f1183","20|wt__________::18fdb5b42b22fdcc45e323eb4d20c91b","20|wt__________::71450a4b98015592ee3f525a51584608","20|dedup_wf_001::8aaf46d4e4919dc55b8a5cac7a15399f"]} 
+{"key":"50|core________::31116372ae189ee456fc06dfa0f6cf7a","valueSet":["20|aka_________::c5b9aa0a905f89c51221f9f4fda22b20","20|aka_________::d9d3242062a7a3c483a7926fdba17bb6","20|nih_________::ede5c9e31cfb37a397d6cfe1940d045e","20|wt__________::8adcc12ffee195ae46679e8cf332a364","20|wt__________::5e954c57b0ac7aaf3fc16deeaf442389","20|snsf________::ddd964d550bfc6e1ce18f83655ba6901","20|rcuk________::a705d2ee7bf0bd225264b4a5794795ce","20|nih_________::8adcc12ffee195ae46679e8cf332a364","20|microsoft___::53732c6c7bb9daf5953fdb61fc0cd5bd"]} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz deleted file mode 100644 index 430bfe6bb..000000000 Binary files a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz and /dev/null differ diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 9f11fa49d..d925b3694 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -31,17 +31,6 @@ join datasource d on d.id = ri.collectedfrom where pidtype='Digital Object Identifier' and d.name ='Crossref') tmp on tmp.id=p.id; -create table indi_pub_gold_oa stored as parquet as -select distinct p.id, coalesce(gold_oa, 0) as gold_oa 
-from publication p
-left outer join (
-select p.id, 1 as gold_oa
-from publication p
-join result_instance ri on ri.id = p.id
-join datasource on datasource.id = ri.hostedby
-where datasource.id like '%doajarticles%') tmp
-on p.id= tmp.id;
-
 ---- Sprint 2 ----
 create table indi_result_has_cc_licence stored as parquet as
 select distinct r.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license
@@ -144,16 +133,6 @@ join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_onli
 and (ps.journal_is_in_doaj=false and ps.journal_is_oa=false)) tmp
 on pd.id=tmp.id;
 
-create table indi_is_gold_oa stored as parquet as
-(select distinct pd.id, coalesce(gold_oa, 0) as gold_oa
-from publication_datasources pd
-left outer join (
-select pd.id, 1 as gold_oa from publication_datasources pd
-join datasource d on d.id=pd.datasource
-join stats_ext.plan_s_jn ps on (ps.issn_print=d.issn_printed or ps.issn_online=d.issn_online)
-where ps.journal_is_in_doaj is true or ps.journal_is_oa is true) tmp
-on pd.id=tmp.id);
-
 create table indi_pub_in_transformative stored as parquet as
 select distinct pd.id, coalesce(is_transformative, 0) as is_transformative
 from publication pd
@@ -177,4 +156,229 @@ on tmp.id=ri.id;
 
 ---- Sprint 5 ----
 create table indi_result_no_of_copies stored as parquet as
-select id, count(id) as number_of_copies from result_instance group by id;
\ No newline at end of file
+select id, count(id) as number_of_copies from result_instance group by id;
+
+---- Sprint 6 ----
+-- Gold OA flag per publication: is_gold=1 when one of its datasources has a printed or online ISSN
+-- (longer than 7 characters) that matches issn_1 or issn_2 in stats_ext.oa_journals, otherwise 0.
+create table indi_pub_gold_oa stored as parquet as
+WITH gold_oa AS (
+    SELECT issn_l, journal_is_in_doaj,journal_is_oa, issn_1 as issn
+    FROM stats_ext.oa_journals
+    WHERE issn_1 != ""
+    UNION ALL
+    SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_2 as issn
+    FROM stats_ext.oa_journals
+    WHERE issn_2 != "" ),
+issn AS (
+    SELECT * FROM
+        (SELECT id, issn_printed as issn
+        FROM datasource WHERE issn_printed IS NOT NULL
+        UNION
+        SELECT id, issn_online as issn
+        FROM datasource WHERE issn_online IS NOT NULL) as issn
+    WHERE LENGTH(issn) > 7)
+SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold
+FROM publication_datasources pd
+LEFT OUTER JOIN (
+    SELECT pd.id, 1 as is_gold FROM publication_datasources pd
+    JOIN issn on issn.id=pd.datasource
+    JOIN gold_oa on issn.issn = gold_oa.issn) tmp ON pd.id=tmp.id;
+
+-- Same gold OA flag as indi_pub_gold_oa, computed over dataset_datasources.
+create table indi_datasets_gold_oa stored as parquet as
+WITH gold_oa AS (
+    SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_1 as issn
+    FROM stats_ext.oa_journals
+    WHERE issn_1 != ""
+    UNION
+    ALL SELECT issn_l,journal_is_in_doaj,journal_is_oa,issn_2 as issn
+    FROM stats_ext.oa_journals
+    WHERE issn_2 != "" ),
+issn AS (
+    SELECT *
+    FROM (
+        SELECT id,issn_printed as issn
+        FROM datasource
+        WHERE issn_printed IS NOT NULL
+        UNION
+        SELECT id, issn_online as issn
+        FROM datasource
+        WHERE issn_online IS NOT NULL ) as issn
+    WHERE LENGTH(issn) > 7)
+SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold
+FROM dataset_datasources pd
+LEFT OUTER JOIN (
+    SELECT pd.id, 1 as is_gold FROM dataset_datasources pd
+    JOIN issn on issn.id=pd.datasource
+    JOIN gold_oa on issn.issn = gold_oa.issn) tmp ON pd.id=tmp.id;
+
+-- Same gold OA flag as indi_pub_gold_oa, computed over software_datasources.
+create table indi_software_gold_oa stored as parquet as
+WITH gold_oa AS (
+    SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_1 as issn
+    FROM stats_ext.oa_journals
+    WHERE issn_1 != ""
+    UNION
+    ALL SELECT issn_l,journal_is_in_doaj,journal_is_oa,issn_2 as issn
+    FROM stats_ext.oa_journals
+    WHERE issn_2 != "" ),
+issn AS (
+    SELECT *
+    FROM (
+        SELECT id,issn_printed as issn
+        FROM datasource
+        WHERE issn_printed IS NOT NULL
+        UNION
+        SELECT id, issn_online as issn
+        FROM datasource
+        WHERE issn_online IS NOT NULL ) as issn
+    WHERE LENGTH(issn) > 7)
+SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold
+FROM software_datasources pd
+LEFT OUTER JOIN (
+    SELECT pd.id, 1 as is_gold FROM software_datasources pd
+    JOIN issn on issn.id=pd.datasource
+    JOIN gold_oa on issn.issn = gold_oa.issn) tmp ON pd.id=tmp.id;
+
+-- Per organization: average of the share of results with a PID and the share of results with an abstract
+-- (only the PID share when no abstract share exists). Organizations with no PID result are not emitted.
+create table indi_org_findable stored as parquet as
+with result_with_pid as (
+    select ro.organization organization, count(distinct rp.id) no_result_with_pid from result_organization ro
+    join result_pids rp on rp.id=ro.id
+    group by ro.organization),
+result_has_abstract as (
+    select ro.organization organization, count(distinct rp.id) no_result_with_abstract from result_organization ro
+    join result rp on rp.id=ro.id where rp.abstract=true
+    group by ro.organization),
+allresults as (
+    select organization, count(distinct id) no_allresults from result_organization
+    group by organization),
+result_with_pid_share as (
+    select allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults pid_share
+    from allresults
+    join result_with_pid on result_with_pid.organization=allresults.organization),
+result_with_abstract_share as (
+    select allresults.organization, result_has_abstract.no_result_with_abstract/allresults.no_allresults abstract_share
+    from allresults
+    join result_has_abstract on result_has_abstract.organization=allresults.organization)
+select allresults.organization, coalesce((pid_share+abstract_share)/2,pid_share) org_findable
+from allresults
+join result_with_pid_share on result_with_pid_share.organization=allresults.organization
+left outer join (
+    select organization, abstract_share from result_with_abstract_share) tmp on tmp.organization=allresults.organization;
+
+-- Per organization: mean of the gold OA shares of its datasets (c), publications (p) and software (s);
+-- falls back to the publication share alone when the dataset or software share is missing.
+-- Reads the indi_*_gold_oa tables created above and the result tables of the current database.
+create table indi_org_openess stored as parquet as
+WITH datasets_oa as (
+    SELECT ro.organization, count(dg.id) no_oadatasets FROM indi_datasets_gold_oa dg
+    join result_organization ro on dg.id=ro.id
+    join dataset ds on dg.id=ds.id
+    WHERE dg.is_gold=1
+    group by ro.organization),
+software_oa as (
+    SELECT ro.organization, count(dg.id) no_oasoftware FROM indi_software_gold_oa dg
+    join result_organization ro on dg.id=ro.id
+    join software ds on dg.id=ds.id
+    WHERE dg.is_gold=1
+    group by ro.organization),
+pubs_oa as (
+    SELECT ro.organization, count(dg.id) no_oapubs FROM indi_pub_gold_oa dg
+    join result_organization ro on dg.id=ro.id
+    join publication ds on dg.id=ds.id
+    where dg.is_gold=1
+    group by ro.organization),
+allpubs as (
+    SELECT ro.organization organization, count(ro.id) no_allpubs FROM result_organization ro
+    join publication ps on ps.id=ro.id
+    group by ro.organization),
+alldatasets as (
+    SELECT ro.organization organization, count(ro.id) no_alldatasets FROM result_organization ro
+    join dataset ps on ps.id=ro.id
+    group by ro.organization),
+allsoftware as (
+    SELECT ro.organization organization, count(ro.id) no_allsoftware FROM result_organization ro
+    join software ps on ps.id=ro.id
+    group by ro.organization),
+allpubsshare as (
+    select pubs_oa.organization, pubs_oa.no_oapubs/allpubs.no_allpubs p from allpubs
+    join pubs_oa on allpubs.organization=pubs_oa.organization),
+alldatasetssshare as (
+    select datasets_oa.organization, datasets_oa.no_oadatasets/alldatasets.no_alldatasets c
+    from alldatasets
+    join datasets_oa on alldatasets.organization=datasets_oa.organization),
+allsoftwaresshare as (
+    select software_oa.organization, software_oa.no_oasoftware/allsoftware.no_allsoftware s
+    from allsoftware
+    join software_oa on allsoftware.organization=software_oa.organization)
+select allpubsshare.organization, coalesce((c+p+s)/3, p) org_openess
+FROM allpubsshare
+left outer join (
+    select organization,c from
+    alldatasetssshare) tmp on tmp.organization=allpubsshare.organization
+left outer join (
+    select organization,s from allsoftwaresshare) tmp1 on tmp1.organization=allpubsshare.organization;
+
+-- Flags publications (is_hybrid_oa=1) that have a CC licence and whose datasource ISSN (printed or online,
+-- longer than 7 characters) matches an ISSN in stats_ext.plan_s_jn; 0 otherwise.
+-- NOTE(review): the doaj/oa filter is applied only to the issn_online branch of hybrid_oa - confirm this is intended.
+create table indi_pub_hybrid_oa_with_cc stored as parquet as
+WITH hybrid_oa AS (
+    SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_print as issn
+    FROM stats_ext.plan_s_jn
+    WHERE issn_print != ""
+    UNION ALL
+    SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_online as issn
+    FROM stats_ext.plan_s_jn
+    WHERE issn_online != "" and (journal_is_in_doaj = FALSE OR journal_is_oa = FALSE)),
+issn AS (
+    SELECT *
+    FROM (
+        SELECT id, issn_printed as issn
+        FROM datasource
+        WHERE issn_printed IS NOT NULL
+        UNION
+        SELECT id,issn_online as issn
+        FROM datasource
+        WHERE issn_online IS NOT NULL ) as issn
+    WHERE LENGTH(issn) > 7)
+SELECT DISTINCT pd.id, coalesce(is_hybrid_oa, 0) as is_hybrid_oa
+FROM publication_datasources pd
+LEFT OUTER JOIN (
+    SELECT pd.id, 1 as is_hybrid_oa from publication_datasources pd
+    JOIN datasource d on d.id=pd.datasource
+    JOIN issn on issn.id=pd.datasource
+    JOIN hybrid_oa ON issn.issn = hybrid_oa.issn
+    JOIN indi_result_has_cc_licence cc on pd.id=cc.id
+    where cc.has_cc_license=1) tmp on pd.id=tmp.id;
+
+-- Download totals per publication (overall, per repository, per year, per repository and year); only usage rows with downloads>0 are summed.
+create table indi_pub_downloads stored as parquet as
+SELECT result_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats
+join publication on result_id=id
+where downloads>0
+GROUP BY result_id
+order by no_dowloads desc;
+
+create table indi_pub_downloads_datasource stored as parquet as
+SELECT result_id, repository_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats
+join publication on result_id=id
+where downloads>0
+GROUP BY result_id, repository_id
+order by result_id;
+
+create table indi_pub_downloads_year stored as parquet as
+SELECT result_id, substring(us.`date`, 1,4) as `year`, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats us
+join publication on result_id=id where downloads>0
+GROUP BY result_id, `year`
+order by `year` asc;
+
+create table indi_pub_downloads_datasource_year stored as parquet as
+SELECT result_id, substring(us.`date`, 1,4) as `year`, repository_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats us
+join publication on result_id=id
+where downloads>0
+GROUP BY result_id, repository_id, `year`
+order by `year` asc, result_id;
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql
index 9e4edb44a..b4964d2b3 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql
@@ -123,6 +123,10 @@ create table TARGET.indi_pub_doi_from_crossref stored as parquet as select * fro
 compute stats TARGET.indi_pub_doi_from_crossref;
 create table TARGET.indi_pub_gold_oa stored as parquet as select * from SOURCE.indi_pub_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
 compute stats TARGET.indi_pub_gold_oa;
+create table TARGET.indi_datasets_gold_oa stored as parquet as select * from SOURCE.indi_datasets_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.indi_datasets_gold_oa;
+create table TARGET.indi_software_gold_oa stored as parquet as select * from SOURCE.indi_software_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.indi_software_gold_oa;
 create table TARGET.indi_pub_has_abstract stored as parquet as select * from SOURCE.indi_pub_has_abstract orig where exists (select 1 from TARGET.result r where r.id=orig.id);
 compute stats TARGET.indi_pub_has_abstract;
 create table TARGET.indi_result_has_cc_licence stored as parquet as select * from SOURCE.indi_result_has_cc_licence orig where exists (select 1 from TARGET.result r where r.id=orig.id);
@@ -130,7 +134,7 @@ compute stats TARGET.indi_result_has_cc_licence;
 create table TARGET.indi_result_has_cc_licence_url stored as parquet as select * from SOURCE.indi_result_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id);
 compute stats TARGET.indi_result_has_cc_licence_url;
 
-create view TARGET.indi_funder_country_collab stored as parquet as select * from SOURCE.indi_funder_country_collab;
+create view TARGET.indi_funder_country_collab as select * from SOURCE.indi_funder_country_collab;
 
 create table TARGET.indi_result_with_orcid stored as parquet as select * from SOURCE.indi_result_with_orcid orig where exists (select 1 from TARGET.result r where r.id=orig.id);
 compute stats TARGET.indi_result_with_orcid;
@@ -148,8 +152,20 @@ compute stats TARGET.indi_pub_closed_other_open;
 create table TARGET.indi_result_no_of_copies stored as parquet as select * from SOURCE.indi_result_no_of_copies orig where exists (select 1 from TARGET.result r where r.id=orig.id);
 compute stats TARGET.indi_result_no_of_copies;
 
----- Usage statistics
-create table TARGET.usage_stats stored as parquet as select * from SOURCE.usage_stats orig where exists (select 1 from TARGET.result r where r.id=orig.result_id);
+create view TARGET.indi_org_findable as select * from SOURCE.indi_org_findable;
+create view TARGET.indi_org_openess as select * from SOURCE.indi_org_openess;
+create table TARGET.indi_pub_hybrid_oa_with_cc stored as parquet as select * from SOURCE.indi_pub_hybrid_oa_with_cc orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.indi_pub_hybrid_oa_with_cc;
+
+-- The download indicator tables are keyed by result_id (they have no id column), so they are filtered on result_id.
+create table TARGET.indi_pub_downloads stored as parquet as select * from SOURCE.indi_pub_downloads orig where exists (select 1 from TARGET.result r where r.id=orig.result_id);
+compute stats TARGET.indi_pub_downloads;
+create table TARGET.indi_pub_downloads_datasource stored as parquet as select * from SOURCE.indi_pub_downloads_datasource orig where exists (select 1 from TARGET.result r where r.id=orig.result_id);
+compute stats TARGET.indi_pub_downloads_datasource;
+create table TARGET.indi_pub_downloads_year stored as parquet as select * from SOURCE.indi_pub_downloads_year orig where exists (select 1 from TARGET.result r where r.id=orig.result_id);
+compute stats TARGET.indi_pub_downloads_year;
+create table TARGET.indi_pub_downloads_datasource_year stored as parquet as select * from SOURCE.indi_pub_downloads_datasource_year orig where exists (select 1 from TARGET.result r where r.id=orig.result_id);
+compute stats TARGET.indi_pub_downloads_datasource_year;
 
 --denorm
 alter table TARGET.result rename to TARGET.res_tmp;