forked from D-Net/dnet-hadoop
Compare commits
900 Commits
Author | SHA1 | Date |
---|---|---|
Michele Artini | c5b9a1592e | |
Miriam Baglioni | ed560dacc0 | |
Miriam Baglioni | 07a1f2b31c | |
Claudio Atzori | 80d7b842e4 | |
Claudio Atzori | dd397d107d | |
Giambattista Bloisi | 3152382ae8 | |
Claudio Atzori | a50e04154e | |
Claudio Atzori | c4e8aaca1f | |
Claudio Atzori | 1596d70224 | |
Claudio Atzori | 5d030d1118 | |
Claudio Atzori | 6e0b6a886f | |
Claudio Atzori | 3854fcc5e0 | |
Miriam Baglioni | 371154d74f | |
Claudio Atzori | 4e9f64e01a | |
Giambattista Bloisi | d175a9745f | |
Michele De Bonis | fe70caa33c | |
Claudio Atzori | 81bfe3fe32 | |
Miriam Baglioni | 0765641979 | |
Miriam Baglioni | d0eba032cd | |
Miriam Baglioni | 7cd8171268 | |
Miriam Baglioni | a54d021c37 | |
Miriam Baglioni | 6eea075324 | |
Claudio Atzori | 2ba67f08d3 | |
Miriam Baglioni | df39360822 | |
Claudio Atzori | c1a309df75 | |
Claudio Atzori | 5fdc286eb9 | |
Michele Artini | fa2532db30 | |
Michele Artini | b35d046fd2 | |
Claudio Atzori | e7f6eb82df | |
Claudio Atzori | 9c7711310e | |
Michele Artini | 0c66b8589d | |
Michele Artini | 2d7a7a962d | |
Michele Artini | 6b0f7cc8b0 | |
Michele Artini | 758d4acd05 | |
Michele Artini | 339d8124f2 | |
Michele Artini | 52bb7af03b | |
Michele Artini | 9073b1159d | |
Michele Artini | dcf09811a2 | |
Sandro La Bruzzo | 890190b7ae | |
Claudio Atzori | bfd05cdab2 | |
Claudio Atzori | 24b5dc97c6 | |
Michele Artini | 714a16854e | |
Michele Artini | a2fac78dcc | |
Claudio Atzori | c648531ccb | |
Michele Artini | 99b7adda0c | |
Michele Artini | bb9cee4f40 | |
Giambattista Bloisi | 10cad80d4d | |
Giambattista Bloisi | 37b9bdc10c | |
Giambattista Bloisi | e7150eea7b | |
Giambattista Bloisi | 23477f3e80 | |
Claudio Atzori | ce78752aa3 | |
Claudio Atzori | 152cb47375 | |
Miriam Baglioni | f1dc0050c7 | |
Miriam Baglioni | 42531afc3e | |
Miriam Baglioni | 907eeadce8 | |
Claudio Atzori | 6b4fa7b8b9 | |
Claudio Atzori | b8bc237079 | |
Claudio Atzori | 8e7ef79ce0 | |
Claudio Atzori | ed6d71fc70 | |
Miriam Baglioni | cbe877b73c | |
Claudio Atzori | 5fc413a5df | |
Claudio Atzori | 97c9706469 | |
Claudio Atzori | 07e7b9315c | |
Alessia | 39810c6e7e | |
Claudio Atzori | e0f58afd30 | |
Claudio Atzori | 60cf7d86a1 | |
Claudio Atzori | fecbf93e0e | |
Claudio Atzori | 64740475d0 | |
Claudio Atzori | 8f551afa52 | |
Miriam Baglioni | 1af6571474 | |
Claudio Atzori | a81c555fe6 | |
Claudio Atzori | 359b8ebda8 | |
Miriam Baglioni | c7f6669f1a | |
Miriam Baglioni | 7cff281d3e | |
Claudio Atzori | d4bf449e8c | |
Miriam Baglioni | fc60661ac5 | |
Claudio Atzori | d771a883f9 | |
Claudio Atzori | 01958a3e07 | |
Claudio Atzori | ceb210993c | |
Miriam Baglioni | 6f1801d7d1 | |
Miriam Baglioni | 19806c2ae3 | |
Miriam Baglioni | 62649dc5c4 | |
Miriam Baglioni | 9573bf576d | |
Michele Artini | d27e9ea50f | |
Michele De Bonis | 4f4c73d65b | |
Miriam Baglioni | 79985ad197 | |
Claudio Atzori | c25b048e12 | |
Claudio Atzori | 06e3985b77 | |
Claudio Atzori | 83327239de | |
Claudio Atzori | db9c54c944 | |
Claudio Atzori | e39e8bbd47 | |
Claudio Atzori | e94ae771ff | |
Claudio Atzori | 6c98d69215 | |
Claudio Atzori | 78b5e4bb6f | |
Claudio Atzori | 40c5d87645 | |
Claudio Atzori | a65241fcaf | |
Claudio Atzori | 6665976604 | |
Claudio Atzori | c99f92efaa | |
Claudio Atzori | f17e1243ba | |
Claudio Atzori | 6a19337dab | |
Miriam Baglioni | 8f11dfe554 | |
Miriam Baglioni | d96215cb9b | |
Miriam Baglioni | 9246bdec1c | |
Miriam Baglioni | 9d27910144 | |
Claudio Atzori | beb93cdfe9 | |
Claudio Atzori | 5aa7847ea6 | |
Claudio Atzori | d20a5e020a | |
Claudio Atzori | 38f8ed27fd | |
Claudio Atzori | 1fb44198fb | |
Claudio Atzori | 3d1d8e6036 | |
Claudio Atzori | 0b1c58358b | |
Claudio Atzori | 6f6e85ddf4 | |
Claudio Atzori | 7fa3d51200 | |
Michele Artini | f99fb21040 | |
Claudio Atzori | b70a440aca | |
Michele Artini | 36c3df1652 | |
Claudio Atzori | e17edb2581 | |
Claudio Atzori | 2f13683285 | |
Claudio Atzori | 61d1fa9b9f | |
Claudio Atzori | 5ab409dcab | |
Claudio Atzori | f9ed2ae33c | |
Claudio Atzori | b756cfeb85 | |
Claudio Atzori | 51d6a541bd | |
Michele Artini | bbe52584f7 | |
Claudio Atzori | 07ce92cef2 | |
Michele Artini | 5cdba9172b | |
Michele De Bonis | 2a36ccb997 | |
Miriam Baglioni | c465835061 | |
Miriam Baglioni | 814e650e12 | |
Miriam Baglioni | f043b7b096 | |
Miriam Baglioni | ddd20e7f8e | |
Claudio Atzori | 153b56eeff | |
Claudio Atzori | 1180d78b71 | |
Claudio Atzori | bb12d0b4df | |
Claudio Atzori | ed97ba4565 | |
Claudio Atzori | 7b398a6d0b | |
Claudio Atzori | 13f6506ce5 | |
Claudio Atzori | 3d9ddaa23a | |
Claudio Atzori | 7d3292551b | |
Claudio Atzori | c06dfdfd86 | |
Claudio Atzori | c7634c55c7 | |
Claudio Atzori | b822b34abe | |
Michele De Bonis | ea1841fbd2 | |
Miriam Baglioni | 4dbce39237 | |
Miriam Baglioni | 3ee8a7d18a | |
Miriam Baglioni | a2b708bb71 | |
Miriam Baglioni | 9cbe966b4a | |
Miriam Baglioni | 236b64d830 | |
Miriam Baglioni | 67ff783e65 | |
Michele De Bonis | a10e8d9f05 | |
Claudio Atzori | 14539f9c8b | |
Claudio Atzori | 1bc8c5d173 | |
Claudio Atzori | ee7deb3f60 | |
Claudio Atzori | 157cc8be87 | |
Claudio Atzori | 1ccf01cdb8 | |
Claudio Atzori | 023099a921 | |
Claudio Atzori | 786c217085 | |
Claudio Atzori | b79cb155ba | |
Lampros Smyrnaios | c858c02111 | |
Claudio Atzori | 33a02c5b9e | |
Claudio Atzori | 1182bca9eb | |
Claudio Atzori | 1c30eacac2 | |
Claudio Atzori | 6055212f77 | |
Claudio Atzori | 0031cf849e | |
Claudio Atzori | 8220e27110 | |
Claudio Atzori | bc993d49c1 | |
Claudio Atzori | 1dc7458de2 | |
Claudio Atzori | a7a54aab47 | |
Serafeim Chatzopoulos | 9f6e16a03c | |
Lampros Smyrnaios | 66cd28f70a | |
Lampros Smyrnaios | c6b1ab2a18 | |
Miriam Baglioni | eaa00a4199 | |
Miriam Baglioni | d35edac212 | |
Claudio Atzori | fb731b6d46 | |
Miriam Baglioni | 6421f8fece | |
Miriam Baglioni | ac270f795b | |
Miriam Baglioni | b6da35e736 | |
Lampros Smyrnaios | 236aed8954 | |
Lampros Smyrnaios | 3c9b8de892 | |
Antonis Lempesis | c67ef157d3 | |
Lampros Smyrnaios | c23f3031ed | |
Claudio Atzori | 8ec151aa3d | |
Claudio Atzori | dd541f8cf5 | |
Lampros Smyrnaios | ff335578ea | |
Lampros Smyrnaios | 285416c74e | |
Lampros Smyrnaios | 3095047e5e | |
Antonis Lempesis | 0456f1b788 | |
Antonis Lempesis | 38636942c7 | |
Claudio Atzori | 2636936162 | |
Lampros Smyrnaios | d942a1101b | |
Miriam Baglioni | ef437a8cdf | |
Giambattista Bloisi | 9bf2bda1c6 | |
Giambattista Bloisi | d90cb099b8 | |
Miriam Baglioni | 86088ef26e | |
Miriam Baglioni | 143c525343 | |
Giambattista Bloisi | 4f2a61e10f | |
Claudio Atzori | 11fe3a4fe0 | |
Claudio Atzori | c371513d43 | |
Claudio Atzori | a8d68c9d29 | |
Claudio Atzori | 71927ca818 | |
Giambattista Bloisi | 46018dc804 | |
Miriam Baglioni | 3efd5b1308 | |
Miriam Baglioni | 8fe934810f | |
Miriam Baglioni | 9da006e98c | |
Miriam Baglioni | 196fa55774 | |
Miriam Baglioni | 50805e3fc1 | |
Giambattista Bloisi | 85c1eae7e0 | |
Claudio Atzori | b0eba210c0 | |
Claudio Atzori | d39a1054b8 | |
Claudio Atzori | 3776327a8c | |
Claudio Atzori | 576efc1857 | |
Claudio Atzori | 0139f23d66 | |
Michele Artini | c726572418 | |
Claudio Atzori | ec79405cc9 | |
Miriam Baglioni | 1477406ecc | |
Claudio Atzori | 92c3abd5a4 | |
Claudio Atzori | ce2364743a | |
Claudio Atzori | f70dc76b61 | |
Claudio Atzori | efc1632e16 | |
Claudio Atzori | 91b49366c6 | |
Claudio Atzori | 5e05385d35 | |
Miriam Baglioni | c4d9b5b9d2 | |
Miriam Baglioni | bf9a5e6314 | |
Miriam Baglioni | 9d79ddb3dd | |
Miriam Baglioni | 907aa28c6c | |
Miriam Baglioni | 3955ceaa76 | |
Miriam Baglioni | 128c143394 | |
Claudio Atzori | 5133993ee5 | |
Claudio Atzori | 73bd1938a5 | |
Claudio Atzori | 5cf259a851 | |
Claudio Atzori | e1828fc60e | |
Claudio Atzori | da5c1e73a4 | |
Claudio Atzori | 81090ad593 | |
Claudio Atzori | 56920b447d | |
Giambattista Bloisi | 3feab5d92d | |
Claudio Atzori | a02f3f0d2b | |
Alessia Bardi | eadfd8d71d | |
Alessia Bardi | 05ee783c07 | |
Alessia Bardi | fe9fb59c90 | |
Claudio Atzori | c272c4ad68 | |
Alessia Bardi | c5f4da16a4 | |
Alessia | 1b165a14a0 | |
Michele Artini | e996787be2 | |
Claudio Atzori | 6be783caec | |
Claudio Atzori | 62716141c5 | |
Claudio Atzori | b703f94f09 | |
Miriam Baglioni | 5d85b70e1f | |
Miriam Baglioni | 14f275ffaf | |
Claudio Atzori | a428e7be7e | |
Lampros Smyrnaios | e3f28338c1 | |
Claudio Atzori | 8e45c5baa8 | |
Claudio Atzori | db5e18c784 | |
Giambattista Bloisi | 73316d8c83 | |
Miriam Baglioni | 75d5ddb999 | |
Miriam Baglioni | 87c9c61b41 | |
Miriam Baglioni | b55fed09f8 | |
Claudio Atzori | 107d958b89 | |
Claudio Atzori | 3a7a6ecc32 | |
Claudio Atzori | 1af4224d3d | |
Claudio Atzori | 0d5bdb2db0 | |
Claudio Atzori | 66548e6a83 | |
Claudio Atzori | fb266efbcb | |
Claudio Atzori | d7daf54333 | |
Claudio Atzori | f99eaa0376 | |
Claudio Atzori | 23312fcc1e | |
Miriam Baglioni | b864f0adcf | |
Miriam Baglioni | 7a44869d87 | |
Miriam Baglioni | 12ffde023f | |
Antonis Lempesis | 15b54a345a | |
Lampros Smyrnaios | b48ed6e617 | |
Lampros Smyrnaios | 68322843e2 | |
Lampros Smyrnaios | c7b32bbacc | |
Giambattista Bloisi | 1b2357e10a | |
Sandro La Bruzzo | f1fe363b19 | |
Sandro La Bruzzo | 66c1ffc866 | |
Claudio Atzori | c3fe59bc78 | |
Claudio Atzori | 1ea67eba82 | |
Claudio Atzori | f9fb2fef6e | |
Claudio Atzori | 834461ba26 | |
Sandro La Bruzzo | e8a61d5dd5 | |
Sandro La Bruzzo | ca9414b737 | |
Sandro La Bruzzo | 032bcc8279 | |
Sandro La Bruzzo | 103e2652b3 | |
Sandro La Bruzzo | a87f9ea643 | |
Sandro La Bruzzo | 6efab4d88e | |
Claudio Atzori | 92f018d196 | |
Claudio Atzori | 0611c81a2f | |
Michele Artini | 2b3b5fe9a1 | |
Claudio Atzori | 1efe7f7e39 | |
Claudio Atzori | 53e7bb4336 | |
Claudio Atzori | f7d56e2ef2 | |
Claudio Atzori | c1237ab39e | |
Claudio Atzori | dc3a5858f7 | |
Claudio Atzori | 55f39f7850 | |
Claudio Atzori | 39a2afe8b5 | |
Claudio Atzori | 908ed9da7a | |
Antonis Lempesis | 0cada3cc8f | |
Antonis Lempesis | 90a4fb3547 | |
Claudio Atzori | 18aa323ee9 | |
Michele Artini | c9a327bc50 | |
Michele Artini | e234848af8 | |
Claudio Atzori | b4e3389432 | |
Giambattista Bloisi | 711048ceed | |
Michele Artini | 70bf6ac415 | |
Michele Artini | aa40e53c19 | |
Michele Artini | ed052a3476 | |
Claudio Atzori | 26363060ed | |
Claudio Atzori | 0486227185 | |
Claudio Atzori | a5d13d5d27 | |
Claudio Atzori | e1a0fb8933 | |
Giambattista Bloisi | 69c5efbd8b | |
Sandro La Bruzzo | db358ad0d2 | |
Sandro La Bruzzo | 26bf8e763a | |
Sandro La Bruzzo | a860c57bbc | |
Sandro La Bruzzo | 0646d0d064 | |
Claudio Atzori | 00ad21d814 | |
Claudio Atzori | 4355f64810 | |
Claudio Atzori | 66680b8b9a | |
Claudio Atzori | dcf23b3d06 | |
Michele Artini | f4068de298 | |
Claudio Atzori | 11bd89e132 | |
Claudio Atzori | e96c2c1606 | |
Claudio Atzori | 50c18f7a0b | |
Michele Artini | 2615136efc | |
Sandro La Bruzzo | 133ead1e3e | |
Sandro La Bruzzo | 052c6aac9d | |
Sandro La Bruzzo | 9cd3bc0f10 | |
Claudio Atzori | c08a58bba8 | |
Claudio Atzori | e2937db385 | |
Giambattista Bloisi | 1878199dae | |
Sandro La Bruzzo | 0d628cd62b | |
Lampros Smyrnaios | 49af2e5740 | |
Antonis Lempesis | d2649a1429 | |
Claudio Atzori | c3053ef34d | |
Claudio Atzori | b5bcab13ec | |
Claudio Atzori | 425c9afc36 | |
Claudio Atzori | 93dd9cc639 | |
Miriam Baglioni | 6189879643 | |
Claudio Atzori | c57cff2d6d | |
Miriam Baglioni | 7de114bda0 | |
Claudio Atzori | eb4692e4ee | |
Claudio Atzori | 24a83fc24f | |
Sandro La Bruzzo | 073f320c6a | |
Miriam Baglioni | 776c898c4b | |
Claudio Atzori | 5857fd38c1 | |
Claudio Atzori | 0656ab2838 | |
Claudio Atzori | ab7f0855af | |
Claudio Atzori | 7a7e313157 | |
Claudio Atzori | e5879b68c7 | |
Claudio Atzori | 3a027e97a7 | |
Claudio Atzori | 795e1b2629 | |
Claudio Atzori | 0c05abe50b | |
Sandro La Bruzzo | b72c3139e2 | |
Sandro La Bruzzo | b84ad0c06e | |
Antonis Lempesis | b52a5a753b | |
Sandro La Bruzzo | 8dd9cf84e2 | |
Sandro La Bruzzo | 342cb6189b | |
Antonis Lempesis | c3fe9662b2 | |
Claudio Atzori | 57c678d904 | |
Claudio Atzori | 5ab8cd1794 | |
Claudio Atzori | 8fdd0244ad | |
Claudio Atzori | 18fdaaf548 | |
Antonis Lempesis | 0c71c58df6 | |
Antonis Lempesis | 43d05dbebb | |
Antonis Lempesis | e728a0897c | |
Antonis Lempesis | 308ae580a9 | |
Antonis Lempesis | 27d22bd8f9 | |
Antonis Lempesis | 1f5aba12fa | |
Claudio Atzori | 43e123c624 | |
Claudio Atzori | 62a07b7add | |
Claudio Atzori | 96bddcc921 | |
Claudio Atzori | b554c41cc7 | |
Claudio Atzori | ac8747582c | |
Claudio Atzori | 0db7e4ae9a | |
Giambattista Bloisi | 8ac167e420 | |
Miriam Baglioni | 0486cea4c4 | |
Miriam Baglioni | 0625b9061f | |
Miriam Baglioni | 9eeb9f5d32 | |
Claudio Atzori | 589bce3520 | |
Claudio Atzori | 013935c593 | |
Sandro La Bruzzo | a5ddd8dfbb | |
Giambattista Bloisi | da333e9f4d | |
Claudio Atzori | 43fd1de681 | |
Claudio Atzori | d070db4a32 | |
Michele Artini | 78b9d84e4a | |
Giambattista Bloisi | 43b454399f | |
Lampros Smyrnaios | d7da4f814b | |
Lampros Smyrnaios | 14719dcd62 | |
Sandro La Bruzzo | 41a42dde64 | |
Sandro La Bruzzo | 843dc95340 | |
Sandro La Bruzzo | 1e30454ee0 | |
Sandro La Bruzzo | 2581672c11 | |
Lampros Smyrnaios | 22745027c8 | |
Lampros Smyrnaios | abf0b69f29 | |
Claudio Atzori | 3cad4a415d | |
Sandro La Bruzzo | a0642bd190 | |
Claudio Atzori | 6132bd028e | |
Miriam Baglioni | 519db1ddef | |
Sandro La Bruzzo | 98dc042db5 | |
Sandro La Bruzzo | ef582948a7 | |
Sandro La Bruzzo | 5142f462b5 | |
Miriam Baglioni | 0794e0667b | |
Miriam Baglioni | 4b1de076ac | |
Miriam Baglioni | c8a88b2187 | |
Sandro La Bruzzo | 31e152d2bb | |
Sandro La Bruzzo | 6f3e925cae | |
Miriam Baglioni | f0f6abf892 | |
Claudio Atzori | 26b97aa5ed | |
Claudio Atzori | 5add51f38c | |
Lampros Smyrnaios | b7c8acc563 | |
Miriam Baglioni | 50fbebf186 | |
Michele Artini | 71d6e02886 | |
Michele Artini | 02c9a311c8 | |
Miriam Baglioni | 42846d3b91 | |
Miriam Baglioni | 4f0a044245 | |
Miriam Baglioni | 4bb504e693 | |
Serafeim Chatzopoulos | cbe13a5c61 | |
Miriam Baglioni | 9c9a9562ae | |
Miriam Baglioni | 2c4440951f | |
Miriam Baglioni | b42bdd5fb3 | |
Miriam Baglioni | 64cbd8abe9 | |
Antonis Lempesis | df6e3bda04 | |
Antonis Lempesis | 573b081f1d | |
Serafeim Chatzopoulos | 0eb0701b26 | |
Antonis Lempesis | 0bf2a7a359 | |
Claudio Atzori | 24227ab598 | |
Claudio Atzori | f01390702e | |
Antonis Lempesis | 9ff44eed96 | |
Claudio Atzori | cff6040424 | |
Claudio Atzori | 5592ccc37a | |
Antonis Lempesis | 1fee4124e0 | |
Sandro La Bruzzo | 73a67c0e4a | |
Claudio Atzori | 9e700a8b0d | |
Claudio Atzori | 75551ad4ec | |
Miriam Baglioni | 94b931f7bd | |
Miriam Baglioni | 3b209261f2 | |
Claudio Atzori | d16c15da8d | |
Lampros Smyrnaios | 036ba03fcd | |
Claudio Atzori | 09a6d17059 | |
Claudio Atzori | d70793847d | |
Claudio Atzori | 730eaffc85 | |
Lampros Smyrnaios | bc8c97182d | |
Lampros Smyrnaios | 92cc27e7eb | |
Claudio Atzori | ef52128c55 | |
Claudio Atzori | bfba71a95c | |
Claudio Atzori | d72e7b7487 | |
Sandro La Bruzzo | ece56f0178 | |
Claudio Atzori | 414acd4ef4 | |
Claudio Atzori | ecff0b4825 | |
Claudio Atzori | 25c2025223 | |
Claudio Atzori | 538b180fe0 | |
Claudio Atzori | eae88c0fe3 | |
Claudio Atzori | 82fc609c4f | |
Claudio Atzori | 4b978ffa2d | |
Claudio Atzori | fa4b3e6d2b | |
Claudio Atzori | 74e5d05577 | |
Claudio Atzori | 6c3b692f60 | |
Claudio Atzori | e9eb590f87 | |
Claudio Atzori | 9a5b134ddf | |
Claudio Atzori | 069803f34a | |
Claudio Atzori | 71c1f81b54 | |
Claudio Atzori | c3c9bdb59c | |
Claudio Atzori | 91b61687fa | |
Claudio Atzori | 63067d4b24 | |
Claudio Atzori | e0c315b07b | |
Claudio Atzori | 54936b7f42 | |
Claudio Atzori | 9fc70a9451 | |
Michele Artini | e1149eb5c4 | |
Michele De Bonis | f6601ea7d1 | |
Michele Artini | 3f174ad90f | |
Michele Artini | 6ffb1faf09 | |
Giambattista Bloisi | 3f22c101d9 | |
Claudio Atzori | c8683eb13c | |
Claudio Atzori | aaa73f89d1 | |
Giambattista Bloisi | 0ff7faad72 | |
Michele De Bonis | cd4c3c934d | |
Michele Artini | 7faa115ba0 | |
Michele Artini | f9c74c98fa | |
Claudio Atzori | 7ae7e8aa06 | |
Antonis Lempesis | 4c40c96e30 | |
Antonis Lempesis | 459167ac2f | |
Antonis Lempesis | 07f634a46d | |
Antonis Lempesis | 9521625a07 | |
Sandro La Bruzzo | 58dbe71d39 | |
Antonis Lempesis | 67a5aa0a38 | |
dimitrispie | a3a570e9a0 | |
Giambattista Bloisi | 664a381d31 | |
Michele Artini | cb29b9773c | |
Michele Artini | 85b844d57e | |
Michele Artini | 455f2e1e07 | |
Michele Artini | 30167aa882 | |
Michele Artini | 88fef367b9 | |
Claudio Atzori | 078169b922 | |
Claudio Atzori | af154d4456 | |
Claudio Atzori | 7863c92466 | |
Claudio Atzori | eb5887cb9a | |
Michele Artini | a99942f7cf | |
Michele Artini | 7f7083f53e | |
Sandro La Bruzzo | 5281f010a5 | |
Sandro La Bruzzo | ee1fcb672b | |
Miriam Baglioni | 5a32bb9578 | |
Sandro La Bruzzo | c532831718 | |
Miriam Baglioni | 48c052215c | |
Michele Artini | d9b23a76c5 | |
Michele Artini | 841ca92246 | |
Michele Artini | 3bcfc40293 | |
Claudio Atzori | db66555ebb | |
Antonis Lempesis | f74c7e8689 | |
Giambattista Bloisi | 9092075760 | |
Sandro La Bruzzo | cbd4e5e4bb | |
Claudio Atzori | d4871b31e8 | |
Antonis Lempesis | 3c79720342 | |
Antonis Lempesis | 5ae4b4286c | |
Miriam Baglioni | 5180b6ec8a | |
Miriam Baglioni | 7827a2d66b | |
Antonis Lempesis | 316d585c8a | |
Miriam Baglioni | fd34372c40 | |
Sandro La Bruzzo | d34cef3f8d | |
Sandro La Bruzzo | 3b837d38ce | |
Sandro La Bruzzo | f417515e43 | |
Giambattista Bloisi | 3067ea390d | |
Sandro La Bruzzo | ad0e9aa80c | |
Sandro La Bruzzo | 9d94648f3b | |
Giambattista Bloisi | 3cd5590f3b | |
Giambattista Bloisi | 56dd05f85c | |
Claudio Atzori | 6fcf872daa | |
Claudio Atzori | 3f07390a58 | |
Miriam Baglioni | c94d94035c | |
Sandro La Bruzzo | 7d806a434c | |
Sandro La Bruzzo | e468e99100 | |
Sandro La Bruzzo | b63994dcc4 | |
Sandro La Bruzzo | 915a76a796 | |
Giambattista Bloisi | 773e856550 | |
Sandro La Bruzzo | a712df1e1d | |
Sandro La Bruzzo | b32a9d1994 | |
Michele Artini | 3268570b2c | |
Michele Artini | 4374d7449e | |
Miriam Baglioni | 72bae7af76 | |
Miriam Baglioni | 43da7e1191 | |
Serafeim Chatzopoulos | f0dc12634b | |
Claudio Atzori | 07d009007b | |
Claudio Atzori | 071d044971 | |
Claudio Atzori | b3ddbaed58 | |
Claudio Atzori | 753c2a72bd | |
Claudio Atzori | a63b091bae | |
Giambattista Bloisi | 85aeff72f1 | |
Giambattista Bloisi | d65285da7f | |
Giambattista Bloisi | 29194472a7 | |
Miriam Baglioni | 8dae10b442 | |
Miriam Baglioni | 83bb97be83 | |
Miriam Baglioni | 6e1f383e4a | |
Miriam Baglioni | 3f7d262a4e | |
Miriam Baglioni | eca021f4d6 | |
Miriam Baglioni | bdb6bbb365 | |
Claudio Atzori | d85d2df6ad | |
Claudio Atzori | 1416f16b35 | |
Giambattista Bloisi | b19643f6eb | |
Giambattista Bloisi | ba1a0e7b4f | |
Giambattista Bloisi | 079085286c | |
Giambattista Bloisi | 8dd666aedd | |
Claudio Atzori | f21133229a | |
Claudio Atzori | d86b909db2 | |
Claudio Atzori | 08162902ab | |
Claudio Atzori | e6bdee86d1 | |
Antonis Lempesis | dd4c27f4f3 | |
Claudio Atzori | 38c9001147 | |
Claudio Atzori | fd17c1f17c | |
Claudio Atzori | 009dcf6aea | |
Claudio Atzori | bb82052c40 | |
Claudio Atzori | e8630a6d03 | |
Claudio Atzori | 42f5506306 | |
Claudio Atzori | f28c63d5ef | |
Alessia Bardi | f2a08d8cc2 | |
Antonis Lempesis | a512ead447 | |
Miriam Baglioni | 07a373a0bd | |
Miriam Baglioni | ead08b0dd4 | |
Claudio Atzori | 1a8b609ed2 | |
Antonis Lempesis | bb10a22290 | |
Miriam Baglioni | 4c8706efee | |
Miriam Baglioni | a5995ab557 | |
Miriam Baglioni | a418dacb47 | |
Miriam Baglioni | e9131f4e4a | |
Sandro La Bruzzo | 9aebca77a0 | |
Claudio Atzori | f804c58bc7 | |
Claudio Atzori | 926903b06b | |
Giambattista Bloisi | 078df0b4d1 | |
Claudio Atzori | 4d0c59669b | |
Claudio Atzori | bf99c424fa | |
Claudio Atzori | ce3200263e | |
Sandro La Bruzzo | 3c8c88bdd3 | |
Sandro La Bruzzo | e889808daa | |
Claudio Atzori | 9e8fc6aa88 | |
Antonis Lempesis | c548796463 | |
Sandro La Bruzzo | 0386f36385 | |
Antonis Lempesis | a7115cfa9e | |
Antonis Lempesis | fd43b0e84a | |
Claudio Atzori | 2838a9b630 | |
Claudio Atzori | da944a5c55 | |
Claudio Atzori | 0c97a3a81a | |
Claudio Atzori | 2c1e6849f0 | |
Claudio Atzori | 9b13c22e5d | |
Claudio Atzori | 3e96777cc4 | |
Sandro La Bruzzo | 43e0bba7ed | |
Miriam Baglioni | f7d06dc661 | |
Miriam Baglioni | 6e58d79623 | |
Miriam Baglioni | e0ec800d7e | |
Claudio Atzori | 9812406589 | |
Claudio Atzori | f87f3a6483 | |
Claudio Atzori | 6fd25cf549 | |
Claudio Atzori | bd187ec6e7 | |
Claudio Atzori | f76852f385 | |
Claudio Atzori | b9fcc5ad5e | |
Claudio Atzori | 1c6db320f4 | |
Claudio Atzori | 2655eea5bc | |
Claudio Atzori | c6b3401596 | |
Miriam Baglioni | bcc0a13981 | |
Miriam Baglioni | 6af536541d | |
Miriam Baglioni | a12a3eb143 | |
Claudio Atzori | 628fdfb5eb | |
Miriam Baglioni | 82e9e262ee | |
Miriam Baglioni | 67ce2d54be | |
Miriam Baglioni | 59eaccbd87 | |
Giambattista Bloisi | 21a14fcd80 | |
Sandro La Bruzzo | e0753f19da | |
sandro.labruzzo | e328bc0ade | |
Claudio Atzori | 2d302e6827 | |
Miriam Baglioni | f612125939 | |
Claudio Atzori | c67467723b | |
Claudio Atzori | cb9e739484 | |
Claudio Atzori | 2753044d13 | |
Giambattista Bloisi | a88dce5bf3 | |
Giambattista Bloisi | 3c66e3bd7b | |
Giambattista Bloisi | 10e135db1e | |
Giambattista Bloisi | 831cc1fdde | |
Giambattista Bloisi | 1287315ffb | |
Giambattista Bloisi | 02636e802c | |
Antonis Lempesis | e024718f73 | |
Sandro La Bruzzo | 859babf722 | |
Sandro La Bruzzo | 39ebb60b38 | |
Sandro La Bruzzo | 9d5a7c3b22 | |
Sandro La Bruzzo | 8f61063201 | |
Sandro La Bruzzo | 1a42a5c10d | |
Claudio Atzori | 16d858fbf0 | |
Miriam Baglioni | e711a05229 | |
Miriam Baglioni | 71d6f30711 | |
dimitrispie | b920307bdd | |
dimitrispie | 8b2cbb611e | |
Antonis Lempesis | 2e4cab026c | |
dimitrispie | 6b823100ae | |
dimitrispie | 75bfde043c | |
Miriam Baglioni | cb14470ba6 | |
Miriam Baglioni | 9f966b59d4 | |
Miriam Baglioni | 2f3b5a133d | |
Miriam Baglioni | 2f7b9ad815 | |
Miriam Baglioni | f2352e8a78 | |
Miriam Baglioni | 009730b3d1 | |
Miriam Baglioni | 89f269c7f4 | |
Miriam Baglioni | b06aea0adf | |
Miriam Baglioni | 3afd4aa57b | |
dimitrispie | ffdd03d2f4 | |
dimitrispie | 40b98d8182 | |
Claudio Atzori | 62104790ae | |
Claudio Atzori | 106968adaa | |
Claudio Atzori | a8a4db96f0 | |
Miriam Baglioni | 5011c4d11a | |
Miriam Baglioni | 4740c808f7 | |
Miriam Baglioni | d410ea8a41 | |
Sandro La Bruzzo | 37e36baf76 | |
Miriam Baglioni | 624f5f3f21 | |
Miriam Baglioni | 354e02e6a9 | |
Miriam Baglioni | b00771c7cc | |
Sandro La Bruzzo | 9d39845d1f | |
Sandro La Bruzzo | 15fd93a2b6 | |
Sandro La Bruzzo | 9d342a47da | |
Sandro La Bruzzo | 1fbd4325f5 | |
Sandro La Bruzzo | 1f1a6a5f5f | |
Miriam Baglioni | 3eca5d2e1c | |
Miriam Baglioni | 01ce0b9c76 | |
Miriam Baglioni | 0d8e496a63 | |
Claudio Atzori | c4ec35b6cd | |
Claudio Atzori | 1726f49790 | |
Claudio Atzori | a59be5779e | |
Claudio Atzori | ff924215b8 | |
Claudio Atzori | a6d635e695 | |
Claudio Atzori | 98cce5bfb2 | |
Claudio Atzori | 84d54643cf | |
Claudio Atzori | 7e8eff40c1 | |
Miriam Baglioni | 8752d275fa | |
Miriam Baglioni | d4eedada71 | |
Claudio Atzori | aba95ed1d1 | |
Claudio Atzori | 2877839df0 | |
Claudio Atzori | 34abd0fc43 | |
Claudio Atzori | cb71a7936b | |
Claudio Atzori | 70eb1796b2 | |
Claudio Atzori | c381bacee0 | |
Miriam Baglioni | 336fb31d87 | |
Miriam Baglioni | c0cde53bf6 | |
Miriam Baglioni | 616622d2bb | |
Claudio Atzori | 259c69e446 | |
Claudio Atzori | 431c6bb08a | |
Giambattista Bloisi | 613ec5ffce | |
Sandro La Bruzzo | 52495f2cd2 | |
Sandro La Bruzzo | 8c3e9a09d3 | |
Giambattista Bloisi | 2fa78f6071 | |
Giambattista Bloisi | 326c9dc08c | |
Claudio Atzori | 982c0c110b | |
Claudio Atzori | 321922772b | |
Claudio Atzori | c5b7253130 | |
Claudio Atzori | 3c3bdb8318 | |
Claudio Atzori | 7c3041b276 | |
Claudio Atzori | 74b185d07b | |
Claudio Atzori | e6086efc53 | |
Claudio Atzori | 2a233a89aa | |
Claudio Atzori | 178a14c491 | |
Sandro La Bruzzo | 3caf6ff27e | |
Claudio Atzori | 511a98dd80 | |
Claudio Atzori | d33f578e54 | |
Claudio Atzori | c5ac593c07 | |
Claudio Atzori | 09d061e90b | |
Claudio Atzori | 93a700742a | |
Claudio Atzori | 0c3c9ea43d | |
Claudio Atzori | 33cb483c75 | |
dimitrispie | c9d995dde0 | |
dimitrispie | a397112cb8 | |
dimitrispie | 76594ded23 | |
Claudio Atzori | 622fafbd2e | |
Sandro La Bruzzo | bf0fd27c36 | |
dimitrispie | 48430a32a6 | |
Sandro La Bruzzo | cdfb7588dd | |
Sandro La Bruzzo | 5e22b67b8a | |
Sandro La Bruzzo | f718caaac9 | |
Sandro La Bruzzo | 7b5e04f37e | |
Claudio Atzori | 4cbabc9fbc | |
Claudio Atzori | 6f10791e77 | |
Claudio Atzori | 4e1aac2e2f | |
Sandro La Bruzzo | 86b5775e08 | |
Sandro La Bruzzo | c96ff54b45 | |
Sandro La Bruzzo | af1c2634b3 | |
Sandro La Bruzzo | 279100fa52 | |
Sandro La Bruzzo | aa239ec673 | |
Sandro La Bruzzo | 59111713fa | |
Sandro La Bruzzo | 6f4d0c05ea | |
Miriam Baglioni | 8eb70e6657 | |
Miriam Baglioni | e3cce9a5a0 | |
Miriam Baglioni | 48e0427a23 | |
Sandro La Bruzzo | 34a4b3cbdf | |
Claudio Atzori | 1763d377ad | |
Claudio Atzori | 1ba582de3c | |
dimitrispie | 359e81b7a6 | |
Claudio Atzori | a0311e8a90 | |
Claudio Atzori | 8fb05888fd | |
Claudio Atzori | a21617732a | |
Claudio Atzori | 2c77638bf5 | |
Claudio Atzori | 836d7ec724 | |
Claudio Atzori | 745039ad5b | |
Claudio Atzori | 008fdf9d8a | |
Claudio Atzori | 11a1207f9c | |
dimitrispie | a94a54a2d0 | |
Claudio Atzori | 2b626815ff | |
Miriam Baglioni | b177cd5a0a | |
Miriam Baglioni | eaf0a702de | |
Sandro La Bruzzo | 6ce36b3e41 | |
dimitrispie | d524e30866 | |
Serafeim Chatzopoulos | 671ba8a5a7 | |
Miriam Baglioni | 5bc97615d5 | |
Miriam Baglioni | 7b1e34f159 | |
Miriam Baglioni | 638ad9e74f | |
Miriam Baglioni | edcb17ca98 | |
Claudio Atzori | 5f1ed61c1f | |
Claudio Atzori | 8c03c41d5d | |
Claudio Atzori | 97454e9594 | |
Serafeim Chatzopoulos | 7e34dde774 | |
Serafeim Chatzopoulos | 24c3f92d87 | |
Serafeim Chatzopoulos | 6ce9b600c1 | |
Serafeim Chatzopoulos | 94089878fd | |
Miriam Baglioni | 937ff6a7c7 | |
Miriam Baglioni | a737dd47b6 | |
Miriam Baglioni | c80b768af0 | |
Miriam Baglioni | e9a20fc8f6 | |
Claudio Atzori | dde2fec035 | |
Claudio Atzori | 262d7c581b | |
Serafeim Chatzopoulos | 2090003ea9 | |
Miriam Baglioni | 0097f4e64b | |
Serafeim Chatzopoulos | a82aaf57b2 | |
Claudio Atzori | b3a61ea955 | |
dimitrispie | 89c4dfbaf4 | |
Miriam Baglioni | 5c5a195e97 | |
Claudio Atzori | a870aa2b09 | |
Claudio Atzori | 7fc621cdec | |
Miriam Baglioni | 70b78a40c7 | |
Miriam Baglioni | f206ff42d6 | |
Miriam Baglioni | 34358afe75 | |
Miriam Baglioni | 18bfff8af3 | |
Miriam Baglioni | 69dac91659 | |
Serafeim Chatzopoulos | aad5982bf1 | |
Miriam Baglioni | a9ede1e989 | |
Miriam Baglioni | a4214ced1e | |
Serafeim Chatzopoulos | 6b19dcee80 | |
Claudio Atzori | 2b9d0416ec | |
Claudio Atzori | b0fed1725e | |
Miriam Baglioni | f1b898c6b4 | |
Claudio Atzori | a24178cb93 | |
Claudio Atzori | d28b7085f6 | |
Claudio Atzori | 3b1c8b9fbd | |
Claudio Atzori | 1d594eaffd | |
Giambattista Bloisi | 0e44b037a5 | |
Claudio Atzori | 6dfcd0c9a2 | |
Claudio Atzori | 39d24d5469 | |
Claudio Atzori | 389e3fcc59 | |
Sandro La Bruzzo | a5a89a702f | |
Miriam Baglioni | 159388f9c2 | |
Claudio Atzori | 03670bb9ce | |
Claudio Atzori | 54fbf09ac6 | |
Claudio Atzori | 6cf64d5d8b | |
Claudio Atzori | 76447958bb | |
Claudio Atzori | 1902728f7e | |
Claudio Atzori | dda602fff7 | |
Claudio Atzori | 05ee7d8b09 | |
Miriam Baglioni | 8e9493fad9 | |
Miriam Baglioni | 89184d5b4f | |
Claudio Atzori | 554551682d | |
Claudio Atzori | a460ebe215 | |
Claudio Atzori | ecea58a41c | |
Claudio Atzori | 66064e99fe | |
Miriam Baglioni | a431b04814 | |
Claudio Atzori | ed9282ef2a | |
Miriam Baglioni | 110ce4b40f | |
Claudio Atzori | 204404b0e3 | |
Claudio Atzori | 9a98f408b3 | |
Claudio Atzori | 4e6fccf4f6 | |
Miriam Baglioni | a3d01ccb24 | |
Miriam Baglioni | 8448b9ebfb | |
Miriam Baglioni | 3d6be20989 | |
dimitrispie | 17586f0ff8 | |
dimitrispie | 489a082f04 | |
Claudio Atzori | ef833840c3 | |
Claudio Atzori | 84a58802ab | |
Claudio Atzori | 46034630cf | |
Claudio Atzori | 774e874d18 | |
Claudio Atzori | 3bc44fbf1d | |
Claudio Atzori | 11153742c9 | |
Claudio Atzori | 8108491722 | |
Giambattista Bloisi | 2f3cf6d0e7 | |
Claudio Atzori | 6856ab28ab | |
Claudio Atzori | 3c23d5f9bc | |
Claudio Atzori | 858931ccb6 | |
Claudio Atzori | f759b18bca | |
Claudio Atzori | eed9fe0902 | |
Claudio Atzori | 7f27111b1f | |
Claudio Atzori | 73c49b8d26 | |
Sandro La Bruzzo | 42a2dad975 | |
Sandro La Bruzzo | 13f332ce77 | |
Serafeim Chatzopoulos | 1bb83b9188 | |
Claudio Atzori | ee8a39e7d2 | |
Serafeim Chatzopoulos | e9f24df21c | |
Serafeim Chatzopoulos | cae75fc75d | |
Serafeim Chatzopoulos | b49a3ac9b2 | |
Serafeim Chatzopoulos | 24c43e0c60 | |
Serafeim Chatzopoulos | 9f73d93e62 | |
Claudio Atzori | b446a9ed98 | |
Claudio Atzori | f344ad76d0 | |
Claudio Atzori | 5919e488dd | |
Serafeim Chatzopoulos | 839a8524e7 | |
Claudio Atzori | c9a5ad6a02 | |
Miriam Baglioni | d7fccdc64b | |
Miriam Baglioni | 9898470b0e | |
Giambattista Bloisi | c412dc162b | |
Claudio Atzori | 5d09b7db8b | |
Claudio Atzori | 7b403a920f | |
Claudio Atzori | dc86018a5f | |
Giambattista Bloisi | 3c47920c78 | |
Claudio Atzori | 7f244d9a7a | |
Giambattista Bloisi | e239b81740 | |
Miriam Baglioni | e84f5b5e64 | |
Serafeim Chatzopoulos | ab0d70691c | |
Serafeim Chatzopoulos | ed9c81a0b7 | |
Alessia Bardi | 0935d7757c | |
Alessia Bardi | cc7204a089 | |
Sandro La Bruzzo | 76476cdfb6 | |
dimitrispie | 9ef971a146 | |
Serafeim Chatzopoulos | 9d44418d38 | |
Serafeim Chatzopoulos | 395a4af020 | |
Claudio Atzori | 8a6892cc63 | |
Claudio Atzori | 4786aa0e09 | |
dimitrispie | 5f90cc11e9 | |
Giambattista Bloisi | 2caaaec42d | |
dimitrispie | 964c2f553e | |
Giambattista Bloisi | 6cc7d8ca7b | |
Miriam Baglioni | 599828ce35 | |
Miriam Baglioni | 9e8e39f78a | |
dimitrispie | be4856ef35 | |
dimitrispie | 163b2ee2a8 | |
Miriam Baglioni | 4c9bc4c3a5 | |
Miriam Baglioni | 8621377917 | |
Miriam Baglioni | ef2dd7a980 | |
Miriam Baglioni | 55ea485783 | |
Miriam Baglioni | e4b27182d0 | |
Miriam Baglioni | d9506035e4 | |
Miriam Baglioni | b64a5eb4a5 | |
Miriam Baglioni | 9fc8ebe98b |
|
@ -27,3 +27,4 @@ spark-warehouse
|
||||||
/**/.factorypath
|
/**/.factorypath
|
||||||
/**/.scalafmt.conf
|
/**/.scalafmt.conf
|
||||||
/.java-version
|
/.java-version
|
||||||
|
/dhp-shade-package/dependency-reduced-pom.xml
|
||||||
|
|
|
@ -0,0 +1,43 @@
|
||||||
|
# Contributor Code of Conduct
|
||||||
|
|
||||||
|
Openness, transparency and our community-driven participatory approach guide us in our day-to-day interactions and decision-making. Our open source projects are no exception. Trust, respect, collaboration and transparency are core values we believe should live and breathe within our projects. Our community welcomes participants from around the world with different experiences, unique perspectives, and great ideas to share.
|
||||||
|
|
||||||
|
## Our Pledge
|
||||||
|
|
||||||
|
In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation.
|
||||||
|
|
||||||
|
## Our Standards
|
||||||
|
|
||||||
|
Examples of behavior that contributes to creating a positive environment include:
|
||||||
|
|
||||||
|
- Using welcoming and inclusive language
|
||||||
|
- Being respectful of differing viewpoints and experiences
|
||||||
|
- Gracefully accepting constructive criticism
|
||||||
|
- Attempting collaboration before conflict
|
||||||
|
- Focusing on what is best for the community
|
||||||
|
- Showing empathy towards other community members
|
||||||
|
|
||||||
|
Examples of unacceptable behavior by participants include:
|
||||||
|
|
||||||
|
- Violence, threats of violence, or inciting others to commit self-harm
|
||||||
|
- The use of sexualized language or imagery and unwelcome sexual attention or advances
|
||||||
|
- Trolling, intentionally spreading misinformation, insulting/derogatory comments, and personal or political attacks
|
||||||
|
- Public or private harassment
|
||||||
|
- Publishing others' private information, such as a physical or electronic address, without explicit permission
|
||||||
|
- Abuse of the reporting process to intentionally harass or exclude others
|
||||||
|
- Advocating for, or encouraging, any of the above behavior
|
||||||
|
- Other conduct which could reasonably be considered inappropriate in a professional setting
|
||||||
|
|
||||||
|
## Our Responsibilities
|
||||||
|
|
||||||
|
Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
|
||||||
|
|
||||||
|
Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
|
||||||
|
|
||||||
|
## Scope
|
||||||
|
|
||||||
|
This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
|
||||||
|
|
||||||
|
## Attribution
|
||||||
|
|
||||||
|
This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/), [version 1.4](https://www.contributor-covenant.org/version/1/4/code-of-conduct.html).
|
|
@ -0,0 +1,10 @@
|
||||||
|
# Contributing to D-Net Hadoop
|
||||||
|
|
||||||
|
:+1::tada: First off, thanks for taking the time to contribute! :tada::+1:
|
||||||
|
|
||||||
|
This project and everyone participating in it is governed by our [Code of Conduct](CODE_OF_CONDUCT.md). By participating, you are expected to uphold this code. Please report unacceptable behavior to [dnet-team@isti.cnr.it](mailto:dnet-team@isti.cnr.it).
|
||||||
|
|
||||||
|
The following is a set of guidelines for contributing to this project and its packages. These are mostly guidelines, not rules, and they apply to this project as a whole, including all its sub-modules.
|
||||||
|
Use your best judgment, and feel free to propose changes to this document in a pull request.
|
||||||
|
|
||||||
|
All contributions are welcome; every contribution will be considered to be contributed under the [project license](LICENSE.md).
|
|
@ -2,6 +2,11 @@
|
||||||
|
|
||||||
Dnet-hadoop is the project that defined all the [OOZIE workflows](https://oozie.apache.org/) for the OpenAIRE Graph construction, processing, provisioning.
|
Dnet-hadoop is the project that defined all the [OOZIE workflows](https://oozie.apache.org/) for the OpenAIRE Graph construction, processing, provisioning.
|
||||||
|
|
||||||
|
This project adheres to the Contributor Covenant [code of conduct](CODE_OF_CONDUCT.md).
|
||||||
|
By participating, you are expected to uphold this code. Please report unacceptable behavior to [dnet-team@isti.cnr.it](mailto:dnet-team@isti.cnr.it).
|
||||||
|
|
||||||
|
This project is licensed under the [AGPL v3 or later version](LICENSE.md).
|
||||||
|
|
||||||
How to build, package and run oozie workflows
|
How to build, package and run oozie workflows
|
||||||
====================
|
====================
|
||||||
|
|
||||||
|
|
|
@ -80,7 +80,15 @@ class WritePredefinedProjectPropertiesTest {
|
||||||
mojo.outputFile = testFolder;
|
mojo.outputFile = testFolder;
|
||||||
|
|
||||||
// execute
|
// execute
|
||||||
Assertions.assertThrows(MojoExecutionException.class, () -> mojo.execute());
|
try {
|
||||||
|
mojo.execute();
|
||||||
|
Assertions.assertTrue(false); // not reached
|
||||||
|
} catch (Exception e) {
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
MojoExecutionException.class.isAssignableFrom(e.getClass()) ||
|
||||||
|
IllegalArgumentException.class.isAssignableFrom(e.getClass()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
@ -63,15 +63,14 @@
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>edu.cmu</groupId>
|
||||||
<artifactId>dhp-pace-core</artifactId>
|
<artifactId>secondstring</artifactId>
|
||||||
<version>${project.version}</version>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.ibm.icu</groupId>
|
||||||
|
<artifactId>icu4j</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-common</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.github.sisyphsu</groupId>
|
<groupId>com.github.sisyphsu</groupId>
|
||||||
<artifactId>dateparser</artifactId>
|
<artifactId>dateparser</artifactId>
|
||||||
|
@ -161,7 +160,7 @@
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<artifactId>${dhp-schemas.artifact}</artifactId>
|
<artifactId>dhp-schemas</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
|
@ -170,4 +169,23 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
|
<!-- dependencies required on JDK9+ because J2EE has been removed -->
|
||||||
|
<profiles>
|
||||||
|
<profile>
|
||||||
|
<id>spark-34</id>
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>javax.xml.bind</groupId>
|
||||||
|
<artifactId>jaxb-api</artifactId>
|
||||||
|
<version>2.2.11</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.sun.xml.ws</groupId>
|
||||||
|
<artifactId>jaxws-ri</artifactId>
|
||||||
|
<version>2.3.3</version>
|
||||||
|
<type>pom</type>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
</profile>
|
||||||
|
</profiles>
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -51,6 +51,7 @@ public class Constants {
|
||||||
public static final String RETRY_DELAY = "retryDelay";
|
public static final String RETRY_DELAY = "retryDelay";
|
||||||
public static final String CONNECT_TIMEOUT = "connectTimeOut";
|
public static final String CONNECT_TIMEOUT = "connectTimeOut";
|
||||||
public static final String READ_TIMEOUT = "readTimeOut";
|
public static final String READ_TIMEOUT = "readTimeOut";
|
||||||
|
public static final String REQUEST_METHOD = "requestMethod";
|
||||||
public static final String FROM_DATE_OVERRIDE = "fromDateOverride";
|
public static final String FROM_DATE_OVERRIDE = "fromDateOverride";
|
||||||
public static final String UNTIL_DATE_OVERRIDE = "untilDateOverride";
|
public static final String UNTIL_DATE_OVERRIDE = "untilDateOverride";
|
||||||
|
|
||||||
|
|
|
@ -7,12 +7,12 @@ import java.sql.*;
|
||||||
import java.util.function.Consumer;
|
import java.util.function.Consumer;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.commons.logging.Log;
|
import org.slf4j.Logger;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
public class DbClient implements Closeable {
|
public class DbClient implements Closeable {
|
||||||
|
|
||||||
private static final Log log = LogFactory.getLog(DbClient.class);
|
private static final Logger log = LoggerFactory.getLogger(DbClient.class);
|
||||||
|
|
||||||
private final Connection connection;
|
private final Connection connection;
|
||||||
|
|
||||||
|
@ -37,6 +37,8 @@ public class DbClient implements Closeable {
|
||||||
try (final Statement stmt = connection.createStatement()) {
|
try (final Statement stmt = connection.createStatement()) {
|
||||||
stmt.setFetchSize(100);
|
stmt.setFetchSize(100);
|
||||||
|
|
||||||
|
log.info("running SQL:\n\n{}\n\n", sql);
|
||||||
|
|
||||||
try (final ResultSet rs = stmt.executeQuery(sql)) {
|
try (final ResultSet rs = stmt.executeQuery(sql)) {
|
||||||
while (rs.next()) {
|
while (rs.next()) {
|
||||||
consumer.accept(rs);
|
consumer.accept(rs);
|
||||||
|
|
|
@ -38,7 +38,7 @@ public class PacePerson {
|
||||||
PacePerson.class
|
PacePerson.class
|
||||||
.getResourceAsStream(
|
.getResourceAsStream(
|
||||||
"/eu/dnetlib/dhp/common/name_particles.txt")));
|
"/eu/dnetlib/dhp/common/name_particles.txt")));
|
||||||
} catch (IOException e) {
|
} catch (Exception e) {
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,53 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.common.api;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStream;
|
|
||||||
|
|
||||||
import okhttp3.MediaType;
|
|
||||||
import okhttp3.RequestBody;
|
|
||||||
import okhttp3.internal.Util;
|
|
||||||
import okio.BufferedSink;
|
|
||||||
import okio.Okio;
|
|
||||||
import okio.Source;
|
|
||||||
|
|
||||||
public class InputStreamRequestBody extends RequestBody {
|
|
||||||
|
|
||||||
private final InputStream inputStream;
|
|
||||||
private final MediaType mediaType;
|
|
||||||
private final long lenght;
|
|
||||||
|
|
||||||
public static RequestBody create(final MediaType mediaType, final InputStream inputStream, final long len) {
|
|
||||||
|
|
||||||
return new InputStreamRequestBody(inputStream, mediaType, len);
|
|
||||||
}
|
|
||||||
|
|
||||||
private InputStreamRequestBody(InputStream inputStream, MediaType mediaType, long len) {
|
|
||||||
this.inputStream = inputStream;
|
|
||||||
this.mediaType = mediaType;
|
|
||||||
this.lenght = len;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public MediaType contentType() {
|
|
||||||
return mediaType;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public long contentLength() {
|
|
||||||
|
|
||||||
return lenght;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void writeTo(BufferedSink sink) throws IOException {
|
|
||||||
Source source = null;
|
|
||||||
try {
|
|
||||||
source = Okio.source(inputStream);
|
|
||||||
sink.writeAll(source);
|
|
||||||
} finally {
|
|
||||||
Util.closeQuietly(source);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,8 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.common.api;
|
|
||||||
|
|
||||||
/**
 * Checked exception carrying a message about a missing concept DOI / concept
 * record id.
 *
 * <p>Extends {@link Exception} rather than {@link Throwable} directly, so that
 * generic {@code catch (Exception e)} handlers see it like any other checked
 * exception. Existing {@code throws} / {@code catch} clauses naming this type
 * keep working unchanged.
 */
public class MissingConceptDoiException extends Exception {

	/**
	 * @param message human-readable description of the failure
	 */
	public MissingConceptDoiException(String message) {
		super(message);
	}
}
|
|
|
@ -1,365 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.common.api;
|
|
||||||
|
|
||||||
import java.io.*;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.net.HttpURLConnection;
|
|
||||||
import java.net.URL;
|
|
||||||
import java.util.concurrent.TimeUnit;
|
|
||||||
|
|
||||||
import org.apache.http.HttpHeaders;
|
|
||||||
import org.apache.http.entity.ContentType;
|
|
||||||
import org.jetbrains.annotations.NotNull;
|
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel;
|
|
||||||
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList;
|
|
||||||
import okhttp3.*;
|
|
||||||
|
|
||||||
public class ZenodoAPIClient implements Serializable {
|
|
||||||
|
|
||||||
String urlString;
|
|
||||||
String bucket;
|
|
||||||
|
|
||||||
String deposition_id;
|
|
||||||
String access_token;
|
|
||||||
|
|
||||||
public static final MediaType MEDIA_TYPE_JSON = MediaType.parse("application/json; charset=utf-8");
|
|
||||||
|
|
||||||
private static final MediaType MEDIA_TYPE_ZIP = MediaType.parse("application/zip");
|
|
||||||
|
|
||||||
public String getUrlString() {
|
|
||||||
return urlString;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setUrlString(String urlString) {
|
|
||||||
this.urlString = urlString;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getBucket() {
|
|
||||||
return bucket;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setBucket(String bucket) {
|
|
||||||
this.bucket = bucket;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setDeposition_id(String deposition_id) {
|
|
||||||
this.deposition_id = deposition_id;
|
|
||||||
}
|
|
||||||
|
|
||||||
public ZenodoAPIClient(String urlString, String access_token) {
|
|
||||||
|
|
||||||
this.urlString = urlString;
|
|
||||||
this.access_token = access_token;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Brand new deposition in Zenodo. It sets the deposition_id and the bucket where to store the files to upload
|
|
||||||
*
|
|
||||||
* @return response code
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
public int newDeposition() throws IOException {
|
|
||||||
String json = "{}";
|
|
||||||
|
|
||||||
URL url = new URL(urlString);
|
|
||||||
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
|
|
||||||
conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
|
|
||||||
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
|
|
||||||
conn.setRequestMethod("POST");
|
|
||||||
conn.setDoOutput(true);
|
|
||||||
try (OutputStream os = conn.getOutputStream()) {
|
|
||||||
byte[] input = json.getBytes("utf-8");
|
|
||||||
os.write(input, 0, input.length);
|
|
||||||
}
|
|
||||||
|
|
||||||
String body = getBody(conn);
|
|
||||||
|
|
||||||
int responseCode = conn.getResponseCode();
|
|
||||||
conn.disconnect();
|
|
||||||
|
|
||||||
if (!checkOKStatus(responseCode))
|
|
||||||
throw new IOException("Unexpected code " + responseCode + body);
|
|
||||||
|
|
||||||
ZenodoModel newSubmission = new Gson().fromJson(body, ZenodoModel.class);
|
|
||||||
this.bucket = newSubmission.getLinks().getBucket();
|
|
||||||
this.deposition_id = newSubmission.getId();
|
|
||||||
|
|
||||||
return responseCode;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Upload files in Zenodo.
|
|
||||||
*
|
|
||||||
* @param is the inputStream for the file to upload
|
|
||||||
* @param file_name the name of the file as it will appear on Zenodo
|
|
||||||
* @return the response code
|
|
||||||
*/
|
|
||||||
public int uploadIS(InputStream is, String file_name) throws IOException {
|
|
||||||
|
|
||||||
URL url = new URL(bucket + "/" + file_name);
|
|
||||||
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
|
|
||||||
conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, "application/zip");
|
|
||||||
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
|
|
||||||
conn.setDoOutput(true);
|
|
||||||
conn.setRequestMethod("PUT");
|
|
||||||
|
|
||||||
byte[] buf = new byte[8192];
|
|
||||||
int length;
|
|
||||||
try (OutputStream os = conn.getOutputStream()) {
|
|
||||||
while ((length = is.read(buf)) != -1) {
|
|
||||||
os.write(buf, 0, length);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
int responseCode = conn.getResponseCode();
|
|
||||||
if (!checkOKStatus(responseCode)) {
|
|
||||||
throw new IOException("Unexpected code " + responseCode + getBody(conn));
|
|
||||||
}
|
|
||||||
|
|
||||||
return responseCode;
|
|
||||||
}
|
|
||||||
|
|
||||||
@NotNull
|
|
||||||
private String getBody(HttpURLConnection conn) throws IOException {
|
|
||||||
String body = "{}";
|
|
||||||
try (BufferedReader br = new BufferedReader(
|
|
||||||
new InputStreamReader(conn.getInputStream(), "utf-8"))) {
|
|
||||||
StringBuilder response = new StringBuilder();
|
|
||||||
String responseLine = null;
|
|
||||||
while ((responseLine = br.readLine()) != null) {
|
|
||||||
response.append(responseLine.trim());
|
|
||||||
}
|
|
||||||
|
|
||||||
body = response.toString();
|
|
||||||
|
|
||||||
}
|
|
||||||
return body;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Associates metadata information to the current deposition
|
|
||||||
*
|
|
||||||
* @param metadata the metadata
|
|
||||||
* @return response code
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
public int sendMretadata(String metadata) throws IOException {
|
|
||||||
|
|
||||||
URL url = new URL(urlString + "/" + deposition_id);
|
|
||||||
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
|
|
||||||
conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
|
|
||||||
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
|
|
||||||
conn.setDoOutput(true);
|
|
||||||
conn.setRequestMethod("PUT");
|
|
||||||
|
|
||||||
try (OutputStream os = conn.getOutputStream()) {
|
|
||||||
byte[] input = metadata.getBytes("utf-8");
|
|
||||||
os.write(input, 0, input.length);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
final int responseCode = conn.getResponseCode();
|
|
||||||
conn.disconnect();
|
|
||||||
if (!checkOKStatus(responseCode))
|
|
||||||
throw new IOException("Unexpected code " + responseCode + getBody(conn));
|
|
||||||
|
|
||||||
return responseCode;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean checkOKStatus(int responseCode) {
|
|
||||||
|
|
||||||
if (HttpURLConnection.HTTP_OK != responseCode ||
|
|
||||||
HttpURLConnection.HTTP_CREATED != responseCode)
|
|
||||||
return true;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* To publish the current deposition. It works for both new deposition or new version of an old deposition
|
|
||||||
*
|
|
||||||
* @return response code
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
public int publish() throws IOException {
|
|
||||||
|
|
||||||
String json = "{}";
|
|
||||||
|
|
||||||
OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
|
|
||||||
|
|
||||||
RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON);
|
|
||||||
|
|
||||||
Request request = new Request.Builder()
|
|
||||||
.url(urlString + "/" + deposition_id + "/actions/publish")
|
|
||||||
.addHeader("Authorization", "Bearer " + access_token)
|
|
||||||
.post(body)
|
|
||||||
.build();
|
|
||||||
|
|
||||||
try (Response response = httpClient.newCall(request).execute()) {
|
|
||||||
|
|
||||||
if (!response.isSuccessful())
|
|
||||||
throw new IOException("Unexpected code " + response + response.body().string());
|
|
||||||
|
|
||||||
return response.code();
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* To create a new version of an already published deposition. It sets the deposition_id and the bucket to be used
|
|
||||||
* for the new version.
|
|
||||||
*
|
|
||||||
* @param concept_rec_id the concept record id of the deposition for which to create a new version. It is the last
|
|
||||||
* part of the url for the DOI Zenodo suggests to use to cite all versions: DOI: 10.xxx/zenodo.656930
|
|
||||||
* concept_rec_id = 656930
|
|
||||||
* @return response code
|
|
||||||
* @throws IOException
|
|
||||||
* @throws MissingConceptDoiException
|
|
||||||
*/
|
|
||||||
public int newVersion(String concept_rec_id) throws IOException, MissingConceptDoiException {
|
|
||||||
setDepositionId(concept_rec_id, 1);
|
|
||||||
String json = "{}";
|
|
||||||
|
|
||||||
URL url = new URL(urlString + "/" + deposition_id + "/actions/newversion");
|
|
||||||
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
|
|
||||||
|
|
||||||
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
|
|
||||||
conn.setDoOutput(true);
|
|
||||||
conn.setRequestMethod("POST");
|
|
||||||
|
|
||||||
try (OutputStream os = conn.getOutputStream()) {
|
|
||||||
byte[] input = json.getBytes("utf-8");
|
|
||||||
os.write(input, 0, input.length);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
String body = getBody(conn);
|
|
||||||
|
|
||||||
int responseCode = conn.getResponseCode();
|
|
||||||
|
|
||||||
conn.disconnect();
|
|
||||||
if (!checkOKStatus(responseCode))
|
|
||||||
throw new IOException("Unexpected code " + responseCode + body);
|
|
||||||
|
|
||||||
ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
|
|
||||||
String latest_draft = zenodoModel.getLinks().getLatest_draft();
|
|
||||||
deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1);
|
|
||||||
bucket = getBucket(latest_draft);
|
|
||||||
|
|
||||||
return responseCode;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* To finish uploading a version or new deposition not published
|
|
||||||
* It sets the deposition_id and the bucket to be used
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* @param deposition_id the deposition id of the not yet published upload
|
|
||||||
* concept_rec_id = 656930
|
|
||||||
* @return response code
|
|
||||||
* @throws IOException
|
|
||||||
* @throws MissingConceptDoiException
|
|
||||||
*/
|
|
||||||
public int uploadOpenDeposition(String deposition_id) throws IOException, MissingConceptDoiException {
|
|
||||||
|
|
||||||
this.deposition_id = deposition_id;
|
|
||||||
|
|
||||||
String json = "{}";
|
|
||||||
|
|
||||||
URL url = new URL(urlString + "/" + deposition_id);
|
|
||||||
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
|
|
||||||
|
|
||||||
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
|
|
||||||
conn.setRequestMethod("POST");
|
|
||||||
conn.setDoOutput(true);
|
|
||||||
try (OutputStream os = conn.getOutputStream()) {
|
|
||||||
byte[] input = json.getBytes("utf-8");
|
|
||||||
os.write(input, 0, input.length);
|
|
||||||
}
|
|
||||||
|
|
||||||
String body = getBody(conn);
|
|
||||||
|
|
||||||
int responseCode = conn.getResponseCode();
|
|
||||||
conn.disconnect();
|
|
||||||
|
|
||||||
if (!checkOKStatus(responseCode))
|
|
||||||
throw new IOException("Unexpected code " + responseCode + body);
|
|
||||||
|
|
||||||
ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
|
|
||||||
bucket = zenodoModel.getLinks().getBucket();
|
|
||||||
|
|
||||||
return responseCode;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private void setDepositionId(String concept_rec_id, Integer page) throws IOException, MissingConceptDoiException {
|
|
||||||
|
|
||||||
ZenodoModelList zenodoModelList = new Gson()
|
|
||||||
.fromJson(getPrevDepositions(String.valueOf(page)), ZenodoModelList.class);
|
|
||||||
|
|
||||||
for (ZenodoModel zm : zenodoModelList) {
|
|
||||||
if (zm.getConceptrecid().equals(concept_rec_id)) {
|
|
||||||
deposition_id = zm.getId();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (zenodoModelList.size() == 0)
|
|
||||||
throw new MissingConceptDoiException(
|
|
||||||
"The concept record id specified was missing in the list of depositions");
|
|
||||||
setDepositionId(concept_rec_id, page + 1);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private String getPrevDepositions(String page) throws IOException {
|
|
||||||
|
|
||||||
HttpUrl.Builder urlBuilder = HttpUrl.parse(urlString).newBuilder();
|
|
||||||
urlBuilder.addQueryParameter("page", page);
|
|
||||||
|
|
||||||
URL url = new URL(urlBuilder.build().toString());
|
|
||||||
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
|
|
||||||
conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
|
|
||||||
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
|
|
||||||
conn.setDoOutput(true);
|
|
||||||
conn.setRequestMethod("GET");
|
|
||||||
|
|
||||||
String body = getBody(conn);
|
|
||||||
|
|
||||||
int responseCode = conn.getResponseCode();
|
|
||||||
|
|
||||||
conn.disconnect();
|
|
||||||
if (!checkOKStatus(responseCode))
|
|
||||||
throw new IOException("Unexpected code " + responseCode + body);
|
|
||||||
|
|
||||||
return body;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private String getBucket(String inputUurl) throws IOException {
|
|
||||||
|
|
||||||
URL url = new URL(inputUurl);
|
|
||||||
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
|
|
||||||
conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
|
|
||||||
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
|
|
||||||
conn.setDoOutput(true);
|
|
||||||
conn.setRequestMethod("GET");
|
|
||||||
|
|
||||||
String body = getBody(conn);
|
|
||||||
|
|
||||||
int responseCode = conn.getResponseCode();
|
|
||||||
|
|
||||||
conn.disconnect();
|
|
||||||
if (!checkOKStatus(responseCode))
|
|
||||||
throw new IOException("Unexpected code " + responseCode + body);
|
|
||||||
|
|
||||||
ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
|
|
||||||
|
|
||||||
return zenodoModel.getLinks().getBucket();
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -0,0 +1,39 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api.context;
|
||||||
|
|
||||||
|
/**
 * Data holder with an id, a label and a {@code hasConcept} flag.
 * Setters return {@code this} so that calls can be chained.
 */
public class CategorySummary {

	private String id;
	private String label;
	private boolean hasConcept;

	/** @return the id, or {@code null} if never set */
	public String getId() {
		return this.id;
	}

	/** @return this instance, for chaining */
	public CategorySummary setId(final String id) {
		this.id = id;
		return this;
	}

	/** @return the label, or {@code null} if never set */
	public String getLabel() {
		return this.label;
	}

	/** @return this instance, for chaining */
	public CategorySummary setLabel(final String label) {
		this.label = label;
		return this;
	}

	/** @return the current value of the {@code hasConcept} flag */
	public boolean isHasConcept() {
		return this.hasConcept;
	}

	/** @return this instance, for chaining */
	public CategorySummary setHasConcept(final boolean hasConcept) {
		this.hasConcept = hasConcept;
		return this;
	}

}
|
|
@ -0,0 +1,7 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api.context;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
public class CategorySummaryList extends ArrayList<CategorySummary> {
|
||||||
|
}
|
|
@ -0,0 +1,52 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api.context;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
 * Data holder with an id, a label, a {@code hasSubConcept} flag and a list of
 * nested {@link ConceptSummary} elements. Setters return {@code this} so that
 * calls can be chained.
 */
public class ConceptSummary {

	private String id;
	private String label;

	// NOTE(review): public, unlike the other fields of this class; kept public
	// so that any code reading the field directly keeps compiling.
	public boolean hasSubConcept;

	private List<ConceptSummary> concepts;

	/** @return the id, or {@code null} if never set */
	public String getId() {
		return this.id;
	}

	/** @return this instance, for chaining */
	public ConceptSummary setId(final String id) {
		this.id = id;
		return this;
	}

	/** @return the label, or {@code null} if never set */
	public String getLabel() {
		return this.label;
	}

	/** @return this instance, for chaining */
	public ConceptSummary setLabel(final String label) {
		this.label = label;
		return this;
	}

	/** @return the current value of the {@code hasSubConcept} flag */
	public boolean isHasSubConcept() {
		return this.hasSubConcept;
	}

	/** @return this instance, for chaining */
	public ConceptSummary setHasSubConcept(final boolean hasSubConcept) {
		this.hasSubConcept = hasSubConcept;
		return this;
	}

	/** @return the nested concepts list as stored (not copied), or {@code null} if never set */
	public List<ConceptSummary> getConcepts() {
		return this.concepts;
	}

	/**
	 * Stores the given list reference as the nested concepts.
	 *
	 * @return this instance, for chaining
	 */
	public ConceptSummary setConcept(final List<ConceptSummary> concepts) {
		this.concepts = concepts;
		return this;
	}

}
|
|
@ -0,0 +1,7 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api.context;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
public class ConceptSummaryList extends ArrayList<ConceptSummary> {
|
||||||
|
}
|
|
@ -0,0 +1,50 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api.context;
|
||||||
|
|
||||||
|
/**
 * Data holder with an id, a label, a type and a status, all strings.
 * Setters return {@code this} so that calls can be chained.
 */
public class ContextSummary {

	private String id;
	private String label;
	private String type;
	private String status;

	/** @return the id, or {@code null} if never set */
	public String getId() {
		return this.id;
	}

	/** @return this instance, for chaining */
	public ContextSummary setId(final String id) {
		this.id = id;
		return this;
	}

	/** @return the label, or {@code null} if never set */
	public String getLabel() {
		return this.label;
	}

	/** @return this instance, for chaining */
	public ContextSummary setLabel(final String label) {
		this.label = label;
		return this;
	}

	/** @return the type, or {@code null} if never set */
	public String getType() {
		return this.type;
	}

	/** @return this instance, for chaining */
	public ContextSummary setType(final String type) {
		this.type = type;
		return this;
	}

	/** @return the status, or {@code null} if never set */
	public String getStatus() {
		return this.status;
	}

	/** @return this instance, for chaining */
	public ContextSummary setStatus(final String status) {
		this.status = status;
		return this;
	}

}
|
|
@ -0,0 +1,7 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.api.context;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
public class ContextSummaryList extends ArrayList<ContextSummary> {
|
||||||
|
}
|
|
@ -1,14 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.common.api.zenodo;
|
|
||||||
|
|
||||||
/**
 * Bean holding a single {@code identifier} string.
 */
public class Community {

	private String identifier;

	/** @return the identifier, or {@code null} if never set */
	public String getIdentifier() {
		return this.identifier;
	}

	/** @param identifier the new identifier value */
	public void setIdentifier(String identifier) {
		this.identifier = identifier;
	}
}
|
|
|
@ -1,47 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.common.api.zenodo;
|
|
||||||
|
|
||||||
public class Creator {
|
|
||||||
private String affiliation;
|
|
||||||
private String name;
|
|
||||||
private String orcid;
|
|
||||||
|
|
||||||
public String getAffiliation() {
|
|
||||||
return affiliation;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setAffiliation(String affiliation) {
|
|
||||||
this.affiliation = affiliation;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getName() {
|
|
||||||
return name;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setName(String name) {
|
|
||||||
this.name = name;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getOrcid() {
|
|
||||||
return orcid;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setOrcid(String orcid) {
|
|
||||||
this.orcid = orcid;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Creator newInstance(String name, String affiliation, String orcid) {
|
|
||||||
Creator c = new Creator();
|
|
||||||
if (name != null) {
|
|
||||||
c.name = name;
|
|
||||||
}
|
|
||||||
if (affiliation != null) {
|
|
||||||
c.affiliation = affiliation;
|
|
||||||
}
|
|
||||||
if (orcid != null) {
|
|
||||||
c.orcid = orcid;
|
|
||||||
}
|
|
||||||
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,44 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.common.api.zenodo;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
|
|
||||||
/**
 * Serializable bean with a checksum, a file name, a file size and an id.
 */
public class File implements Serializable {

	private String checksum;
	private String filename;
	private long filesize;
	private String id;

	/** @return the checksum, or {@code null} if never set */
	public String getChecksum() {
		return this.checksum;
	}

	/** @param checksum the new checksum value */
	public void setChecksum(String checksum) {
		this.checksum = checksum;
	}

	/** @return the file name, or {@code null} if never set */
	public String getFilename() {
		return this.filename;
	}

	/** @param filename the new file name value */
	public void setFilename(String filename) {
		this.filename = filename;
	}

	/** @return the file size ({@code 0} if never set) */
	public long getFilesize() {
		return this.filesize;
	}

	/** @param filesize the new file size value */
	public void setFilesize(long filesize) {
		this.filesize = filesize;
	}

	/** @return the id, or {@code null} if never set */
	public String getId() {
		return this.id;
	}

	/** @param id the new id value */
	public void setId(String id) {
		this.id = id;
	}

}
|
|
|
@ -1,23 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.common.api.zenodo;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
|
|
||||||
/**
 * Serializable bean holding a single {@code id} string.
 */
public class Grant implements Serializable {

	private String id;

	/**
	 * Builds a new instance with the given id.
	 *
	 * @param id the id to store (may be {@code null})
	 * @return a freshly created {@code Grant}
	 */
	public static Grant newInstance(String id) {
		final Grant grant = new Grant();
		grant.id = id;
		return grant;
	}

	/** @return the id, or {@code null} if never set */
	public String getId() {
		return this.id;
	}

	/** @param id the new id value */
	public void setId(String id) {
		this.id = id;
	}
}
|
|
|
@ -1,92 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.common.api.zenodo;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
|
|
||||||
/**
 * Serializable bean with nine string properties: bucket, discard, edit, files,
 * html, latest_draft, latest_draft_html, publish and self.
 */
public class Links implements Serializable {

	private String bucket;
	private String discard;
	private String edit;
	private String files;
	private String html;
	private String latest_draft;
	private String latest_draft_html;
	private String publish;
	private String self;

	/** @return the bucket value, or {@code null} if never set */
	public String getBucket() {
		return this.bucket;
	}

	/** @param bucket the new bucket value */
	public void setBucket(String bucket) {
		this.bucket = bucket;
	}

	/** @return the discard value, or {@code null} if never set */
	public String getDiscard() {
		return this.discard;
	}

	/** @param discard the new discard value */
	public void setDiscard(String discard) {
		this.discard = discard;
	}

	/** @return the edit value, or {@code null} if never set */
	public String getEdit() {
		return this.edit;
	}

	/** @param edit the new edit value */
	public void setEdit(String edit) {
		this.edit = edit;
	}

	/** @return the files value, or {@code null} if never set */
	public String getFiles() {
		return this.files;
	}

	/** @param files the new files value */
	public void setFiles(String files) {
		this.files = files;
	}

	/** @return the html value, or {@code null} if never set */
	public String getHtml() {
		return this.html;
	}

	/** @param html the new html value */
	public void setHtml(String html) {
		this.html = html;
	}

	/** @return the latest_draft value, or {@code null} if never set */
	public String getLatest_draft() {
		return this.latest_draft;
	}

	/** @param latest_draft the new latest_draft value */
	public void setLatest_draft(String latest_draft) {
		this.latest_draft = latest_draft;
	}

	/** @return the latest_draft_html value, or {@code null} if never set */
	public String getLatest_draft_html() {
		return this.latest_draft_html;
	}

	/** @param latest_draft_html the new latest_draft_html value */
	public void setLatest_draft_html(String latest_draft_html) {
		this.latest_draft_html = latest_draft_html;
	}

	/** @return the publish value, or {@code null} if never set */
	public String getPublish() {
		return this.publish;
	}

	/** @param publish the new publish value */
	public void setPublish(String publish) {
		this.publish = publish;
	}

	/** @return the self value, or {@code null} if never set */
	public String getSelf() {
		return this.self;
	}

	/** @param self the new self value */
	public void setSelf(String self) {
		this.self = self;
	}
}
|
|
|
@ -1,153 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.common.api.zenodo;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
public class Metadata implements Serializable {
|
|
||||||
|
|
||||||
private String access_right;
|
|
||||||
private List<Community> communities;
|
|
||||||
private List<Creator> creators;
|
|
||||||
private String description;
|
|
||||||
private String doi;
|
|
||||||
private List<Grant> grants;
|
|
||||||
private List<String> keywords;
|
|
||||||
private String language;
|
|
||||||
private String license;
|
|
||||||
private PrereserveDoi prereserve_doi;
|
|
||||||
private String publication_date;
|
|
||||||
private List<String> references;
|
|
||||||
private List<RelatedIdentifier> related_identifiers;
|
|
||||||
private String title;
|
|
||||||
private String upload_type;
|
|
||||||
private String version;
|
|
||||||
|
|
||||||
public String getUpload_type() {
|
|
||||||
return upload_type;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setUpload_type(String upload_type) {
|
|
||||||
this.upload_type = upload_type;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getVersion() {
|
|
||||||
return version;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setVersion(String version) {
|
|
||||||
this.version = version;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getAccess_right() {
|
|
||||||
return access_right;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setAccess_right(String access_right) {
|
|
||||||
this.access_right = access_right;
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<Community> getCommunities() {
|
|
||||||
return communities;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setCommunities(List<Community> communities) {
|
|
||||||
this.communities = communities;
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<Creator> getCreators() {
|
|
||||||
return creators;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setCreators(List<Creator> creators) {
|
|
||||||
this.creators = creators;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getDescription() {
|
|
||||||
return description;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setDescription(String description) {
|
|
||||||
this.description = description;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getDoi() {
|
|
||||||
return doi;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setDoi(String doi) {
|
|
||||||
this.doi = doi;
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<Grant> getGrants() {
|
|
||||||
return grants;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setGrants(List<Grant> grants) {
|
|
||||||
this.grants = grants;
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<String> getKeywords() {
|
|
||||||
return keywords;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setKeywords(List<String> keywords) {
|
|
||||||
this.keywords = keywords;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getLanguage() {
|
|
||||||
return language;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setLanguage(String language) {
|
|
||||||
this.language = language;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getLicense() {
|
|
||||||
return license;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setLicense(String license) {
|
|
||||||
this.license = license;
|
|
||||||
}
|
|
||||||
|
|
||||||
public PrereserveDoi getPrereserve_doi() {
|
|
||||||
return prereserve_doi;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setPrereserve_doi(PrereserveDoi prereserve_doi) {
|
|
||||||
this.prereserve_doi = prereserve_doi;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getPublication_date() {
|
|
||||||
return publication_date;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setPublication_date(String publication_date) {
|
|
||||||
this.publication_date = publication_date;
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<String> getReferences() {
|
|
||||||
return references;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setReferences(List<String> references) {
|
|
||||||
this.references = references;
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<RelatedIdentifier> getRelated_identifiers() {
|
|
||||||
return related_identifiers;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setRelated_identifiers(List<RelatedIdentifier> related_identifiers) {
|
|
||||||
this.related_identifiers = related_identifiers;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getTitle() {
|
|
||||||
return title;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setTitle(String title) {
|
|
||||||
this.title = title;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,25 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.common.api.zenodo;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
|
|
||||||
/**
 * Serializable bean of the Zenodo API model pairing a {@code doi} with a {@code recid}.
 * Both values are kept as plain strings and are neither validated nor normalised.
 */
public class PrereserveDoi implements Serializable {

	private String doi;
	private String recid;

	/** @return the {@code doi} value */
	public String getDoi() {
		return this.doi;
	}

	/** @param doi the {@code doi} value */
	public void setDoi(final String doi) {
		this.doi = doi;
	}

	/** @return the {@code recid} value */
	public String getRecid() {
		return this.recid;
	}

	/** @param recid the {@code recid} value */
	public void setRecid(final String recid) {
		this.recid = recid;
	}
}
|
|
|
@ -1,43 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.common.api.zenodo;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
|
|
||||||
/**
 * Serializable bean of the Zenodo API model describing an identifier related to a record.
 * <p>
 * {@code resource_type} keeps its snake_case name; presumably it mirrors the JSON property name,
 * so verify the (de)serialization before renaming it.
 */
public class RelatedIdentifier implements Serializable {

	private String identifier;
	private String relation;
	private String resource_type;
	private String scheme;

	/** @return the {@code identifier} value */
	public String getIdentifier() {
		return this.identifier;
	}

	/** @param identifier the {@code identifier} value */
	public void setIdentifier(final String identifier) {
		this.identifier = identifier;
	}

	/** @return the {@code relation} value */
	public String getRelation() {
		return this.relation;
	}

	/** @param relation the {@code relation} value */
	public void setRelation(final String relation) {
		this.relation = relation;
	}

	/** @return the {@code resource_type} value */
	public String getResource_type() {
		return this.resource_type;
	}

	/** @param resource_type the {@code resource_type} value */
	public void setResource_type(final String resource_type) {
		this.resource_type = resource_type;
	}

	/** @return the {@code scheme} value */
	public String getScheme() {
		return this.scheme;
	}

	/** @param scheme the {@code scheme} value */
	public void setScheme(final String scheme) {
		this.scheme = scheme;
	}
}
|
|
|
@ -1,118 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.common.api.zenodo;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
public class ZenodoModel implements Serializable {
|
|
||||||
|
|
||||||
private String conceptrecid;
|
|
||||||
private String created;
|
|
||||||
|
|
||||||
private List<File> files;
|
|
||||||
private String id;
|
|
||||||
private Links links;
|
|
||||||
private Metadata metadata;
|
|
||||||
private String modified;
|
|
||||||
private String owner;
|
|
||||||
private String record_id;
|
|
||||||
private String state;
|
|
||||||
private boolean submitted;
|
|
||||||
private String title;
|
|
||||||
|
|
||||||
public String getConceptrecid() {
|
|
||||||
return conceptrecid;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setConceptrecid(String conceptrecid) {
|
|
||||||
this.conceptrecid = conceptrecid;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getCreated() {
|
|
||||||
return created;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setCreated(String created) {
|
|
||||||
this.created = created;
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<File> getFiles() {
|
|
||||||
return files;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setFiles(List<File> files) {
|
|
||||||
this.files = files;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getId() {
|
|
||||||
return id;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setId(String id) {
|
|
||||||
this.id = id;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Links getLinks() {
|
|
||||||
return links;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setLinks(Links links) {
|
|
||||||
this.links = links;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Metadata getMetadata() {
|
|
||||||
return metadata;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setMetadata(Metadata metadata) {
|
|
||||||
this.metadata = metadata;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getModified() {
|
|
||||||
return modified;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setModified(String modified) {
|
|
||||||
this.modified = modified;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getOwner() {
|
|
||||||
return owner;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setOwner(String owner) {
|
|
||||||
this.owner = owner;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getRecord_id() {
|
|
||||||
return record_id;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setRecord_id(String record_id) {
|
|
||||||
this.record_id = record_id;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getState() {
|
|
||||||
return state;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setState(String state) {
|
|
||||||
this.state = state;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean isSubmitted() {
|
|
||||||
return submitted;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setSubmitted(boolean submitted) {
|
|
||||||
this.submitted = submitted;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getTitle() {
|
|
||||||
return title;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setTitle(String title) {
|
|
||||||
this.title = title;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,7 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.common.api.zenodo;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
|
|
||||||
public class ZenodoModelList extends ArrayList<ZenodoModel> {
|
|
||||||
}
|
|
|
@ -1,6 +1,9 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.common.collection;
|
package eu.dnetlib.dhp.common.collection;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Bundles the http connection parameters driving the client behaviour.
|
* Bundles the http connection parameters driving the client behaviour.
|
||||||
*/
|
*/
|
||||||
|
@ -13,6 +16,8 @@ public class HttpClientParams {
|
||||||
public static int _connectTimeOut = 10; // seconds
|
public static int _connectTimeOut = 10; // seconds
|
||||||
public static int _readTimeOut = 30; // seconds
|
public static int _readTimeOut = 30; // seconds
|
||||||
|
|
||||||
|
public static String _requestMethod = "GET";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Maximum number of allowed retries before failing
|
* Maximum number of allowed retries before failing
|
||||||
*/
|
*/
|
||||||
|
@ -38,17 +43,30 @@ public class HttpClientParams {
|
||||||
*/
|
*/
|
||||||
private int readTimeOut;
|
private int readTimeOut;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Custom http headers
|
||||||
|
*/
|
||||||
|
private Map<String, String> headers;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Request method (i.e., GET, POST etc)
|
||||||
|
*/
|
||||||
|
private String requestMethod;
|
||||||
|
|
||||||
public HttpClientParams() {
|
public HttpClientParams() {
|
||||||
this(_maxNumberOfRetry, _requestDelay, _retryDelay, _connectTimeOut, _readTimeOut);
|
this(_maxNumberOfRetry, _requestDelay, _retryDelay, _connectTimeOut, _readTimeOut, new HashMap<>(),
|
||||||
|
_requestMethod);
|
||||||
}
|
}
|
||||||
|
|
||||||
public HttpClientParams(int maxNumberOfRetry, int requestDelay, int retryDelay, int connectTimeOut,
|
public HttpClientParams(int maxNumberOfRetry, int requestDelay, int retryDelay, int connectTimeOut,
|
||||||
int readTimeOut) {
|
int readTimeOut, Map<String, String> headers, String requestMethod) {
|
||||||
this.maxNumberOfRetry = maxNumberOfRetry;
|
this.maxNumberOfRetry = maxNumberOfRetry;
|
||||||
this.requestDelay = requestDelay;
|
this.requestDelay = requestDelay;
|
||||||
this.retryDelay = retryDelay;
|
this.retryDelay = retryDelay;
|
||||||
this.connectTimeOut = connectTimeOut;
|
this.connectTimeOut = connectTimeOut;
|
||||||
this.readTimeOut = readTimeOut;
|
this.readTimeOut = readTimeOut;
|
||||||
|
this.headers = headers;
|
||||||
|
this.requestMethod = requestMethod;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getMaxNumberOfRetry() {
|
public int getMaxNumberOfRetry() {
|
||||||
|
@ -91,4 +109,19 @@ public class HttpClientParams {
|
||||||
this.readTimeOut = readTimeOut;
|
this.readTimeOut = readTimeOut;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Map<String, String> getHeaders() {
|
||||||
|
return headers;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setHeaders(Map<String, String> headers) {
|
||||||
|
this.headers = headers;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getRequestMethod() {
|
||||||
|
return requestMethod;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRequestMethod(String requestMethod) {
|
||||||
|
this.requestMethod = requestMethod;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,6 +8,7 @@ import java.io.InputStream;
|
||||||
import java.net.*;
|
import java.net.*;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.lang3.math.NumberUtils;
|
import org.apache.commons.lang3.math.NumberUtils;
|
||||||
|
@ -94,24 +95,32 @@ public class HttpConnector2 {
|
||||||
throw new CollectorException(msg);
|
throw new CollectorException(msg);
|
||||||
}
|
}
|
||||||
|
|
||||||
log.info("Request attempt {} [{}]", retryNumber, requestUrl);
|
|
||||||
|
|
||||||
InputStream input = null;
|
InputStream input = null;
|
||||||
|
|
||||||
|
long start = System.currentTimeMillis();
|
||||||
try {
|
try {
|
||||||
if (getClientParams().getRequestDelay() > 0) {
|
if (getClientParams().getRequestDelay() > 0) {
|
||||||
backoffAndSleep(getClientParams().getRequestDelay());
|
backoffAndSleep(getClientParams().getRequestDelay());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log.info("Request attempt {} [{}]", retryNumber, requestUrl);
|
||||||
|
|
||||||
final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection();
|
final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection();
|
||||||
urlConn.setInstanceFollowRedirects(false);
|
urlConn.setInstanceFollowRedirects(false);
|
||||||
urlConn.setReadTimeout(getClientParams().getReadTimeOut() * 1000);
|
urlConn.setReadTimeout(getClientParams().getReadTimeOut() * 1000);
|
||||||
urlConn.setConnectTimeout(getClientParams().getConnectTimeOut() * 1000);
|
urlConn.setConnectTimeout(getClientParams().getConnectTimeOut() * 1000);
|
||||||
urlConn.addRequestProperty(HttpHeaders.USER_AGENT, userAgent);
|
urlConn.addRequestProperty(HttpHeaders.USER_AGENT, userAgent);
|
||||||
|
urlConn.setRequestMethod(getClientParams().getRequestMethod());
|
||||||
|
|
||||||
if (log.isDebugEnabled()) {
|
// if provided, add custom headers
|
||||||
logHeaderFields(urlConn);
|
if (!getClientParams().getHeaders().isEmpty()) {
|
||||||
|
for (Map.Entry<String, String> headerEntry : getClientParams().getHeaders().entrySet()) {
|
||||||
|
urlConn.addRequestProperty(headerEntry.getKey(), headerEntry.getValue());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logHeaderFields(urlConn);
|
||||||
|
|
||||||
int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
|
int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
|
||||||
String rateLimit = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT);
|
String rateLimit = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT);
|
||||||
String rateRemaining = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING);
|
String rateRemaining = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING);
|
||||||
|
@ -125,9 +134,7 @@ public class HttpConnector2 {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (is2xx(urlConn.getResponseCode())) {
|
if (is2xx(urlConn.getResponseCode())) {
|
||||||
input = urlConn.getInputStream();
|
return getInputStream(urlConn, start);
|
||||||
responseType = urlConn.getContentType();
|
|
||||||
return input;
|
|
||||||
}
|
}
|
||||||
if (is3xx(urlConn.getResponseCode())) {
|
if (is3xx(urlConn.getResponseCode())) {
|
||||||
// REDIRECTS
|
// REDIRECTS
|
||||||
|
@ -137,6 +144,7 @@ public class HttpConnector2 {
|
||||||
.put(
|
.put(
|
||||||
REPORT_PREFIX + urlConn.getResponseCode(),
|
REPORT_PREFIX + urlConn.getResponseCode(),
|
||||||
String.format("Moved to: %s", newUrl));
|
String.format("Moved to: %s", newUrl));
|
||||||
|
logRequestTime(start);
|
||||||
urlConn.disconnect();
|
urlConn.disconnect();
|
||||||
if (retryAfter > 0) {
|
if (retryAfter > 0) {
|
||||||
backoffAndSleep(retryAfter);
|
backoffAndSleep(retryAfter);
|
||||||
|
@ -152,26 +160,50 @@ public class HttpConnector2 {
|
||||||
if (retryAfter > 0) {
|
if (retryAfter > 0) {
|
||||||
log
|
log
|
||||||
.warn(
|
.warn(
|
||||||
"{} - waiting and repeating request after suggested retry-after {} sec.",
|
"waiting and repeating request after suggested retry-after {} sec for URL {}",
|
||||||
requestUrl, retryAfter);
|
retryAfter, requestUrl);
|
||||||
backoffAndSleep(retryAfter * 1000);
|
backoffAndSleep(retryAfter * 1000);
|
||||||
} else {
|
} else {
|
||||||
log
|
log
|
||||||
.warn(
|
.warn(
|
||||||
"{} - waiting and repeating request after default delay of {} sec.",
|
"waiting and repeating request after default delay of {} sec for URL {}",
|
||||||
requestUrl, getClientParams().getRetryDelay());
|
getClientParams().getRetryDelay(), requestUrl);
|
||||||
backoffAndSleep(retryNumber * getClientParams().getRetryDelay() * 1000);
|
backoffAndSleep(retryNumber * getClientParams().getRetryDelay());
|
||||||
}
|
}
|
||||||
report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl);
|
report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl);
|
||||||
|
|
||||||
|
logRequestTime(start);
|
||||||
|
|
||||||
urlConn.disconnect();
|
urlConn.disconnect();
|
||||||
|
|
||||||
return attemptDownload(requestUrl, retryNumber + 1, report);
|
return attemptDownload(requestUrl, retryNumber + 1, report);
|
||||||
|
case 422: // UNPROCESSABLE ENTITY
|
||||||
|
report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl);
|
||||||
|
log.warn("waiting and repeating request after 10 sec for URL {}", requestUrl);
|
||||||
|
backoffAndSleep(10000);
|
||||||
|
urlConn.disconnect();
|
||||||
|
logRequestTime(start);
|
||||||
|
try {
|
||||||
|
return getInputStream(urlConn, start);
|
||||||
|
} catch (IOException e) {
|
||||||
|
log
|
||||||
|
.error(
|
||||||
|
"server returned 422 and got IOException accessing the response body from URL {}",
|
||||||
|
requestUrl);
|
||||||
|
log.error("IOException:", e);
|
||||||
|
return attemptDownload(requestUrl, retryNumber + 1, report);
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
|
log.error("gor error {} from URL: {}", urlConn.getResponseCode(), urlConn.getURL());
|
||||||
|
log.error("response message: {}", urlConn.getResponseMessage());
|
||||||
report
|
report
|
||||||
.put(
|
.put(
|
||||||
REPORT_PREFIX + urlConn.getResponseCode(),
|
REPORT_PREFIX + urlConn.getResponseCode(),
|
||||||
String
|
String
|
||||||
.format(
|
.format(
|
||||||
"%s Error: %s", requestUrl, urlConn.getResponseMessage()));
|
"%s Error: %s", requestUrl, urlConn.getResponseMessage()));
|
||||||
|
logRequestTime(start);
|
||||||
|
urlConn.disconnect();
|
||||||
throw new CollectorException(urlConn.getResponseCode() + " error " + report);
|
throw new CollectorException(urlConn.getResponseCode() + " error " + report);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -180,11 +212,11 @@ public class HttpConnector2 {
|
||||||
.format(
|
.format(
|
||||||
"Unexpected status code: %s errors: %s", urlConn.getResponseCode(),
|
"Unexpected status code: %s errors: %s", urlConn.getResponseCode(),
|
||||||
MAPPER.writeValueAsString(report)));
|
MAPPER.writeValueAsString(report)));
|
||||||
} catch (MalformedURLException | UnknownHostException e) {
|
} catch (MalformedURLException e) {
|
||||||
log.error(e.getMessage(), e);
|
log.error(e.getMessage(), e);
|
||||||
report.put(e.getClass().getName(), e.getMessage());
|
report.put(e.getClass().getName(), e.getMessage());
|
||||||
throw new CollectorException(e.getMessage(), e);
|
throw new CollectorException(e.getMessage(), e);
|
||||||
} catch (SocketTimeoutException | SocketException e) {
|
} catch (SocketTimeoutException | SocketException | UnknownHostException e) {
|
||||||
log.error(e.getMessage(), e);
|
log.error(e.getMessage(), e);
|
||||||
report.put(e.getClass().getName(), e.getMessage());
|
report.put(e.getClass().getName(), e.getMessage());
|
||||||
backoffAndSleep(getClientParams().getRetryDelay() * retryNumber * 1000);
|
backoffAndSleep(getClientParams().getRetryDelay() * retryNumber * 1000);
|
||||||
|
@ -192,13 +224,27 @@ public class HttpConnector2 {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private InputStream getInputStream(HttpURLConnection urlConn, long start) throws IOException {
|
||||||
|
InputStream input = urlConn.getInputStream();
|
||||||
|
responseType = urlConn.getContentType();
|
||||||
|
logRequestTime(start);
|
||||||
|
return input;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void logRequestTime(long start) {
|
||||||
|
log
|
||||||
|
.info(
|
||||||
|
"request time elapsed: {}sec",
|
||||||
|
TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis() - start));
|
||||||
|
}
|
||||||
|
|
||||||
private void logHeaderFields(final HttpURLConnection urlConn) throws IOException {
|
private void logHeaderFields(final HttpURLConnection urlConn) throws IOException {
|
||||||
log.debug("StatusCode: {}", urlConn.getResponseMessage());
|
log.info("Response: {} - {}", urlConn.getResponseCode(), urlConn.getResponseMessage());
|
||||||
|
|
||||||
for (Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) {
|
for (Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) {
|
||||||
if (e.getKey() != null) {
|
if (e.getKey() != null) {
|
||||||
for (String v : e.getValue()) {
|
for (String v : e.getValue()) {
|
||||||
log.debug(" key: {} - value: {}", e.getKey(), v);
|
log.info(" key: {} - value: {}", e.getKey(), v);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -218,7 +264,7 @@ public class HttpConnector2 {
|
||||||
for (String key : headerMap.keySet()) {
|
for (String key : headerMap.keySet()) {
|
||||||
if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (!headerMap.get(key).isEmpty())
|
if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (!headerMap.get(key).isEmpty())
|
||||||
&& NumberUtils.isCreatable(headerMap.get(key).get(0))) {
|
&& NumberUtils.isCreatable(headerMap.get(key).get(0))) {
|
||||||
return Integer.parseInt(headerMap.get(key).get(0)) + 10;
|
return Integer.parseInt(headerMap.get(key).get(0));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return -1;
|
return -1;
|
||||||
|
|
|
@ -63,7 +63,10 @@ public class Vocabulary implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public VocabularyTerm getTermBySynonym(final String syn) {
|
public VocabularyTerm getTermBySynonym(final String syn) {
|
||||||
return getTerm(synonyms.get(syn.toLowerCase()));
|
return Optional
|
||||||
|
.ofNullable(syn)
|
||||||
|
.map(s -> getTerm(synonyms.get(s.toLowerCase())))
|
||||||
|
.orElse(null);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Qualifier getTermAsQualifier(final String termId) {
|
public Qualifier getTermAsQualifier(final String termId) {
|
||||||
|
|
|
@ -135,6 +135,24 @@ public class VocabularyGroup implements Serializable {
|
||||||
return vocs.get(vocId.toLowerCase()).getSynonymAsQualifier(syn);
|
return vocs.get(vocId.toLowerCase()).getSynonymAsQualifier(syn);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Qualifier lookupTermBySynonym(final String vocId, final String syn) {
|
||||||
|
return find(vocId)
|
||||||
|
.map(
|
||||||
|
vocabulary -> Optional
|
||||||
|
.ofNullable(vocabulary.getTerm(syn))
|
||||||
|
.map(
|
||||||
|
term -> OafMapperUtils
|
||||||
|
.qualifier(term.getId(), term.getName(), vocabulary.getId(), vocabulary.getName()))
|
||||||
|
.orElse(
|
||||||
|
Optional
|
||||||
|
.ofNullable(vocabulary.getTermBySynonym(syn))
|
||||||
|
.map(
|
||||||
|
term -> OafMapperUtils
|
||||||
|
.qualifier(term.getId(), term.getName(), vocabulary.getId(), vocabulary.getName()))
|
||||||
|
.orElse(null)))
|
||||||
|
.orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* getSynonymAsQualifierCaseSensitive
|
* getSynonymAsQualifierCaseSensitive
|
||||||
*
|
*
|
||||||
|
|
|
@ -10,6 +10,7 @@ import org.apache.commons.lang3.StringUtils;
|
||||||
import com.wcohen.ss.JaroWinkler;
|
import com.wcohen.ss.JaroWinkler;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
import eu.dnetlib.pace.model.Person;
|
import eu.dnetlib.pace.model.Person;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
@ -119,11 +120,47 @@ public class AuthorMerger {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static String normalizeFullName(final String fullname) {
|
||||||
|
return nfd(fullname)
|
||||||
|
.toLowerCase()
|
||||||
|
// do not compact the regexes in a single expression, would cause StackOverflowError
|
||||||
|
// in case
|
||||||
|
// of large input strings
|
||||||
|
.replaceAll("(\\W)+", " ")
|
||||||
|
.replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ")
|
||||||
|
.replaceAll("(\\p{Punct})+", " ")
|
||||||
|
.replaceAll("(\\d)+", " ")
|
||||||
|
.replaceAll("(\\n)+", " ")
|
||||||
|
|
||||||
|
.trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String authorFieldToBeCompared(Author author) {
|
||||||
|
if (StringUtils.isNotBlank(author.getSurname())) {
|
||||||
|
return author.getSurname();
|
||||||
|
|
||||||
|
}
|
||||||
|
if (StringUtils.isNotBlank(author.getFullname())) {
|
||||||
|
return author.getFullname();
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
public static String pidToComparableString(StructuredProperty pid) {
|
public static String pidToComparableString(StructuredProperty pid) {
|
||||||
final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase()
|
final String classId = Optional
|
||||||
: "";
|
.ofNullable(pid)
|
||||||
return (pid.getQualifier() != null ? classid : "")
|
.map(
|
||||||
+ (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
|
p -> Optional
|
||||||
|
.ofNullable(p.getQualifier())
|
||||||
|
.map(Qualifier::getClassid)
|
||||||
|
.map(String::toLowerCase)
|
||||||
|
.orElse(""))
|
||||||
|
.orElse("");
|
||||||
|
return Optional
|
||||||
|
.ofNullable(pid)
|
||||||
|
.map(StructuredProperty::getValue)
|
||||||
|
.map(v -> String.join("|", v, classId))
|
||||||
|
.orElse("");
|
||||||
}
|
}
|
||||||
|
|
||||||
public static int countAuthorsPids(List<Author> authors) {
|
public static int countAuthorsPids(List<Author> authors) {
|
||||||
|
@ -171,7 +208,7 @@ public class AuthorMerger {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String normalize(final String s) {
|
public static String normalize(final String s) {
|
||||||
String[] normalized = nfd(s)
|
String[] normalized = nfd(s)
|
||||||
.toLowerCase()
|
.toLowerCase()
|
||||||
// do not compact the regexes in a single expression, would cause StackOverflowError
|
// do not compact the regexes in a single expression, would cause StackOverflowError
|
||||||
|
|
|
@ -1,97 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.merge;
|
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
|
||||||
|
|
||||||
import java.util.Objects;
|
|
||||||
import java.util.Optional;
|
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
import org.apache.spark.SparkConf;
|
|
||||||
import org.apache.spark.api.java.function.FilterFunction;
|
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
|
||||||
import org.apache.spark.sql.*;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
|
||||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
|
||||||
|
|
||||||
public class DispatchEntitiesSparkJob {
|
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(DispatchEntitiesSparkJob.class);
|
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
|
||||||
|
|
||||||
String jsonConfiguration = IOUtils
|
|
||||||
.toString(
|
|
||||||
Objects
|
|
||||||
.requireNonNull(
|
|
||||||
DispatchEntitiesSparkJob.class
|
|
||||||
.getResourceAsStream(
|
|
||||||
"/eu/dnetlib/dhp/oa/merge/dispatch_entities_parameters.json")));
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
|
||||||
parser.parseArgument(args);
|
|
||||||
|
|
||||||
Boolean isSparkSessionManaged = Optional
|
|
||||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
|
||||||
.map(Boolean::valueOf)
|
|
||||||
.orElse(Boolean.TRUE);
|
|
||||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
|
||||||
|
|
||||||
String inputPath = parser.get("inputPath");
|
|
||||||
log.info("inputPath: {}", inputPath);
|
|
||||||
|
|
||||||
String outputPath = parser.get("outputPath");
|
|
||||||
log.info("outputPath: {}", outputPath);
|
|
||||||
|
|
||||||
boolean filterInvisible = Boolean.parseBoolean(parser.get("filterInvisible"));
|
|
||||||
log.info("filterInvisible: {}", filterInvisible);
|
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
|
||||||
runWithSparkSession(
|
|
||||||
conf,
|
|
||||||
isSparkSessionManaged,
|
|
||||||
spark -> dispatchEntities(spark, inputPath, outputPath, filterInvisible));
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void dispatchEntities(
|
|
||||||
SparkSession spark,
|
|
||||||
String inputPath,
|
|
||||||
String outputPath,
|
|
||||||
boolean filterInvisible) {
|
|
||||||
|
|
||||||
Dataset<String> df = spark.read().textFile(inputPath);
|
|
||||||
|
|
||||||
ModelSupport.oafTypes.entrySet().parallelStream().forEach(entry -> {
|
|
||||||
String entityType = entry.getKey();
|
|
||||||
Class<?> clazz = entry.getValue();
|
|
||||||
|
|
||||||
final String entityPath = outputPath + "/" + entityType;
|
|
||||||
if (!entityType.equalsIgnoreCase("relation")) {
|
|
||||||
HdfsSupport.remove(entityPath, spark.sparkContext().hadoopConfiguration());
|
|
||||||
Dataset<Row> entityDF = spark
|
|
||||||
.read()
|
|
||||||
.schema(Encoders.bean(clazz).schema())
|
|
||||||
.json(
|
|
||||||
df
|
|
||||||
.filter((FilterFunction<String>) s -> s.startsWith(clazz.getName()))
|
|
||||||
.map(
|
|
||||||
(MapFunction<String, String>) s -> StringUtils.substringAfter(s, "|"),
|
|
||||||
Encoders.STRING()));
|
|
||||||
|
|
||||||
if (filterInvisible) {
|
|
||||||
entityDF = entityDF.filter("dataInfo.invisible != true");
|
|
||||||
}
|
|
||||||
|
|
||||||
entityDF
|
|
||||||
.write()
|
|
||||||
.mode(SaveMode.Overwrite)
|
|
||||||
.option("compression", "gzip")
|
|
||||||
.json(entityPath);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -2,50 +2,49 @@
|
||||||
package eu.dnetlib.dhp.oa.merge;
|
package eu.dnetlib.dhp.oa.merge;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
import static eu.dnetlib.dhp.utils.DHPUtils.toSeq;
|
import static org.apache.spark.sql.functions.col;
|
||||||
|
import static org.apache.spark.sql.functions.when;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.util.Map;
|
||||||
import java.util.List;
|
|
||||||
import java.util.Objects;
|
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
import java.util.concurrent.ExecutionException;
|
||||||
|
import java.util.concurrent.ForkJoinPool;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
|
||||||
import org.apache.spark.api.java.function.FilterFunction;
|
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.api.java.function.MapGroupsFunction;
|
||||||
import org.apache.spark.sql.*;
|
import org.apache.spark.sql.*;
|
||||||
import org.apache.spark.sql.expressions.Aggregator;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.DeserializationFeature;
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
||||||
import com.jayway.jsonpath.Configuration;
|
|
||||||
import com.jayway.jsonpath.DocumentContext;
|
|
||||||
import com.jayway.jsonpath.JsonPath;
|
|
||||||
import com.jayway.jsonpath.Option;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||||
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
|
import eu.dnetlib.dhp.schema.common.EntityType;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
|
||||||
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Groups the graph content by entity identifier to ensure ID uniqueness
|
* Groups the graph content by entity identifier to ensure ID uniqueness
|
||||||
*/
|
*/
|
||||||
public class GroupEntitiesSparkJob {
|
public class GroupEntitiesSparkJob {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(GroupEntitiesSparkJob.class);
|
private static final Logger log = LoggerFactory.getLogger(GroupEntitiesSparkJob.class);
|
||||||
|
|
||||||
private static final String ID_JPATH = "$.id";
|
private static final Encoder<OafEntity> OAFENTITY_KRYO_ENC = Encoders.kryo(OafEntity.class);
|
||||||
|
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
|
private ArgumentApplicationParser parser;
|
||||||
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
|
||||||
|
public GroupEntitiesSparkJob(ArgumentApplicationParser parser) {
|
||||||
|
this.parser = parser;
|
||||||
|
}
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
||||||
|
@ -63,141 +62,133 @@ public class GroupEntitiesSparkJob {
|
||||||
.orElse(Boolean.TRUE);
|
.orElse(Boolean.TRUE);
|
||||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
final String isLookupUrl = parser.get("isLookupUrl");
|
||||||
|
log.info("isLookupUrl: {}", isLookupUrl);
|
||||||
|
|
||||||
|
final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
||||||
|
|
||||||
|
new GroupEntitiesSparkJob(parser).run(isSparkSessionManaged, isLookupService);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void run(Boolean isSparkSessionManaged, ISLookUpService isLookUpService)
|
||||||
|
throws ISLookUpException {
|
||||||
|
|
||||||
String graphInputPath = parser.get("graphInputPath");
|
String graphInputPath = parser.get("graphInputPath");
|
||||||
log.info("graphInputPath: {}", graphInputPath);
|
log.info("graphInputPath: {}", graphInputPath);
|
||||||
|
|
||||||
|
String checkpointPath = parser.get("checkpointPath");
|
||||||
|
log.info("checkpointPath: {}", checkpointPath);
|
||||||
|
|
||||||
String outputPath = parser.get("outputPath");
|
String outputPath = parser.get("outputPath");
|
||||||
log.info("outputPath: {}", outputPath);
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
|
boolean filterInvisible = Boolean.parseBoolean(parser.get("filterInvisible"));
|
||||||
|
log.info("filterInvisible: {}", filterInvisible);
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
SparkConf conf = new SparkConf();
|
||||||
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
|
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
|
||||||
conf.registerKryoClasses(ModelSupport.getOafModelClasses());
|
conf.registerKryoClasses(ModelSupport.getOafModelClasses());
|
||||||
|
|
||||||
|
final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookUpService);
|
||||||
|
|
||||||
runWithSparkSession(
|
runWithSparkSession(
|
||||||
conf,
|
conf,
|
||||||
isSparkSessionManaged,
|
isSparkSessionManaged,
|
||||||
spark -> {
|
spark -> {
|
||||||
HdfsSupport.remove(outputPath, spark.sparkContext().hadoopConfiguration());
|
HdfsSupport.remove(checkpointPath, spark.sparkContext().hadoopConfiguration());
|
||||||
groupEntities(spark, graphInputPath, outputPath);
|
groupEntities(spark, graphInputPath, checkpointPath, outputPath, filterInvisible, vocs);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void groupEntities(
|
private static void groupEntities(
|
||||||
SparkSession spark,
|
SparkSession spark,
|
||||||
String inputPath,
|
String inputPath,
|
||||||
String outputPath) {
|
String checkpointPath,
|
||||||
|
String outputPath,
|
||||||
|
boolean filterInvisible, VocabularyGroup vocs) {
|
||||||
|
|
||||||
final TypedColumn<OafEntity, OafEntity> aggregator = new GroupingAggregator().toColumn();
|
Dataset<OafEntity> allEntities = spark.emptyDataset(OAFENTITY_KRYO_ENC);
|
||||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
|
||||||
spark
|
for (Map.Entry<EntityType, Class> e : ModelSupport.entityTypes.entrySet()) {
|
||||||
.read()
|
String entity = e.getKey().name();
|
||||||
.textFile(toSeq(listEntityPaths(inputPath, sc)))
|
Class<? extends OafEntity> entityClass = e.getValue();
|
||||||
.map((MapFunction<String, OafEntity>) GroupEntitiesSparkJob::parseOaf, Encoders.kryo(OafEntity.class))
|
String entityInputPath = inputPath + "/" + entity;
|
||||||
.filter((FilterFunction<OafEntity>) e -> StringUtils.isNotBlank(ModelSupport.idFn().apply(e)))
|
|
||||||
.groupByKey((MapFunction<OafEntity, String>) oaf -> ModelSupport.idFn().apply(oaf), Encoders.STRING())
|
if (!HdfsSupport.exists(entityInputPath, spark.sparkContext().hadoopConfiguration())) {
|
||||||
.agg(aggregator)
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
allEntities = allEntities
|
||||||
|
.union(
|
||||||
|
((Dataset<OafEntity>) spark
|
||||||
|
.read()
|
||||||
|
.schema(Encoders.bean(entityClass).schema())
|
||||||
|
.json(entityInputPath)
|
||||||
|
.filter("length(id) > 0")
|
||||||
|
.as(Encoders.bean(entityClass)))
|
||||||
|
.map((MapFunction<OafEntity, OafEntity>) r -> r, OAFENTITY_KRYO_ENC));
|
||||||
|
}
|
||||||
|
|
||||||
|
Dataset<?> groupedEntities = allEntities
|
||||||
.map(
|
.map(
|
||||||
(MapFunction<Tuple2<String, OafEntity>, String>) t -> t._2().getClass().getName() +
|
(MapFunction<OafEntity, OafEntity>) entity -> GraphCleaningFunctions
|
||||||
"|" + OBJECT_MAPPER.writeValueAsString(t._2()),
|
.applyCoarVocabularies(entity, vocs),
|
||||||
Encoders.STRING())
|
OAFENTITY_KRYO_ENC)
|
||||||
|
.groupByKey((MapFunction<OafEntity, String>) OafEntity::getId, Encoders.STRING())
|
||||||
|
.mapGroups((MapGroupsFunction<String, OafEntity, OafEntity>) MergeUtils::mergeById, OAFENTITY_KRYO_ENC)
|
||||||
|
.map(
|
||||||
|
(MapFunction<OafEntity, Tuple2<String, OafEntity>>) t -> new Tuple2<>(
|
||||||
|
t.getClass().getName(), t),
|
||||||
|
Encoders.tuple(Encoders.STRING(), OAFENTITY_KRYO_ENC));
|
||||||
|
|
||||||
|
// pivot on "_1" (classname of the entity)
|
||||||
|
// created columns containing only entities of the same class
|
||||||
|
for (Map.Entry<EntityType, Class> e : ModelSupport.entityTypes.entrySet()) {
|
||||||
|
String entity = e.getKey().name();
|
||||||
|
Class<? extends OafEntity> entityClass = e.getValue();
|
||||||
|
|
||||||
|
groupedEntities = groupedEntities
|
||||||
|
.withColumn(
|
||||||
|
entity,
|
||||||
|
when(col("_1").equalTo(entityClass.getName()), col("_2")));
|
||||||
|
}
|
||||||
|
|
||||||
|
groupedEntities
|
||||||
|
.drop("_1", "_2")
|
||||||
.write()
|
.write()
|
||||||
.option("compression", "gzip")
|
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.text(outputPath);
|
.option("compression", "gzip")
|
||||||
}
|
.save(checkpointPath);
|
||||||
|
|
||||||
public static class GroupingAggregator extends Aggregator<OafEntity, OafEntity, OafEntity> {
|
ForkJoinPool parPool = new ForkJoinPool(ModelSupport.entityTypes.size());
|
||||||
|
|
||||||
@Override
|
ModelSupport.entityTypes
|
||||||
public OafEntity zero() {
|
.entrySet()
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public OafEntity reduce(OafEntity b, OafEntity a) {
|
|
||||||
return mergeAndGet(b, a);
|
|
||||||
}
|
|
||||||
|
|
||||||
private OafEntity mergeAndGet(OafEntity b, OafEntity a) {
|
|
||||||
if (Objects.nonNull(a) && Objects.nonNull(b)) {
|
|
||||||
return OafMapperUtils.mergeEntities(b, a);
|
|
||||||
}
|
|
||||||
return Objects.isNull(a) ? b : a;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public OafEntity merge(OafEntity b, OafEntity a) {
|
|
||||||
return mergeAndGet(b, a);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public OafEntity finish(OafEntity j) {
|
|
||||||
return j;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Encoder<OafEntity> bufferEncoder() {
|
|
||||||
return Encoders.kryo(OafEntity.class);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Encoder<OafEntity> outputEncoder() {
|
|
||||||
return Encoders.kryo(OafEntity.class);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private static OafEntity parseOaf(String s) {
|
|
||||||
|
|
||||||
DocumentContext dc = JsonPath
|
|
||||||
.parse(s, Configuration.defaultConfiguration().addOptions(Option.SUPPRESS_EXCEPTIONS));
|
|
||||||
final String id = dc.read(ID_JPATH);
|
|
||||||
if (StringUtils.isNotBlank(id)) {
|
|
||||||
|
|
||||||
String prefix = StringUtils.substringBefore(id, "|");
|
|
||||||
switch (prefix) {
|
|
||||||
case "10":
|
|
||||||
return parse(s, Datasource.class);
|
|
||||||
case "20":
|
|
||||||
return parse(s, Organization.class);
|
|
||||||
case "40":
|
|
||||||
return parse(s, Project.class);
|
|
||||||
case "50":
|
|
||||||
String resultType = dc.read("$.resulttype.classid");
|
|
||||||
switch (resultType) {
|
|
||||||
case "publication":
|
|
||||||
return parse(s, Publication.class);
|
|
||||||
case "dataset":
|
|
||||||
return parse(s, eu.dnetlib.dhp.schema.oaf.Dataset.class);
|
|
||||||
case "software":
|
|
||||||
return parse(s, Software.class);
|
|
||||||
case "other":
|
|
||||||
return parse(s, OtherResearchProduct.class);
|
|
||||||
default:
|
|
||||||
throw new IllegalArgumentException(String.format("invalid resultType: '%s'", resultType));
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
throw new IllegalArgumentException(String.format("invalid id prefix: '%s'", prefix));
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
throw new IllegalArgumentException(String.format("invalid oaf: '%s'", s));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static <T extends OafEntity> OafEntity parse(String s, Class<T> clazz) {
|
|
||||||
try {
|
|
||||||
return OBJECT_MAPPER.readValue(s, clazz);
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new IllegalArgumentException(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static List<String> listEntityPaths(String inputPath, JavaSparkContext sc) {
|
|
||||||
return HdfsSupport
|
|
||||||
.listFiles(inputPath, sc.hadoopConfiguration())
|
|
||||||
.stream()
|
.stream()
|
||||||
.filter(f -> !f.toLowerCase().contains("relation"))
|
.map(e -> parPool.submit(() -> {
|
||||||
.collect(Collectors.toList());
|
String entity = e.getKey().name();
|
||||||
}
|
Class<? extends OafEntity> entityClass = e.getValue();
|
||||||
|
|
||||||
|
spark
|
||||||
|
.read()
|
||||||
|
.load(checkpointPath)
|
||||||
|
.select(col(entity).as("value"))
|
||||||
|
.filter("value IS NOT NULL")
|
||||||
|
.as(OAFENTITY_KRYO_ENC)
|
||||||
|
.map((MapFunction<OafEntity, OafEntity>) r -> r, (Encoder<OafEntity>) Encoders.bean(entityClass))
|
||||||
|
.filter(filterInvisible ? "dataInfo.invisible != TRUE" : "TRUE")
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath + "/" + entity);
|
||||||
|
}))
|
||||||
|
.collect(Collectors.toList())
|
||||||
|
.forEach(t -> {
|
||||||
|
try {
|
||||||
|
t.get();
|
||||||
|
} catch (InterruptedException | ExecutionException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,77 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oozie;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
|
||||||
|
|
||||||
|
import java.net.URL;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.time.DurationFormatUtils;
|
||||||
|
import org.apache.commons.text.StringSubstitutor;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.google.common.io.Resources;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
|
||||||
|
public class RunSQLSparkJob {
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(RunSQLSparkJob.class);
|
||||||
|
|
||||||
|
private final ArgumentApplicationParser parser;
|
||||||
|
|
||||||
|
public RunSQLSparkJob(ArgumentApplicationParser parser) {
|
||||||
|
this.parser = parser;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
||||||
|
Map<String, String> params = new HashMap<>();
|
||||||
|
for (int i = 0; i < args.length - 1; i++) {
|
||||||
|
if (args[i].startsWith("--")) {
|
||||||
|
params.put(args[i].substring(2), args[++i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* String jsonConfiguration = IOUtils .toString( Objects .requireNonNull( RunSQLSparkJob.class
|
||||||
|
* .getResourceAsStream( "/eu/dnetlib/dhp/oozie/run_sql_parameters.json"))); final ArgumentApplicationParser
|
||||||
|
* parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args);
|
||||||
|
*/
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = Optional
|
||||||
|
.ofNullable(params.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
URL url = com.google.common.io.Resources.getResource(params.get("sql"));
|
||||||
|
String raw_sql = Resources.toString(url, StandardCharsets.UTF_8);
|
||||||
|
|
||||||
|
String sql = StringSubstitutor.replace(raw_sql, params);
|
||||||
|
log.info("sql: {}", sql);
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
conf.set("hive.metastore.uris", params.get("hiveMetastoreUris"));
|
||||||
|
|
||||||
|
runWithSparkHiveSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
for (String statement : sql.split(";\\s*/\\*\\s*EOS\\s*\\*/\\s*")) {
|
||||||
|
log.info("executing: {}", statement);
|
||||||
|
long startTime = System.currentTimeMillis();
|
||||||
|
spark.sql(statement).show();
|
||||||
|
log
|
||||||
|
.info(
|
||||||
|
"executed in {}",
|
||||||
|
DurationFormatUtils.formatDuration(System.currentTimeMillis() - startTime, "HH:mm:ss.S"));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,70 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2024.
|
||||||
|
* SPDX-FileCopyrightText: © 2023 Consiglio Nazionale delle Ricerche
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
*/
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.builder.EqualsBuilder;
|
||||||
|
import org.apache.commons.lang3.builder.HashCodeBuilder;
|
||||||
|
|
||||||
|
public class HashableStructuredProperty extends StructuredProperty {
|
||||||
|
|
||||||
|
private static final long serialVersionUID = 8371670185221126045L;
|
||||||
|
|
||||||
|
public static HashableStructuredProperty newInstance(String value, Qualifier qualifier, DataInfo dataInfo) {
|
||||||
|
if (value == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
final HashableStructuredProperty sp = new HashableStructuredProperty();
|
||||||
|
sp.setValue(value);
|
||||||
|
sp.setQualifier(qualifier);
|
||||||
|
sp.setDataInfo(dataInfo);
|
||||||
|
return sp;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static HashableStructuredProperty newInstance(StructuredProperty sp) {
|
||||||
|
HashableStructuredProperty hsp = new HashableStructuredProperty();
|
||||||
|
hsp.setQualifier(sp.getQualifier());
|
||||||
|
hsp.setValue(sp.getValue());
|
||||||
|
hsp.setQualifier(sp.getQualifier());
|
||||||
|
return hsp;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static StructuredProperty toStructuredProperty(HashableStructuredProperty hsp) {
|
||||||
|
StructuredProperty sp = new StructuredProperty();
|
||||||
|
sp.setQualifier(hsp.getQualifier());
|
||||||
|
sp.setValue(hsp.getValue());
|
||||||
|
sp.setQualifier(hsp.getQualifier());
|
||||||
|
return sp;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return new HashCodeBuilder(11, 91)
|
||||||
|
.append(getQualifier().getClassid())
|
||||||
|
.append(getQualifier().getSchemeid())
|
||||||
|
.append(getValue())
|
||||||
|
.hashCode();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object obj) {
|
||||||
|
if (obj == null) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (obj == this) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (obj.getClass() != getClass()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
final HashableStructuredProperty rhs = (HashableStructuredProperty) obj;
|
||||||
|
return new EqualsBuilder()
|
||||||
|
.append(getQualifier().getClassid(), rhs.getQualifier().getClassid())
|
||||||
|
.append(getQualifier().getSchemeid(), rhs.getQualifier().getSchemeid())
|
||||||
|
.append(getValue(), rhs.getValue())
|
||||||
|
.isEquals();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,46 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
|
public class CleaningFunctions {
|
||||||
|
|
||||||
|
public static final String DOI_PREFIX_REGEX = "(^10\\.|\\/10\\.)";
|
||||||
|
public static final String DOI_PREFIX = "10.";
|
||||||
|
|
||||||
|
public static final Set<String> PID_BLACKLIST = new HashSet<>();
|
||||||
|
|
||||||
|
static {
|
||||||
|
PID_BLACKLIST.add("none");
|
||||||
|
PID_BLACKLIST.add("na");
|
||||||
|
}
|
||||||
|
|
||||||
|
public CleaningFunctions() {
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Utility method that filter PID values on a per-type basis.
|
||||||
|
* @param s the PID whose value will be checked.
|
||||||
|
* @return false if the pid matches the filter criteria, true otherwise.
|
||||||
|
*/
|
||||||
|
public static boolean pidFilter(StructuredProperty s) {
|
||||||
|
final String pidValue = s.getValue();
|
||||||
|
if (Objects.isNull(s.getQualifier()) ||
|
||||||
|
StringUtils.isBlank(pidValue) ||
|
||||||
|
StringUtils.isBlank(pidValue.replaceAll("(?:\\n|\\r|\\t|\\s)", ""))) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (CleaningFunctions.PID_BLACKLIST.contains(pidValue)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return !PidBlacklistProvider.getBlacklist(s.getQualifier().getClassid()).contains(pidValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -1,14 +1,30 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.schema.oaf.utils;
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
public class DoiCleaningRule {
|
public class DoiCleaningRule {
|
||||||
|
|
||||||
public static String clean(final String doi) {
|
public static String clean(final String doi) {
|
||||||
return doi
|
if (doi == null)
|
||||||
.toLowerCase()
|
return null;
|
||||||
.replaceAll("\\s", "")
|
final String replaced = doi
|
||||||
|
.replaceAll("\\n|\\r|\\t|\\s", "")
|
||||||
.replaceAll("^doi:", "")
|
.replaceAll("^doi:", "")
|
||||||
|
.toLowerCase()
|
||||||
.replaceFirst(CleaningFunctions.DOI_PREFIX_REGEX, CleaningFunctions.DOI_PREFIX);
|
.replaceFirst(CleaningFunctions.DOI_PREFIX_REGEX, CleaningFunctions.DOI_PREFIX);
|
||||||
|
if (StringUtils.isEmpty(replaced))
|
||||||
|
return null;
|
||||||
|
|
||||||
|
if (!replaced.contains("10."))
|
||||||
|
return null;
|
||||||
|
|
||||||
|
final String ret = replaced.substring(replaced.indexOf("10."));
|
||||||
|
|
||||||
|
if (!ret.startsWith(CleaningFunctions.DOI_PREFIX))
|
||||||
|
return null;
|
||||||
|
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,8 +1,12 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.schema.oaf.utils;
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.OPENAIRE_META_RESOURCE_TYPE;
|
||||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.getProvenance;
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.getProvenance;
|
||||||
|
|
||||||
|
import java.net.MalformedURLException;
|
||||||
|
import java.net.URL;
|
||||||
import java.time.LocalDate;
|
import java.time.LocalDate;
|
||||||
import java.time.ZoneId;
|
import java.time.ZoneId;
|
||||||
import java.time.format.DateTimeFormatter;
|
import java.time.format.DateTimeFormatter;
|
||||||
|
@ -19,6 +23,7 @@ import com.google.common.collect.Lists;
|
||||||
import com.google.common.collect.Sets;
|
import com.google.common.collect.Sets;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyTerm;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
@ -26,6 +31,10 @@ import me.xuender.unidecode.Unidecode;
|
||||||
|
|
||||||
public class GraphCleaningFunctions extends CleaningFunctions {
|
public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
|
|
||||||
|
public static final String DNET_PUBLISHERS = "dnet:publishers";
|
||||||
|
|
||||||
|
public static final String DNET_LICENSES = "dnet:licenses";
|
||||||
|
|
||||||
public static final String ORCID_CLEANING_REGEX = ".*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9x]{4})";
|
public static final String ORCID_CLEANING_REGEX = ".*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9x]{4})";
|
||||||
public static final int ORCID_LEN = 19;
|
public static final int ORCID_LEN = 19;
|
||||||
public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)";
|
public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)";
|
||||||
|
@ -37,6 +46,69 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
public static final int TITLE_FILTER_RESIDUAL_LENGTH = 5;
|
public static final int TITLE_FILTER_RESIDUAL_LENGTH = 5;
|
||||||
private static final String NAME_CLEANING_REGEX = "[\\r\\n\\t\\s]+";
|
private static final String NAME_CLEANING_REGEX = "[\\r\\n\\t\\s]+";
|
||||||
|
|
||||||
|
private static final Set<String> INVALID_AUTHOR_NAMES = new HashSet<>();
|
||||||
|
|
||||||
|
private static final Set<String> INVALID_URLS = new HashSet<>();
|
||||||
|
|
||||||
|
private static final Set<String> INVALID_URL_HOSTS = new HashSet<>();
|
||||||
|
|
||||||
|
private static final HashSet<String> PEER_REVIEWED_TYPES = new HashSet<>();
|
||||||
|
|
||||||
|
static {
|
||||||
|
PEER_REVIEWED_TYPES.add("Article");
|
||||||
|
PEER_REVIEWED_TYPES.add("Part of book or chapter of book");
|
||||||
|
PEER_REVIEWED_TYPES.add("Book");
|
||||||
|
PEER_REVIEWED_TYPES.add("Doctoral thesis");
|
||||||
|
PEER_REVIEWED_TYPES.add("Master thesis");
|
||||||
|
PEER_REVIEWED_TYPES.add("Data Paper");
|
||||||
|
PEER_REVIEWED_TYPES.add("Thesis");
|
||||||
|
PEER_REVIEWED_TYPES.add("Bachelor thesis");
|
||||||
|
PEER_REVIEWED_TYPES.add("Conference object");
|
||||||
|
|
||||||
|
INVALID_AUTHOR_NAMES.add("(:null)");
|
||||||
|
INVALID_AUTHOR_NAMES.add("(:unap)");
|
||||||
|
INVALID_AUTHOR_NAMES.add("(:tba)");
|
||||||
|
INVALID_AUTHOR_NAMES.add("(:unas)");
|
||||||
|
INVALID_AUTHOR_NAMES.add("(:unav)");
|
||||||
|
INVALID_AUTHOR_NAMES.add("(:unkn)");
|
||||||
|
INVALID_AUTHOR_NAMES.add("(:unkn) unknown");
|
||||||
|
INVALID_AUTHOR_NAMES.add(":none");
|
||||||
|
INVALID_AUTHOR_NAMES.add(":null");
|
||||||
|
INVALID_AUTHOR_NAMES.add(":unas");
|
||||||
|
INVALID_AUTHOR_NAMES.add(":unav");
|
||||||
|
INVALID_AUTHOR_NAMES.add(":unkn");
|
||||||
|
INVALID_AUTHOR_NAMES.add("[autor desconocido]");
|
||||||
|
INVALID_AUTHOR_NAMES.add("[s. n.]");
|
||||||
|
INVALID_AUTHOR_NAMES.add("[s.n]");
|
||||||
|
INVALID_AUTHOR_NAMES.add("[unknown]");
|
||||||
|
INVALID_AUTHOR_NAMES.add("anonymous");
|
||||||
|
INVALID_AUTHOR_NAMES.add("n.n.");
|
||||||
|
INVALID_AUTHOR_NAMES.add("nn");
|
||||||
|
INVALID_AUTHOR_NAMES.add("no name supplied");
|
||||||
|
INVALID_AUTHOR_NAMES.add("none");
|
||||||
|
INVALID_AUTHOR_NAMES.add("none available");
|
||||||
|
INVALID_AUTHOR_NAMES.add("not available not available");
|
||||||
|
INVALID_AUTHOR_NAMES.add("null &na;");
|
||||||
|
INVALID_AUTHOR_NAMES.add("null anonymous");
|
||||||
|
INVALID_AUTHOR_NAMES.add("unbekannt");
|
||||||
|
INVALID_AUTHOR_NAMES.add("unknown");
|
||||||
|
INVALID_AUTHOR_NAMES.add("autor, Sin");
|
||||||
|
INVALID_AUTHOR_NAMES.add("Desconocido / Inconnu,");
|
||||||
|
|
||||||
|
INVALID_URL_HOSTS.add("creativecommons.org");
|
||||||
|
INVALID_URL_HOSTS.add("www.academia.edu");
|
||||||
|
INVALID_URL_HOSTS.add("academia.edu");
|
||||||
|
INVALID_URL_HOSTS.add("researchgate.net");
|
||||||
|
INVALID_URL_HOSTS.add("www.researchgate.net");
|
||||||
|
|
||||||
|
INVALID_URLS.add("http://repo.scoap3.org/api");
|
||||||
|
INVALID_URLS.add("http://ora.ox.ac.uk/objects/uuid:");
|
||||||
|
INVALID_URLS.add("http://ntur.lib.ntu.edu.tw/news/agent_contract.pdf");
|
||||||
|
INVALID_URLS.add("https://media.springer.com/full/springer-instructions-for-authors-assets/pdf/SN_BPF_EN.pdf");
|
||||||
|
INVALID_URLS.add("http://www.tobaccoinduceddiseases.org/dl/61aad426c96519bea4040a374c6a6110/");
|
||||||
|
INVALID_URLS.add("https://www.bilboard.nl/verenigingsbladen/bestuurskundige-berichten");
|
||||||
|
}
|
||||||
|
|
||||||
public static <T extends Oaf> T cleanContext(T value, String contextId, String verifyParam) {
|
public static <T extends Oaf> T cleanContext(T value, String contextId, String verifyParam) {
|
||||||
if (ModelSupport.isSubClass(value, Result.class)) {
|
if (ModelSupport.isSubClass(value, Result.class)) {
|
||||||
final Result res = (Result) value;
|
final Result res = (Result) value;
|
||||||
|
@ -47,7 +119,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
.getContext()
|
.getContext()
|
||||||
.stream()
|
.stream()
|
||||||
.filter(c -> !StringUtils.startsWith(c.getId().toLowerCase(), contextId))
|
.filter(c -> !StringUtils.startsWith(c.getId().toLowerCase(), contextId))
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toCollection(ArrayList::new)));
|
||||||
}
|
}
|
||||||
return (T) res;
|
return (T) res;
|
||||||
} else {
|
} else {
|
||||||
|
@ -242,7 +314,8 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (value instanceof Datasource) {
|
if (value instanceof Datasource) {
|
||||||
// nothing to evaluate here
|
final Datasource d = (Datasource) value;
|
||||||
|
return Objects.nonNull(d.getOfficialname()) && StringUtils.isNotBlank(d.getOfficialname().getValue());
|
||||||
} else if (value instanceof Project) {
|
} else if (value instanceof Project) {
|
||||||
final Project p = (Project) value;
|
final Project p = (Project) value;
|
||||||
return Objects.nonNull(p.getCode()) && StringUtils.isNotBlank(p.getCode().getValue());
|
return Objects.nonNull(p.getCode()) && StringUtils.isNotBlank(p.getCode().getValue());
|
||||||
|
@ -273,6 +346,12 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
|
|
||||||
public static <T extends Oaf> T cleanup(T value, VocabularyGroup vocs) {
|
public static <T extends Oaf> T cleanup(T value, VocabularyGroup vocs) {
|
||||||
|
|
||||||
|
if (Objects.isNull(value.getDataInfo())) {
|
||||||
|
final DataInfo d = new DataInfo();
|
||||||
|
d.setDeletedbyinference(false);
|
||||||
|
value.setDataInfo(d);
|
||||||
|
}
|
||||||
|
|
||||||
if (value instanceof OafEntity) {
|
if (value instanceof OafEntity) {
|
||||||
|
|
||||||
OafEntity e = (OafEntity) value;
|
OafEntity e = (OafEntity) value;
|
||||||
|
@ -292,6 +371,10 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
} else if (value instanceof Result) {
|
} else if (value instanceof Result) {
|
||||||
Result r = (Result) value;
|
Result r = (Result) value;
|
||||||
|
|
||||||
|
if (Objects.isNull(r.getContext())) {
|
||||||
|
r.setContext(new ArrayList<>());
|
||||||
|
}
|
||||||
|
|
||||||
if (Objects.nonNull(r.getFulltext())
|
if (Objects.nonNull(r.getFulltext())
|
||||||
&& (ModelConstants.SOFTWARE_RESULTTYPE_CLASSID.equals(r.getResulttype().getClassid()) ||
|
&& (ModelConstants.SOFTWARE_RESULTTYPE_CLASSID.equals(r.getResulttype().getClassid()) ||
|
||||||
ModelConstants.DATASET_RESULTTYPE_CLASSID.equals(r.getResulttype().getClassid()))) {
|
ModelConstants.DATASET_RESULTTYPE_CLASSID.equals(r.getResulttype().getClassid()))) {
|
||||||
|
@ -334,6 +417,14 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
.getPublisher()
|
.getPublisher()
|
||||||
.getValue()
|
.getValue()
|
||||||
.replaceAll(NAME_CLEANING_REGEX, " "));
|
.replaceAll(NAME_CLEANING_REGEX, " "));
|
||||||
|
|
||||||
|
if (vocs.vocabularyExists(DNET_PUBLISHERS)) {
|
||||||
|
vocs
|
||||||
|
.find(DNET_PUBLISHERS)
|
||||||
|
.map(voc -> voc.getTermBySynonym(r.getPublisher().getValue()))
|
||||||
|
.map(VocabularyTerm::getName)
|
||||||
|
.ifPresent(publisher -> r.getPublisher().setValue(publisher));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (Objects.isNull(r.getLanguage()) || StringUtils.isBlank(r.getLanguage().getClassid())) {
|
if (Objects.isNull(r.getLanguage()) || StringUtils.isBlank(r.getLanguage().getClassid())) {
|
||||||
|
@ -417,6 +508,8 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.filter(sp -> StringUtils.isNotBlank(sp.getValue()))
|
.filter(sp -> StringUtils.isNotBlank(sp.getValue()))
|
||||||
.map(GraphCleaningFunctions::cleanValue)
|
.map(GraphCleaningFunctions::cleanValue)
|
||||||
|
.sorted((s1, s2) -> s2.getValue().length() - s1.getValue().length())
|
||||||
|
.limit(ModelHardLimits.MAX_ABSTRACTS)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
}
|
}
|
||||||
if (Objects.isNull(r.getResourcetype()) || StringUtils.isBlank(r.getResourcetype().getClassid())) {
|
if (Objects.isNull(r.getResourcetype()) || StringUtils.isBlank(r.getResourcetype().getClassid())) {
|
||||||
|
@ -470,12 +563,24 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
Optional
|
Optional
|
||||||
.ofNullable(i.getPid())
|
.ofNullable(i.getPid())
|
||||||
.ifPresent(pid -> {
|
.ifPresent(pid -> {
|
||||||
final Set<StructuredProperty> pids = Sets.newHashSet(pid);
|
final Set<HashableStructuredProperty> pids = pid
|
||||||
|
.stream()
|
||||||
|
.map(HashableStructuredProperty::newInstance)
|
||||||
|
.collect(Collectors.toCollection(HashSet::new));
|
||||||
Optional
|
Optional
|
||||||
.ofNullable(i.getAlternateIdentifier())
|
.ofNullable(i.getAlternateIdentifier())
|
||||||
.ifPresent(altId -> {
|
.ifPresent(altId -> {
|
||||||
final Set<StructuredProperty> altIds = Sets.newHashSet(altId);
|
final Set<HashableStructuredProperty> altIds = altId
|
||||||
i.setAlternateIdentifier(Lists.newArrayList(Sets.difference(altIds, pids)));
|
.stream()
|
||||||
|
.map(HashableStructuredProperty::newInstance)
|
||||||
|
.collect(Collectors.toCollection(HashSet::new));
|
||||||
|
i
|
||||||
|
.setAlternateIdentifier(
|
||||||
|
Sets
|
||||||
|
.difference(altIds, pids)
|
||||||
|
.stream()
|
||||||
|
.map(HashableStructuredProperty::toStructuredProperty)
|
||||||
|
.collect(Collectors.toList()));
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -493,6 +598,43 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
if (Objects.isNull(i.getRefereed()) || StringUtils.isBlank(i.getRefereed().getClassid())) {
|
if (Objects.isNull(i.getRefereed()) || StringUtils.isBlank(i.getRefereed().getClassid())) {
|
||||||
i.setRefereed(qualifier("0000", "Unknown", ModelConstants.DNET_REVIEW_LEVELS));
|
i.setRefereed(qualifier("0000", "Unknown", ModelConstants.DNET_REVIEW_LEVELS));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (Objects.nonNull(i.getLicense()) && Objects.nonNull(i.getLicense().getValue())) {
|
||||||
|
vocs
|
||||||
|
.find(DNET_LICENSES)
|
||||||
|
.map(voc -> voc.getTermBySynonym(i.getLicense().getValue()))
|
||||||
|
.map(VocabularyTerm::getId)
|
||||||
|
.ifPresent(license -> i.getLicense().setValue(license));
|
||||||
|
}
|
||||||
|
|
||||||
|
// from the script from Dimitris
|
||||||
|
if ("0000".equals(i.getRefereed().getClassid())) {
|
||||||
|
final boolean isFromCrossref = Optional
|
||||||
|
.ofNullable(i.getCollectedfrom())
|
||||||
|
.map(KeyValue::getKey)
|
||||||
|
.map(id -> id.equals(ModelConstants.CROSSREF_ID))
|
||||||
|
.orElse(false);
|
||||||
|
final boolean hasDoi = Optional
|
||||||
|
.ofNullable(i.getPid())
|
||||||
|
.map(
|
||||||
|
pid -> pid
|
||||||
|
.stream()
|
||||||
|
.anyMatch(
|
||||||
|
p -> PidType.doi.toString().equals(p.getQualifier().getClassid())))
|
||||||
|
.orElse(false);
|
||||||
|
final boolean isPeerReviewedType = PEER_REVIEWED_TYPES
|
||||||
|
.contains(i.getInstancetype().getClassname());
|
||||||
|
final boolean noOtherLitType = r
|
||||||
|
.getInstance()
|
||||||
|
.stream()
|
||||||
|
.noneMatch(ii -> "Other literature type".equals(ii.getInstancetype().getClassname()));
|
||||||
|
if (isFromCrossref && hasDoi && isPeerReviewedType && noOtherLitType) {
|
||||||
|
i.setRefereed(qualifier("0001", "peerReviewed", ModelConstants.DNET_REVIEW_LEVELS));
|
||||||
|
} else {
|
||||||
|
i.setRefereed(qualifier("0002", "nonPeerReviewed", ModelConstants.DNET_REVIEW_LEVELS));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (Objects.nonNull(i.getDateofacceptance())) {
|
if (Objects.nonNull(i.getDateofacceptance())) {
|
||||||
Optional<String> date = cleanDateField(i.getDateofacceptance());
|
Optional<String> date = cleanDateField(i.getDateofacceptance());
|
||||||
if (date.isPresent()) {
|
if (date.isPresent()) {
|
||||||
|
@ -506,6 +648,15 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
ModelConstants.DATASET_RESULTTYPE_CLASSID.equals(r.getResulttype().getClassid()))) {
|
ModelConstants.DATASET_RESULTTYPE_CLASSID.equals(r.getResulttype().getClassid()))) {
|
||||||
i.setFulltext(null);
|
i.setFulltext(null);
|
||||||
}
|
}
|
||||||
|
if (Objects.nonNull(i.getUrl())) {
|
||||||
|
i
|
||||||
|
.setUrl(
|
||||||
|
i
|
||||||
|
.getUrl()
|
||||||
|
.stream()
|
||||||
|
.filter(GraphCleaningFunctions::urlFilter)
|
||||||
|
.collect(Collectors.toList()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (Objects.isNull(r.getBestaccessright())
|
if (Objects.isNull(r.getBestaccessright())
|
||||||
|
@ -528,8 +679,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
.getAuthor()
|
.getAuthor()
|
||||||
.stream()
|
.stream()
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.filter(a -> StringUtils.isNotBlank(a.getFullname()))
|
.filter(GraphCleaningFunctions::isValidAuthorName)
|
||||||
.filter(a -> StringUtils.isNotBlank(a.getFullname().replaceAll("[\\W]", "")))
|
|
||||||
.map(GraphCleaningFunctions::cleanupAuthor)
|
.map(GraphCleaningFunctions::cleanupAuthor)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
|
|
||||||
|
@ -556,6 +706,9 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.filter(p -> Objects.nonNull(p.getQualifier()))
|
.filter(p -> Objects.nonNull(p.getQualifier()))
|
||||||
.filter(p -> StringUtils.isNotBlank(p.getValue()))
|
.filter(p -> StringUtils.isNotBlank(p.getValue()))
|
||||||
|
.filter(
|
||||||
|
p -> StringUtils
|
||||||
|
.contains(StringUtils.lowerCase(p.getQualifier().getClassid()), ORCID))
|
||||||
.map(p -> {
|
.map(p -> {
|
||||||
// hack to distinguish orcid from orcid_pending
|
// hack to distinguish orcid from orcid_pending
|
||||||
String pidProvenance = getProvenance(p.getDataInfo());
|
String pidProvenance = getProvenance(p.getDataInfo());
|
||||||
|
@ -565,7 +718,8 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
.toLowerCase()
|
.toLowerCase()
|
||||||
.contains(ModelConstants.ORCID)) {
|
.contains(ModelConstants.ORCID)) {
|
||||||
if (pidProvenance
|
if (pidProvenance
|
||||||
.equals(ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY)) {
|
.equals(ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY) ||
|
||||||
|
pidProvenance.equals("ORCID_ENRICHMENT")) {
|
||||||
p.getQualifier().setClassid(ModelConstants.ORCID);
|
p.getQualifier().setClassid(ModelConstants.ORCID);
|
||||||
} else {
|
} else {
|
||||||
p.getQualifier().setClassid(ModelConstants.ORCID_PENDING);
|
p.getQualifier().setClassid(ModelConstants.ORCID_PENDING);
|
||||||
|
@ -687,12 +841,30 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
// HELPERS
|
// HELPERS
|
||||||
|
|
||||||
private static boolean isValidAuthorName(Author a) {
|
private static boolean isValidAuthorName(Author a) {
|
||||||
return !Stream
|
return StringUtils.isNotBlank(a.getFullname()) &&
|
||||||
.of(a.getFullname(), a.getName(), a.getSurname())
|
StringUtils.isNotBlank(a.getFullname().replaceAll("[\\W]", "")) &&
|
||||||
.filter(s -> s != null && !s.isEmpty())
|
!INVALID_AUTHOR_NAMES.contains(StringUtils.lowerCase(a.getFullname()).trim()) &&
|
||||||
.collect(Collectors.joining(""))
|
!Stream
|
||||||
.toLowerCase()
|
.of(a.getFullname(), a.getName(), a.getSurname())
|
||||||
.matches(INVALID_AUTHOR_REGEX);
|
.filter(StringUtils::isNotBlank)
|
||||||
|
.collect(Collectors.joining(""))
|
||||||
|
.toLowerCase()
|
||||||
|
.matches(INVALID_AUTHOR_REGEX);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean urlFilter(String u) {
|
||||||
|
try {
|
||||||
|
final URL url = new URL(u);
|
||||||
|
if (StringUtils.isBlank(url.getPath()) || "/".equals(url.getPath())) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (INVALID_URL_HOSTS.contains(url.getHost())) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return !INVALID_URLS.contains(url.toString());
|
||||||
|
} catch (MalformedURLException ex) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static List<StructuredProperty> processPidCleaning(List<StructuredProperty> pids) {
|
private static List<StructuredProperty> processPidCleaning(List<StructuredProperty> pids) {
|
||||||
|
@ -742,4 +914,142 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static OafEntity applyCoarVocabularies(OafEntity entity, VocabularyGroup vocs) {
|
||||||
|
|
||||||
|
if (entity instanceof Result) {
|
||||||
|
final Result result = (Result) entity;
|
||||||
|
|
||||||
|
Optional
|
||||||
|
.ofNullable(result.getInstance())
|
||||||
|
.ifPresent(
|
||||||
|
instances -> instances
|
||||||
|
.forEach(
|
||||||
|
instance -> {
|
||||||
|
if (Objects.isNull(instance.getInstanceTypeMapping())) {
|
||||||
|
List<InstanceTypeMapping> mapping = Lists.newArrayList();
|
||||||
|
mapping
|
||||||
|
.add(
|
||||||
|
OafMapperUtils
|
||||||
|
.instanceTypeMapping(
|
||||||
|
instance.getInstancetype().getClassname(),
|
||||||
|
OPENAIRE_COAR_RESOURCE_TYPES_3_1));
|
||||||
|
instance.setInstanceTypeMapping(mapping);
|
||||||
|
}
|
||||||
|
Optional<InstanceTypeMapping> optionalItm = instance
|
||||||
|
.getInstanceTypeMapping()
|
||||||
|
.stream()
|
||||||
|
.filter(GraphCleaningFunctions::originalResourceType)
|
||||||
|
.findFirst();
|
||||||
|
if (optionalItm.isPresent()) {
|
||||||
|
InstanceTypeMapping coarItm = optionalItm.get();
|
||||||
|
Optional
|
||||||
|
.ofNullable(
|
||||||
|
vocs
|
||||||
|
.lookupTermBySynonym(
|
||||||
|
OPENAIRE_COAR_RESOURCE_TYPES_3_1, coarItm.getOriginalType()))
|
||||||
|
.ifPresent(type -> {
|
||||||
|
coarItm.setTypeCode(type.getClassid());
|
||||||
|
coarItm.setTypeLabel(type.getClassname());
|
||||||
|
});
|
||||||
|
final List<InstanceTypeMapping> mappings = Lists.newArrayList();
|
||||||
|
if (vocs.vocabularyExists(OPENAIRE_USER_RESOURCE_TYPES)) {
|
||||||
|
Optional
|
||||||
|
.ofNullable(
|
||||||
|
vocs
|
||||||
|
.lookupTermBySynonym(
|
||||||
|
OPENAIRE_USER_RESOURCE_TYPES, coarItm.getTypeCode()))
|
||||||
|
.ifPresent(
|
||||||
|
type -> mappings
|
||||||
|
.add(
|
||||||
|
OafMapperUtils
|
||||||
|
.instanceTypeMapping(coarItm.getTypeCode(), type)));
|
||||||
|
}
|
||||||
|
if (!mappings.isEmpty()) {
|
||||||
|
instance.getInstanceTypeMapping().addAll(mappings);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}));
|
||||||
|
result.setMetaResourceType(getMetaResourceType(result.getInstance(), vocs));
|
||||||
|
}
|
||||||
|
|
||||||
|
return entity;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean originalResourceType(InstanceTypeMapping itm) {
|
||||||
|
return StringUtils.isNotBlank(itm.getOriginalType()) &&
|
||||||
|
OPENAIRE_COAR_RESOURCE_TYPES_3_1.equals(itm.getVocabularyName()) &&
|
||||||
|
StringUtils.isBlank(itm.getTypeCode()) &&
|
||||||
|
StringUtils.isBlank(itm.getTypeLabel());
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Qualifier getMetaResourceType(final List<Instance> instances, final VocabularyGroup vocs) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(instances)
|
||||||
|
.map(ii -> {
|
||||||
|
if (vocs.vocabularyExists(OPENAIRE_META_RESOURCE_TYPE)) {
|
||||||
|
Optional<InstanceTypeMapping> itm = ii
|
||||||
|
.stream()
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.flatMap(
|
||||||
|
i -> Optional
|
||||||
|
.ofNullable(i.getInstanceTypeMapping())
|
||||||
|
.map(Collection::stream)
|
||||||
|
.orElse(Stream.empty()))
|
||||||
|
.filter(t -> OPENAIRE_COAR_RESOURCE_TYPES_3_1.equals(t.getVocabularyName()))
|
||||||
|
.findFirst();
|
||||||
|
|
||||||
|
if (!itm.isPresent() || Objects.isNull(itm.get().getTypeCode())) {
|
||||||
|
return null;
|
||||||
|
} else {
|
||||||
|
final String typeCode = itm.get().getTypeCode();
|
||||||
|
return Optional
|
||||||
|
.ofNullable(vocs.lookupTermBySynonym(OPENAIRE_META_RESOURCE_TYPE, typeCode))
|
||||||
|
.orElseThrow(
|
||||||
|
() -> new IllegalStateException("unable to find a synonym for '" + typeCode + "' in " +
|
||||||
|
OPENAIRE_META_RESOURCE_TYPE));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
throw new IllegalStateException("vocabulary '" + OPENAIRE_META_RESOURCE_TYPE + "' not available");
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Implements bad and ugly things that we should get rid of ASAP.
|
||||||
|
*
|
||||||
|
* @param value
|
||||||
|
* @return
|
||||||
|
* @param <T>
|
||||||
|
*/
|
||||||
|
public static <T extends Oaf> T dedicatedUglyHacks(T value) {
|
||||||
|
if (value instanceof OafEntity) {
|
||||||
|
if (value instanceof Result) {
|
||||||
|
final Result r = (Result) value;
|
||||||
|
|
||||||
|
// Fix for AMS Acta
|
||||||
|
Optional
|
||||||
|
.ofNullable(r.getInstance())
|
||||||
|
.map(
|
||||||
|
instance -> instance
|
||||||
|
.stream()
|
||||||
|
.filter(
|
||||||
|
i -> Optional
|
||||||
|
.ofNullable(i.getHostedby())
|
||||||
|
.map(KeyValue::getKey)
|
||||||
|
.map(dsId -> dsId.equals("10|re3data_____::4cc76bed7ce2fb95fd8e7a2dfde16016"))
|
||||||
|
.orElse(false)))
|
||||||
|
.ifPresent(instance -> instance.forEach(i -> {
|
||||||
|
if (Optional
|
||||||
|
.ofNullable(i.getPid())
|
||||||
|
.map(pid -> pid.stream().noneMatch(p -> p.getValue().startsWith("10.6092/unibo/amsacta")))
|
||||||
|
.orElse(false)) {
|
||||||
|
i.setHostedby(UNKNOWN_REPOSITORY);
|
||||||
|
}
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,294 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import static com.google.common.base.Preconditions.checkArgument;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.security.MessageDigest;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.function.Function;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
|
import org.apache.commons.codec.binary.Hex;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
import com.google.common.collect.HashBiMap;
|
||||||
|
import com.google.common.collect.Maps;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory class for OpenAIRE identifiers in the Graph
|
||||||
|
*/
|
||||||
|
public class IdentifierFactory implements Serializable {
|
||||||
|
|
||||||
|
public static final String ID_SEPARATOR = "::";
|
||||||
|
public static final String ID_PREFIX_SEPARATOR = "|";
|
||||||
|
|
||||||
|
public static final int ID_PREFIX_LEN = 12;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Declares the associations PID_TYPE -> [DATASOURCE ID, NAME] considered authoritative for that PID_TYPE.
|
||||||
|
* The id of the record (source_::id) will be rewritten as pidType_::id)
|
||||||
|
*/
|
||||||
|
public static final Map<PidType, HashBiMap<String, String>> PID_AUTHORITY = Maps.newHashMap();
|
||||||
|
|
||||||
|
static {
|
||||||
|
PID_AUTHORITY.put(PidType.doi, HashBiMap.create());
|
||||||
|
PID_AUTHORITY.get(PidType.doi).put(CROSSREF_ID, "Crossref");
|
||||||
|
PID_AUTHORITY.get(PidType.doi).put(DATACITE_ID, "Datacite");
|
||||||
|
PID_AUTHORITY.get(PidType.doi).put(ZENODO_OD_ID, "ZENODO");
|
||||||
|
PID_AUTHORITY.get(PidType.doi).put(ZENODO_R3_ID, "Zenodo");
|
||||||
|
|
||||||
|
PID_AUTHORITY.put(PidType.pmc, HashBiMap.create());
|
||||||
|
PID_AUTHORITY.get(PidType.pmc).put(EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central");
|
||||||
|
PID_AUTHORITY.get(PidType.pmc).put(PUBMED_CENTRAL_ID, "PubMed Central");
|
||||||
|
|
||||||
|
PID_AUTHORITY.put(PidType.pmid, HashBiMap.create());
|
||||||
|
PID_AUTHORITY.get(PidType.pmid).put(EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central");
|
||||||
|
PID_AUTHORITY.get(PidType.pmid).put(PUBMED_CENTRAL_ID, "PubMed Central");
|
||||||
|
|
||||||
|
PID_AUTHORITY.put(PidType.arXiv, HashBiMap.create());
|
||||||
|
PID_AUTHORITY.get(PidType.arXiv).put(ARXIV_ID, "arXiv.org e-Print Archive");
|
||||||
|
|
||||||
|
PID_AUTHORITY.put(PidType.w3id, HashBiMap.create());
|
||||||
|
PID_AUTHORITY.get(PidType.w3id).put(ROHUB_ID, "ROHub");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Declares the associations PID_TYPE -> [DATASOURCE ID, PID SUBSTRING] considered as delegated authority for that
|
||||||
|
* PID_TYPE. Example, Zenodo is delegated to forge DOIs that contain the 'zenodo' word.
|
||||||
|
*
|
||||||
|
* If a record with the same id (same pid) comes from 2 data sources, the one coming from a delegated source wins. E.g. Zenodo records win over those from Datacite.
|
||||||
|
* See also https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/187 and the class dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
|
||||||
|
*/
|
||||||
|
public static final Map<PidType, Map<String, String>> DELEGATED_PID_AUTHORITY = Maps.newHashMap();
|
||||||
|
|
||||||
|
static {
|
||||||
|
DELEGATED_PID_AUTHORITY.put(PidType.doi, new HashMap<>());
|
||||||
|
DELEGATED_PID_AUTHORITY.get(PidType.doi).put(ZENODO_OD_ID, "zenodo");
|
||||||
|
DELEGATED_PID_AUTHORITY.get(PidType.doi).put(ZENODO_R3_ID, "zenodo");
|
||||||
|
DELEGATED_PID_AUTHORITY.put(PidType.w3id, new HashMap<>());
|
||||||
|
DELEGATED_PID_AUTHORITY.get(PidType.w3id).put(ROHUB_ID, "ro-id");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Declares the associations PID_TYPE -> [DATASOURCE ID, NAME] whose records are considered enrichment for the graph.
|
||||||
|
* Their OpenAIRE ID is built from the declared PID type. Are merged with their corresponding record, identified by
|
||||||
|
* the same OpenAIRE id.
|
||||||
|
*/
|
||||||
|
public static final Map<PidType, HashBiMap<String, String>> ENRICHMENT_PROVIDER = Maps.newHashMap();
|
||||||
|
|
||||||
|
static {
|
||||||
|
ENRICHMENT_PROVIDER.put(PidType.doi, HashBiMap.create());
|
||||||
|
ENRICHMENT_PROVIDER.get(PidType.doi).put(OPEN_APC_ID, OPEN_APC_NAME);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Set<String> delegatedAuthorityDatasourceIds() {
|
||||||
|
return DELEGATED_PID_AUTHORITY
|
||||||
|
.values()
|
||||||
|
.stream()
|
||||||
|
.flatMap(m -> m.keySet().stream())
|
||||||
|
.collect(Collectors.toCollection(HashSet::new));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static List<StructuredProperty> getPids(List<StructuredProperty> pid, KeyValue collectedFrom) {
|
||||||
|
return pidFromInstance(pid, collectedFrom, true).distinct().collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
|
public static <T extends Result> String createDOIBoostIdentifier(T entity) {
|
||||||
|
if (entity == null)
|
||||||
|
return null;
|
||||||
|
|
||||||
|
StructuredProperty pid = null;
|
||||||
|
if (entity.getPid() != null) {
|
||||||
|
pid = entity
|
||||||
|
.getPid()
|
||||||
|
.stream()
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.filter(s -> s.getQualifier() != null && "doi".equalsIgnoreCase(s.getQualifier().getClassid()))
|
||||||
|
.filter(CleaningFunctions::pidFilter)
|
||||||
|
.findAny()
|
||||||
|
.orElse(null);
|
||||||
|
} else {
|
||||||
|
if (entity.getInstance() != null) {
|
||||||
|
pid = entity
|
||||||
|
.getInstance()
|
||||||
|
.stream()
|
||||||
|
.filter(i -> i.getPid() != null)
|
||||||
|
.flatMap(i -> i.getPid().stream())
|
||||||
|
.filter(CleaningFunctions::pidFilter)
|
||||||
|
.findAny()
|
||||||
|
.orElse(null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (pid != null)
|
||||||
|
return idFromPid(entity, pid, true);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates an identifier from the most relevant PID (if available) provided by a known PID authority in the given
|
||||||
|
* entity T. Returns entity.id when none of the PIDs meet the selection criteria is available.
|
||||||
|
*
|
||||||
|
* @param entity the entity providing PIDs and a default ID.
|
||||||
|
* @param <T> the specific entity type. Currently Organization and Result subclasses are supported.
|
||||||
|
* @param md5 indicates whether should hash the PID value or not.
|
||||||
|
* @return an identifier from the most relevant PID, entity.id otherwise
|
||||||
|
*/
|
||||||
|
public static <T extends OafEntity> String createIdentifier(T entity, boolean md5) {
|
||||||
|
|
||||||
|
checkArgument(StringUtils.isNoneBlank(entity.getId()), "missing entity identifier");
|
||||||
|
|
||||||
|
final Map<String, Set<StructuredProperty>> pids = extractPids(entity);
|
||||||
|
|
||||||
|
return pids
|
||||||
|
.values()
|
||||||
|
.stream()
|
||||||
|
.flatMap(Set::stream)
|
||||||
|
.min(new PidComparator<>(entity))
|
||||||
|
.map(
|
||||||
|
min -> Optional
|
||||||
|
.ofNullable(pids.get(min.getQualifier().getClassid()))
|
||||||
|
.map(
|
||||||
|
p -> p
|
||||||
|
.stream()
|
||||||
|
.sorted(new PidValueComparator())
|
||||||
|
.findFirst()
|
||||||
|
.map(s -> idFromPid(entity, s, md5))
|
||||||
|
.orElseGet(entity::getId))
|
||||||
|
.orElseGet(entity::getId))
|
||||||
|
.orElseGet(entity::getId);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <T extends OafEntity> Map<String, Set<StructuredProperty>> extractPids(T entity) {
|
||||||
|
if (entity instanceof Result) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(((Result) entity).getInstance())
|
||||||
|
.map(IdentifierFactory::mapPids)
|
||||||
|
.orElse(new HashMap<>());
|
||||||
|
} else {
|
||||||
|
return entity
|
||||||
|
.getPid()
|
||||||
|
.stream()
|
||||||
|
.map(PidCleaner::normalizePidValue)
|
||||||
|
.filter(CleaningFunctions::pidFilter)
|
||||||
|
.collect(
|
||||||
|
Collectors
|
||||||
|
.groupingBy(
|
||||||
|
p -> p.getQualifier().getClassid(),
|
||||||
|
Collectors.mapping(p -> p, Collectors.toCollection(HashSet::new))));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Map<String, Set<StructuredProperty>> mapPids(List<Instance> instance) {
|
||||||
|
return instance
|
||||||
|
.stream()
|
||||||
|
.map(i -> pidFromInstance(i.getPid(), i.getCollectedfrom(), false))
|
||||||
|
.flatMap(Function.identity())
|
||||||
|
.collect(
|
||||||
|
Collectors
|
||||||
|
.groupingBy(
|
||||||
|
p -> p.getQualifier().getClassid(),
|
||||||
|
Collectors.mapping(p -> p, Collectors.toCollection(HashSet::new))));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Stream<StructuredProperty> pidFromInstance(List<StructuredProperty> pid, KeyValue collectedFrom,
|
||||||
|
boolean mapHandles) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(pid)
|
||||||
|
.map(
|
||||||
|
pp -> pp
|
||||||
|
.stream()
|
||||||
|
// filter away PIDs provided by a DS that is not considered an authority for the
|
||||||
|
// given PID Type
|
||||||
|
.filter(p -> shouldFilterPidByCriteria(collectedFrom, p, mapHandles))
|
||||||
|
.map(PidCleaner::normalizePidValue)
|
||||||
|
.filter(p -> isNotFromDelegatedAuthority(collectedFrom, p))
|
||||||
|
.filter(CleaningFunctions::pidFilter))
|
||||||
|
.orElse(Stream.empty());
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean shouldFilterPidByCriteria(KeyValue collectedFrom, StructuredProperty p, boolean mapHandles) {
|
||||||
|
final PidType pType = PidType.tryValueOf(p.getQualifier().getClassid());
|
||||||
|
|
||||||
|
if (Objects.isNull(collectedFrom)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean isEnrich = Optional
|
||||||
|
.ofNullable(ENRICHMENT_PROVIDER.get(pType))
|
||||||
|
.map(
|
||||||
|
enrich -> enrich.containsKey(collectedFrom.getKey())
|
||||||
|
|| enrich.containsValue(collectedFrom.getValue()))
|
||||||
|
.orElse(false);
|
||||||
|
|
||||||
|
boolean isAuthority = Optional
|
||||||
|
.ofNullable(PID_AUTHORITY.get(pType))
|
||||||
|
.map(
|
||||||
|
authorities -> authorities.containsKey(collectedFrom.getKey())
|
||||||
|
|| authorities.containsValue(collectedFrom.getValue()))
|
||||||
|
.orElse(false);
|
||||||
|
|
||||||
|
return (mapHandles && pType.equals(PidType.handle)) || isEnrich || isAuthority;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isNotFromDelegatedAuthority(KeyValue collectedFrom, StructuredProperty p) {
|
||||||
|
final PidType pType = PidType.tryValueOf(p.getQualifier().getClassid());
|
||||||
|
|
||||||
|
final Map<String, String> da = DELEGATED_PID_AUTHORITY.get(pType);
|
||||||
|
if (Objects.isNull(da)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (!da.containsKey(collectedFrom.getKey())) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return StringUtils.contains(p.getValue(), da.get(collectedFrom.getKey()));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @see {@link IdentifierFactory#createIdentifier(OafEntity, boolean)}
|
||||||
|
*/
|
||||||
|
public static <T extends OafEntity> String createIdentifier(T entity) {
|
||||||
|
|
||||||
|
return createIdentifier(entity, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <T extends OafEntity> String idFromPid(T entity, StructuredProperty s, boolean md5) {
|
||||||
|
return idFromPid(ModelSupport.getIdPrefix(entity.getClass()), s.getQualifier().getClassid(), s.getValue(), md5);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String idFromPid(String numericPrefix, String pidType, String pidValue, boolean md5) {
|
||||||
|
return new StringBuilder()
|
||||||
|
.append(numericPrefix)
|
||||||
|
.append(ID_PREFIX_SEPARATOR)
|
||||||
|
.append(createPrefix(pidType))
|
||||||
|
.append(ID_SEPARATOR)
|
||||||
|
.append(md5 ? md5(pidValue) : pidValue)
|
||||||
|
.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
// create the prefix (length = 12)
|
||||||
|
private static String createPrefix(String pidType) {
|
||||||
|
StringBuilder prefix = new StringBuilder(StringUtils.left(pidType, ID_PREFIX_LEN));
|
||||||
|
while (prefix.length() < ID_PREFIX_LEN) {
|
||||||
|
prefix.append("_");
|
||||||
|
}
|
||||||
|
return prefix.substring(0, ID_PREFIX_LEN);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String md5(final String s) {
|
||||||
|
try {
|
||||||
|
final MessageDigest md = MessageDigest.getInstance("MD5");
|
||||||
|
md.update(s.getBytes(StandardCharsets.UTF_8));
|
||||||
|
return new String(Hex.encodeHex(md.digest()));
|
||||||
|
} catch (final Exception e) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,78 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
//
|
||||||
|
// Source code recreated from a .class file by IntelliJ IDEA
|
||||||
|
// (powered by FernFlower decompiler)
|
||||||
|
//
|
||||||
|
import eu.dnetlib.dhp.schema.common.EntityType;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
|
||||||
|
public class MergeComparator implements Comparator<Oaf> {
|
||||||
|
public MergeComparator() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public int compare(Oaf left, Oaf right) {
|
||||||
|
// nulls at the end
|
||||||
|
if (left == null && right == null) {
|
||||||
|
return 0;
|
||||||
|
} else if (left == null) {
|
||||||
|
return -1;
|
||||||
|
} else if (right == null) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// invisible
|
||||||
|
if (left.getDataInfo() != null && left.getDataInfo().getInvisible() == true) {
|
||||||
|
if (right.getDataInfo() != null && right.getDataInfo().getInvisible() == false) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// collectedfrom
|
||||||
|
HashSet<String> lCf = getCollectedFromIds(left);
|
||||||
|
HashSet<String> rCf = getCollectedFromIds(right);
|
||||||
|
if (lCf.contains("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")
|
||||||
|
&& !rCf.contains("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")) {
|
||||||
|
return -1;
|
||||||
|
} else if (!lCf.contains("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")
|
||||||
|
&& rCf.contains("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
SubEntityType lClass = SubEntityType.fromClass(left.getClass());
|
||||||
|
SubEntityType rClass = SubEntityType.fromClass(right.getClass());
|
||||||
|
return lClass.ordinal() - rClass.ordinal();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
protected HashSet<String> getCollectedFromIds(Oaf left) {
|
||||||
|
return (HashSet) Optional.ofNullable(left.getCollectedfrom()).map((cf) -> {
|
||||||
|
return (HashSet) cf.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new));
|
||||||
|
}).orElse(new HashSet());
|
||||||
|
}
|
||||||
|
|
||||||
|
enum SubEntityType {
|
||||||
|
publication, dataset, software, otherresearchproduct, datasource, organization, project;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Resolves the EntityType, given the relative class name
|
||||||
|
*
|
||||||
|
* @param clazz the given class name
|
||||||
|
* @param <T> actual OafEntity subclass
|
||||||
|
* @return the EntityType associated to the given class
|
||||||
|
*/
|
||||||
|
public static <T extends Oaf> SubEntityType fromClass(Class<T> clazz) {
|
||||||
|
return valueOf(clazz.getSimpleName().toLowerCase());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,106 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
|
||||||
|
public class MergeEntitiesComparator implements Comparator<Oaf> {
|
||||||
|
static final List<String> PID_AUTHORITIES = Arrays
|
||||||
|
.asList(
|
||||||
|
ModelConstants.ARXIV_ID,
|
||||||
|
ModelConstants.PUBMED_CENTRAL_ID,
|
||||||
|
ModelConstants.EUROPE_PUBMED_CENTRAL_ID,
|
||||||
|
ModelConstants.DATACITE_ID,
|
||||||
|
ModelConstants.CROSSREF_ID);
|
||||||
|
|
||||||
|
static final List<String> RESULT_TYPES = Arrays
|
||||||
|
.asList(
|
||||||
|
ModelConstants.ORP_RESULTTYPE_CLASSID,
|
||||||
|
ModelConstants.SOFTWARE_RESULTTYPE_CLASSID,
|
||||||
|
ModelConstants.DATASET_RESULTTYPE_CLASSID,
|
||||||
|
ModelConstants.PUBLICATION_RESULTTYPE_CLASSID);
|
||||||
|
|
||||||
|
public static final Comparator<Oaf> INSTANCE = new MergeEntitiesComparator();
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(Oaf left, Oaf right) {
|
||||||
|
if (left == null && right == null)
|
||||||
|
return 0;
|
||||||
|
if (left == null)
|
||||||
|
return -1;
|
||||||
|
if (right == null)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
int res = 0;
|
||||||
|
|
||||||
|
// pid authority
|
||||||
|
int cfp1 = Optional
|
||||||
|
.ofNullable(left.getCollectedfrom())
|
||||||
|
.map(
|
||||||
|
cf -> cf
|
||||||
|
.stream()
|
||||||
|
.map(kv -> PID_AUTHORITIES.indexOf(kv.getKey()))
|
||||||
|
.max(Integer::compare)
|
||||||
|
.orElse(-1))
|
||||||
|
.orElse(-1);
|
||||||
|
int cfp2 = Optional
|
||||||
|
.ofNullable(right.getCollectedfrom())
|
||||||
|
.map(
|
||||||
|
cf -> cf
|
||||||
|
.stream()
|
||||||
|
.map(kv -> PID_AUTHORITIES.indexOf(kv.getKey()))
|
||||||
|
.max(Integer::compare)
|
||||||
|
.orElse(-1))
|
||||||
|
.orElse(-1);
|
||||||
|
|
||||||
|
if (cfp1 >= 0 && cfp1 > cfp2) {
|
||||||
|
return 1;
|
||||||
|
} else if (cfp2 >= 0 && cfp2 > cfp1) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// trust
|
||||||
|
if (left.getDataInfo() != null && right.getDataInfo() != null) {
|
||||||
|
res = left.getDataInfo().getTrust().compareTo(right.getDataInfo().getTrust());
|
||||||
|
}
|
||||||
|
|
||||||
|
// result type
|
||||||
|
if (res == 0) {
|
||||||
|
if (left instanceof Result && right instanceof Result) {
|
||||||
|
Result r1 = (Result) left;
|
||||||
|
Result r2 = (Result) right;
|
||||||
|
|
||||||
|
if (r1.getResulttype() == null || r1.getResulttype().getClassid() == null) {
|
||||||
|
if (r2.getResulttype() != null && r2.getResulttype().getClassid() != null) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
} else if (r2.getResulttype() == null || r2.getResulttype().getClassid() == null) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int rt1 = RESULT_TYPES.indexOf(r1.getResulttype().getClassid());
|
||||||
|
int rt2 = RESULT_TYPES.indexOf(r2.getResulttype().getClassid());
|
||||||
|
|
||||||
|
if (rt1 >= 0 && rt1 > rt2) {
|
||||||
|
return 1;
|
||||||
|
} else if (rt2 >= 0 && rt2 > rt1) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// id
|
||||||
|
if (res == 0) {
|
||||||
|
if (left instanceof OafEntity && right instanceof OafEntity) {
|
||||||
|
res = ((OafEntity) right).getId().compareTo(((OafEntity) left).getId());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,27 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
/**
 * Constants bounding the size of the model fields, plus the naming scheme of the index collections.
 * Not instantiable.
 */
public class ModelHardLimits {

	private ModelHardLimits() {
	}

	// components of the collection name
	public static final String LAYOUT = "index";
	public static final String INTERPRETATION = "openaire";
	public static final String SEPARATOR = "-";

	// upper bounds
	public static final int MAX_EXTERNAL_ENTITIES = 50;
	public static final int MAX_AUTHORS = 200;
	public static final int MAX_AUTHOR_FULLNAME_LENGTH = 1000;
	public static final int MAX_TITLE_LENGTH = 5000;
	public static final int MAX_TITLES = 10;
	public static final int MAX_ABSTRACTS = 10;
	public static final int MAX_ABSTRACT_LENGTH = 150000;
	public static final int MAX_RELATED_ABSTRACT_LENGTH = 500;
	public static final int MAX_INSTANCES = 10;

	/**
	 * Builds the collection name as {@code <format>-index-openaire}.
	 *
	 * @param format the metadata format used as the first component of the name
	 * @return the format, {@link #LAYOUT} and {@link #INTERPRETATION} joined by {@link #SEPARATOR}
	 */
	public static String getCollectionName(String format) {
		return String.join(SEPARATOR, format, LAYOUT, INTERPRETATION);
	}

}
|
|
@ -14,8 +14,6 @@ import java.util.stream.Collectors;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.common.AccessRightComparator;
|
import eu.dnetlib.dhp.schema.common.AccessRightComparator;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
|
||||||
public class OafMapperUtils {
|
public class OafMapperUtils {
|
||||||
|
@ -23,65 +21,6 @@ public class OafMapperUtils {
|
||||||
private OafMapperUtils() {
|
private OafMapperUtils() {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Oaf merge(final Oaf left, final Oaf right) {
|
|
||||||
if (ModelSupport.isSubClass(left, OafEntity.class)) {
|
|
||||||
return mergeEntities((OafEntity) left, (OafEntity) right);
|
|
||||||
} else if (ModelSupport.isSubClass(left, Relation.class)) {
|
|
||||||
((Relation) left).mergeFrom((Relation) right);
|
|
||||||
} else {
|
|
||||||
throw new IllegalArgumentException("invalid Oaf type:" + left.getClass().getCanonicalName());
|
|
||||||
}
|
|
||||||
return left;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static OafEntity mergeEntities(OafEntity left, OafEntity right) {
|
|
||||||
if (ModelSupport.isSubClass(left, Result.class)) {
|
|
||||||
return mergeResults((Result) left, (Result) right);
|
|
||||||
} else if (ModelSupport.isSubClass(left, Datasource.class)) {
|
|
||||||
left.mergeFrom(right);
|
|
||||||
} else if (ModelSupport.isSubClass(left, Organization.class)) {
|
|
||||||
left.mergeFrom(right);
|
|
||||||
} else if (ModelSupport.isSubClass(left, Project.class)) {
|
|
||||||
left.mergeFrom(right);
|
|
||||||
} else {
|
|
||||||
throw new IllegalArgumentException("invalid OafEntity subtype:" + left.getClass().getCanonicalName());
|
|
||||||
}
|
|
||||||
return left;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Result mergeResults(Result left, Result right) {
|
|
||||||
|
|
||||||
final boolean leftFromDelegatedAuthority = isFromDelegatedAuthority(left);
|
|
||||||
final boolean rightFromDelegatedAuthority = isFromDelegatedAuthority(right);
|
|
||||||
|
|
||||||
if (leftFromDelegatedAuthority && !rightFromDelegatedAuthority) {
|
|
||||||
return left;
|
|
||||||
}
|
|
||||||
if (!leftFromDelegatedAuthority && rightFromDelegatedAuthority) {
|
|
||||||
return right;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (new ResultTypeComparator().compare(left, right) < 0) {
|
|
||||||
left.mergeFrom(right);
|
|
||||||
return left;
|
|
||||||
} else {
|
|
||||||
right.mergeFrom(left);
|
|
||||||
return right;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static boolean isFromDelegatedAuthority(Result r) {
|
|
||||||
return Optional
|
|
||||||
.ofNullable(r.getInstance())
|
|
||||||
.map(
|
|
||||||
instance -> instance
|
|
||||||
.stream()
|
|
||||||
.filter(i -> Objects.nonNull(i.getCollectedfrom()))
|
|
||||||
.map(i -> i.getCollectedfrom().getKey())
|
|
||||||
.anyMatch(cfId -> IdentifierFactory.delegatedAuthorityDatasourceIds().contains(cfId)))
|
|
||||||
.orElse(false);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static KeyValue keyValue(final String k, final String v) {
|
public static KeyValue keyValue(final String k, final String v) {
|
||||||
final KeyValue kv = new KeyValue();
|
final KeyValue kv = new KeyValue();
|
||||||
kv.setKey(k);
|
kv.setKey(k);
|
||||||
|
@ -141,6 +80,28 @@ public class OafMapperUtils {
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static InstanceTypeMapping instanceTypeMapping(String originalType, String code, String label,
|
||||||
|
String vocabularyName) {
|
||||||
|
final InstanceTypeMapping m = new InstanceTypeMapping();
|
||||||
|
m.setVocabularyName(vocabularyName);
|
||||||
|
m.setOriginalType(originalType);
|
||||||
|
m.setTypeCode(code);
|
||||||
|
m.setTypeLabel(label);
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static InstanceTypeMapping instanceTypeMapping(String originalType, Qualifier term) {
|
||||||
|
return instanceTypeMapping(originalType, term.getClassid(), term.getClassname(), term.getSchemeid());
|
||||||
|
}
|
||||||
|
|
||||||
|
public static InstanceTypeMapping instanceTypeMapping(String originalType) {
|
||||||
|
return instanceTypeMapping(originalType, null, null, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static InstanceTypeMapping instanceTypeMapping(String originalType, String vocabularyName) {
|
||||||
|
return instanceTypeMapping(originalType, null, null, vocabularyName);
|
||||||
|
}
|
||||||
|
|
||||||
public static Qualifier unknown(final String schemeid, final String schemename) {
|
public static Qualifier unknown(final String schemeid, final String schemename) {
|
||||||
return qualifier(UNKNOWN, "Unknown", schemeid, schemename);
|
return qualifier(UNKNOWN, "Unknown", schemeid, schemename);
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,46 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
|
public class OrganizationPidComparator implements Comparator<StructuredProperty> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(StructuredProperty left, StructuredProperty right) {
|
||||||
|
if (left == null) {
|
||||||
|
return right == null ? 0 : -1;
|
||||||
|
} else if (right == null) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
PidType lClass = PidType.tryValueOf(left.getQualifier().getClassid());
|
||||||
|
PidType rClass = PidType.tryValueOf(right.getQualifier().getClassid());
|
||||||
|
|
||||||
|
if (lClass.equals(rClass))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.openorgs))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.openorgs))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.GRID))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.GRID))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.mag_id))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.mag_id))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.urn))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.urn))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,8 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
|
||||||
|
/**
 * Map from a string key to a set of string values; PidBlacklistProvider looks it up by pid type.
 * Declared as a concrete type so that it can be passed as the target class to a JSON deserializer.
 */
public class PidBlacklist extends HashMap<String, HashSet<String>> {
}
|
|
@ -0,0 +1,40 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
public class PidBlacklistProvider {
|
||||||
|
|
||||||
|
private static final PidBlacklist blacklist;
|
||||||
|
|
||||||
|
static {
|
||||||
|
try {
|
||||||
|
String json = IOUtils.toString(IdentifierFactory.class.getResourceAsStream("pid_blacklist.json"));
|
||||||
|
blacklist = new ObjectMapper().readValue(json, PidBlacklist.class);
|
||||||
|
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static PidBlacklist getBlacklist() {
|
||||||
|
return blacklist;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Set<String> getBlacklist(String pidType) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(getBlacklist().get(pidType))
|
||||||
|
.orElse(new HashSet<>());
|
||||||
|
}
|
||||||
|
|
||||||
|
private PidBlacklistProvider() {
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,48 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
|
public class PidComparator<T extends OafEntity> implements Comparator<StructuredProperty> {
|
||||||
|
|
||||||
|
private final T entity;
|
||||||
|
|
||||||
|
public PidComparator(T entity) {
|
||||||
|
this.entity = entity;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(StructuredProperty left, StructuredProperty right) {
|
||||||
|
|
||||||
|
if (left == null && right == null)
|
||||||
|
return 0;
|
||||||
|
if (left == null)
|
||||||
|
return 1;
|
||||||
|
if (right == null)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
if (ModelSupport.isSubClass(entity, Result.class)) {
|
||||||
|
return compareResultPids(left, right);
|
||||||
|
}
|
||||||
|
if (ModelSupport.isSubClass(entity, Organization.class)) {
|
||||||
|
return compareOrganizationtPids(left, right);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Else (but unlikely), lexicographical ordering will do.
|
||||||
|
return left.getQualifier().getClassid().compareTo(right.getQualifier().getClassid());
|
||||||
|
}
|
||||||
|
|
||||||
|
private int compareResultPids(StructuredProperty left, StructuredProperty right) {
|
||||||
|
return new ResultPidComparator().compare(left, right);
|
||||||
|
}
|
||||||
|
|
||||||
|
private int compareOrganizationtPids(StructuredProperty left, StructuredProperty right) {
|
||||||
|
return new OrganizationPidComparator().compare(left, right);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,79 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.EnumUtils;
|
||||||
|
|
||||||
|
public enum PidType {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The DOI syntax shall be made up of a DOI prefix and a DOI suffix separated by a forward slash.
|
||||||
|
*
|
||||||
|
* There is no defined limit on the length of the DOI name, or of the DOI prefix or DOI suffix.
|
||||||
|
*
|
||||||
|
* The DOI name is case-insensitive and can incorporate any printable characters from the legal graphic characters
|
||||||
|
* of Unicode. Further constraints on character use (e.g. use of language-specific alphanumeric characters) can be
|
||||||
|
* defined for an application by the ISO 26324 Registration Authority.
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* DOI prefix: The DOI prefix shall be composed of a directory indicator followed by a registrant code.
|
||||||
|
* These two components shall be separated by a full stop (period). The directory indicator shall be "10" and
|
||||||
|
* distinguishes the entire set of character strings (prefix and suffix) as digital object identifiers within the
|
||||||
|
* resolution system.
|
||||||
|
*
|
||||||
|
* Registrant code: The second element of the DOI prefix shall be the registrant code. The registrant code is a
|
||||||
|
* unique string assigned to a registrant.
|
||||||
|
*
|
||||||
|
* DOI suffix: The DOI suffix shall consist of a character string of any length chosen by the registrant.
|
||||||
|
* Each suffix shall be unique to the prefix element that precedes it. The unique suffix can be a sequential number,
|
||||||
|
* or it might incorporate an identifier generated from or based on another system used by the registrant
|
||||||
|
* (e.g. ISAN, ISBN, ISRC, ISSN, ISTC, ISNI; in such cases, a preferred construction for such a suffix can be
|
||||||
|
* specified, as in Example 1).
|
||||||
|
*
|
||||||
|
* Source: https://www.doi.org/doi_handbook/2_Numbering.html#2.2
|
||||||
|
*/
|
||||||
|
doi,
|
||||||
|
|
||||||
|
/**
|
||||||
|
* PubMed Unique Identifier (PMID)
|
||||||
|
*
|
||||||
|
* This field is a 1-to-8 digit accession number with no leading zeros. It is present on all records and is the
|
||||||
|
* accession number for managing and disseminating records. PMIDs are not reused after records are deleted.
|
||||||
|
*
|
||||||
|
* Beginning in February 2012 PMIDs include extensions following a decimal point to account for article versions
|
||||||
|
* (e.g., 21804956.2). All citations are considered version 1 until replaced. The extended PMID is not displayed
|
||||||
|
* on the MEDLINE format.
|
||||||
|
*
|
||||||
|
* View the citation in abstract format in PubMed to access additional versions when available (see the article in
|
||||||
|
* the Jan-Feb 2012 NLM Technical Bulletin).
|
||||||
|
*
|
||||||
|
* Source: https://www.nlm.nih.gov/bsd/mms/medlineelements.html#pmid
|
||||||
|
*/
|
||||||
|
pmid,
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This field contains the unique identifier for the cited article in PubMed Central. The identifier begins with the
|
||||||
|
* prefix PMC.
|
||||||
|
*
|
||||||
|
* Source: https://www.nlm.nih.gov/bsd/mms/medlineelements.html#pmc
|
||||||
|
*/
|
||||||
|
pmc, handle, arXiv, nct, pdb, w3id,
|
||||||
|
|
||||||
|
// Organization
|
||||||
|
openorgs, ROR, GRID, PIC, ISNI, Wikidata, FundRef, corda, corda_h2020, mag_id, urn,
|
||||||
|
|
||||||
|
// Used by dedup
|
||||||
|
undefined, original;
|
||||||
|
|
||||||
|
public static boolean isValid(String type) {
|
||||||
|
return EnumUtils.isValidEnum(PidType.class, type);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static PidType tryValueOf(String s) {
|
||||||
|
try {
|
||||||
|
return PidType.valueOf(s);
|
||||||
|
} catch (Exception e) {
|
||||||
|
return PidType.original;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,33 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
|
public class PidValueComparator implements Comparator<StructuredProperty> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(StructuredProperty left, StructuredProperty right) {
|
||||||
|
|
||||||
|
if (left == null && right == null)
|
||||||
|
return 0;
|
||||||
|
if (left == null)
|
||||||
|
return 1;
|
||||||
|
if (right == null)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
StructuredProperty l = PidCleaner.normalizePidValue(left);
|
||||||
|
StructuredProperty r = PidCleaner.normalizePidValue(right);
|
||||||
|
|
||||||
|
return Optional
|
||||||
|
.ofNullable(l.getValue())
|
||||||
|
.map(
|
||||||
|
lv -> Optional
|
||||||
|
.ofNullable(r.getValue())
|
||||||
|
.map(rv -> lv.compareTo(rv))
|
||||||
|
.orElse(-1))
|
||||||
|
.orElse(1);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,46 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Comparator for sorting the values from the dnet:review_levels vocabulary, implements the following ordering
|
||||||
|
*
|
||||||
|
* peerReviewed (0001) > nonPeerReviewed (0002) > UNKNOWN (0000)
|
||||||
|
*/
|
||||||
|
public class RefereedComparator implements Comparator<Qualifier> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(Qualifier left, Qualifier right) {
|
||||||
|
if (left == null || left.getClassid() == null) {
|
||||||
|
return (right == null || right.getClassid() == null) ? 0 : -1;
|
||||||
|
} else if (right == null || right.getClassid() == null) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
String lClass = left.getClassid();
|
||||||
|
String rClass = right.getClassid();
|
||||||
|
|
||||||
|
if (lClass.equals(rClass))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if ("0001".equals(lClass))
|
||||||
|
return -1;
|
||||||
|
if ("0001".equals(rClass))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if ("0002".equals(lClass))
|
||||||
|
return -1;
|
||||||
|
if ("0002".equals(rClass))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if ("0000".equals(lClass))
|
||||||
|
return -1;
|
||||||
|
if ("0000".equals(rClass))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,56 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
|
public class ResultPidComparator implements Comparator<StructuredProperty> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(StructuredProperty left, StructuredProperty right) {
|
||||||
|
|
||||||
|
PidType lClass = PidType.tryValueOf(left.getQualifier().getClassid());
|
||||||
|
PidType rClass = PidType.tryValueOf(right.getQualifier().getClassid());
|
||||||
|
|
||||||
|
if (lClass.equals(rClass))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.doi))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.doi))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.pmid))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.pmid))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.pmc))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.pmc))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.handle))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.handle))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.arXiv))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.arXiv))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.nct))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.nct))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals(PidType.pdb))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals(PidType.pdb))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
|
@ -28,6 +28,7 @@ import com.jayway.jsonpath.JsonPath;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.mdstore.MDStoreWithInfo;
|
import eu.dnetlib.dhp.schema.mdstore.MDStoreWithInfo;
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
|
import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.PidCleaner;
|
||||||
import net.minidev.json.JSONArray;
|
import net.minidev.json.JSONArray;
|
||||||
import scala.collection.JavaConverters;
|
import scala.collection.JavaConverters;
|
||||||
import scala.collection.Seq;
|
import scala.collection.Seq;
|
||||||
|
@ -104,7 +105,7 @@ public class DHPUtils {
|
||||||
|
|
||||||
public static String generateUnresolvedIdentifier(final String pid, final String pidType) {
|
public static String generateUnresolvedIdentifier(final String pid, final String pidType) {
|
||||||
|
|
||||||
final String cleanedPid = CleaningFunctions.normalizePidValue(pidType, pid);
|
final String cleanedPid = PidCleaner.normalizePidValue(pidType, pid);
|
||||||
|
|
||||||
return String.format("unresolved::%s::%s", cleanedPid, pidType.toLowerCase().trim());
|
return String.format("unresolved::%s::%s", cleanedPid, pidType.toLowerCase().trim());
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,101 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.pace.common;
|
||||||
|
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.text.Normalizer;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
import com.google.common.base.Splitter;
|
||||||
|
import com.google.common.collect.Iterables;
|
||||||
|
import com.google.common.collect.Sets;
|
||||||
|
import com.ibm.icu.text.Transliterator;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set of common functions for the framework
|
||||||
|
*
|
||||||
|
* @author claudio
|
||||||
|
*/
|
||||||
|
public class PaceCommonUtils {
|
||||||
|
|
||||||
|
// transliterator
|
||||||
|
protected static Transliterator transliterator = Transliterator.getInstance("Any-Eng");
|
||||||
|
|
||||||
|
protected static final String aliases_from = "⁰¹²³⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎àáâäæãåāèéêëēėęəîïíīįìôöòóœøōõûüùúūßśšłžźżçćčñń";
|
||||||
|
protected static final String aliases_to = "0123456789+-=()n0123456789+-=()aaaaaaaaeeeeeeeeiiiiiioooooooouuuuussslzzzcccnn";
|
||||||
|
|
||||||
|
protected static Pattern hexUnicodePattern = Pattern.compile("\\\\u(\\p{XDigit}{4})");
|
||||||
|
|
||||||
|
protected static String fixAliases(final String s) {
|
||||||
|
final StringBuilder sb = new StringBuilder();
|
||||||
|
|
||||||
|
s.chars().forEach(ch -> {
|
||||||
|
final int i = StringUtils.indexOf(aliases_from, ch);
|
||||||
|
sb.append(i >= 0 ? aliases_to.charAt(i) : (char) ch);
|
||||||
|
});
|
||||||
|
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static String transliterate(final String s) {
|
||||||
|
try {
|
||||||
|
return transliterator.transliterate(s);
|
||||||
|
} catch (Exception e) {
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String normalize(final String s) {
|
||||||
|
return fixAliases(transliterate(nfd(unicodeNormalization(s))))
|
||||||
|
.toLowerCase()
|
||||||
|
// do not compact the regexes in a single expression, would cause StackOverflowError in case of large input
|
||||||
|
// strings
|
||||||
|
.replaceAll("[^ \\w]+", "")
|
||||||
|
.replaceAll("(\\p{InCombiningDiacriticalMarks})+", "")
|
||||||
|
.replaceAll("(\\p{Punct})+", " ")
|
||||||
|
.replaceAll("(\\d)+", " ")
|
||||||
|
.replaceAll("(\\n)+", " ")
|
||||||
|
.trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String nfd(final String s) {
|
||||||
|
return Normalizer.normalize(s, Normalizer.Form.NFD);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String unicodeNormalization(final String s) {
|
||||||
|
|
||||||
|
Matcher m = hexUnicodePattern.matcher(s);
|
||||||
|
StringBuffer buf = new StringBuffer(s.length());
|
||||||
|
while (m.find()) {
|
||||||
|
String ch = String.valueOf((char) Integer.parseInt(m.group(1), 16));
|
||||||
|
m.appendReplacement(buf, Matcher.quoteReplacement(ch));
|
||||||
|
}
|
||||||
|
m.appendTail(buf);
|
||||||
|
return buf.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Set<String> loadFromClasspath(final String classpath) {
|
||||||
|
|
||||||
|
Transliterator transliterator = Transliterator.getInstance("Any-Eng");
|
||||||
|
|
||||||
|
final Set<String> h = Sets.newHashSet();
|
||||||
|
try {
|
||||||
|
for (final String s : IOUtils
|
||||||
|
.readLines(PaceCommonUtils.class.getResourceAsStream(classpath), StandardCharsets.UTF_8)) {
|
||||||
|
h.add(fixAliases(transliterator.transliterate(s))); // transliteration of the stopwords
|
||||||
|
}
|
||||||
|
} catch (final Throwable e) {
|
||||||
|
return Sets.newHashSet();
|
||||||
|
}
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static Iterable<String> tokens(final String s, final int maxTokens) {
|
||||||
|
return Iterables.limit(Splitter.on(" ").omitEmptyStrings().trimResults().split(s), maxTokens);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -12,7 +12,7 @@ import com.google.common.collect.Iterables;
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
import com.google.common.hash.Hashing;
|
import com.google.common.hash.Hashing;
|
||||||
|
|
||||||
import eu.dnetlib.pace.common.AbstractPaceFunctions;
|
import eu.dnetlib.pace.common.PaceCommonUtils;
|
||||||
import eu.dnetlib.pace.util.Capitalise;
|
import eu.dnetlib.pace.util.Capitalise;
|
||||||
import eu.dnetlib.pace.util.DotAbbreviations;
|
import eu.dnetlib.pace.util.DotAbbreviations;
|
||||||
|
|
||||||
|
@ -86,7 +86,7 @@ public class Person {
|
||||||
|
|
||||||
private List<String> splitTerms(final String s) {
|
private List<String> splitTerms(final String s) {
|
||||||
if (particles == null) {
|
if (particles == null) {
|
||||||
particles = AbstractPaceFunctions.loadFromClasspath("/eu/dnetlib/pace/config/name_particles.txt");
|
particles = PaceCommonUtils.loadFromClasspath("/eu/dnetlib/pace/config/name_particles.txt");
|
||||||
}
|
}
|
||||||
|
|
||||||
final List<String> list = Lists.newArrayList();
|
final List<String> list = Lists.newArrayList();
|
|
@ -15,4 +15,4 @@ public class Capitalise implements Function<String, String> {
|
||||||
public String apply(final String s) {
|
public String apply(final String s) {
|
||||||
return WordUtils.capitalize(s.toLowerCase(), DELIM);
|
return WordUtils.capitalize(s.toLowerCase(), DELIM);
|
||||||
}
|
}
|
||||||
};
|
}
|
|
@ -8,4 +8,4 @@ public class DotAbbreviations implements Function<String, String> {
|
||||||
public String apply(String s) {
|
public String apply(String s) {
|
||||||
return s.length() == 1 ? s + "." : s;
|
return s.length() == 1 ? s + "." : s;
|
||||||
}
|
}
|
||||||
};
|
}
|
|
@ -1,26 +0,0 @@
|
||||||
[
|
|
||||||
{
|
|
||||||
"paramName": "issm",
|
|
||||||
"paramLongName": "isSparkSessionManaged",
|
|
||||||
"paramDescription": "when true will stop SparkSession after job execution",
|
|
||||||
"paramRequired": false
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "i",
|
|
||||||
"paramLongName": "inputPath",
|
|
||||||
"paramDescription": "the source path",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "o",
|
|
||||||
"paramLongName": "outputPath",
|
|
||||||
"paramDescription": "path of the output graph",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "fi",
|
|
||||||
"paramLongName": "filterInvisible",
|
|
||||||
"paramDescription": "if true filters out invisible entities",
|
|
||||||
"paramRequired": true
|
|
||||||
}
|
|
||||||
]
|
|
|
@ -8,13 +8,31 @@
|
||||||
{
|
{
|
||||||
"paramName": "gin",
|
"paramName": "gin",
|
||||||
"paramLongName": "graphInputPath",
|
"paramLongName": "graphInputPath",
|
||||||
"paramDescription": "the graph root path",
|
"paramDescription": "the input graph root path",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "cp",
|
||||||
|
"paramLongName": "checkpointPath",
|
||||||
|
"paramDescription": "checkpoint directory",
|
||||||
"paramRequired": true
|
"paramRequired": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"paramName": "out",
|
"paramName": "out",
|
||||||
"paramLongName": "outputPath",
|
"paramLongName": "outputPath",
|
||||||
"paramDescription": "the output merged graph root path",
|
"paramDescription": "the output graph root path",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "fi",
|
||||||
|
"paramLongName": "filterInvisible",
|
||||||
|
"paramDescription": "if true filters out invisible entities",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "isu",
|
||||||
|
"paramLongName": "isLookupUrl",
|
||||||
|
"paramDescription": "url to the ISLookup Service",
|
||||||
"paramRequired": true
|
"paramRequired": true
|
||||||
}
|
}
|
||||||
]
|
]
|
|
@ -0,0 +1,20 @@
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"paramName": "issm",
|
||||||
|
"paramLongName": "isSparkSessionManaged",
|
||||||
|
"paramDescription": "when true will stop SparkSession after job execution",
|
||||||
|
"paramRequired": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "hmu",
|
||||||
|
"paramLongName": "hiveMetastoreUris",
|
||||||
|
"paramDescription": "the hive metastore uris",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "sql",
|
||||||
|
"paramLongName": "sql",
|
||||||
|
"paramDescription": "sql script to execute",
|
||||||
|
"paramRequired": true
|
||||||
|
}
|
||||||
|
]
|
|
@ -154,5 +154,13 @@
|
||||||
"unknown":{
|
"unknown":{
|
||||||
"original":"Unknown",
|
"original":"Unknown",
|
||||||
"inverse":"Unknown"
|
"inverse":"Unknown"
|
||||||
|
},
|
||||||
|
"isamongtopnsimilardocuments": {
|
||||||
|
"original": "IsAmongTopNSimilarDocuments",
|
||||||
|
"inverse": "HasAmongTopNSimilarDocuments"
|
||||||
|
},
|
||||||
|
"hasamongtopnsimilardocuments": {
|
||||||
|
"original": "HasAmongTopNSimilarDocuments",
|
||||||
|
"inverse": "IsAmongTopNSimilarDocuments"
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -1,5 +1,8 @@
|
||||||
package eu.dnetlib.dhp.application
|
package eu.dnetlib.dhp.application
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.common.Constants
|
||||||
|
import eu.dnetlib.dhp.utils.DHPUtils.writeHdfsFile
|
||||||
|
|
||||||
import scala.io.Source
|
import scala.io.Source
|
||||||
|
|
||||||
/** This is the main Interface SparkApplication
|
/** This is the main Interface SparkApplication
|
||||||
|
@ -62,12 +65,22 @@ abstract class AbstractScalaApplication(
|
||||||
val conf: SparkConf = new SparkConf()
|
val conf: SparkConf = new SparkConf()
|
||||||
val master = parser.get("master")
|
val master = parser.get("master")
|
||||||
log.info(s"Creating Spark session: Master: $master")
|
log.info(s"Creating Spark session: Master: $master")
|
||||||
SparkSession
|
val b = SparkSession
|
||||||
.builder()
|
.builder()
|
||||||
.config(conf)
|
.config(conf)
|
||||||
.appName(getClass.getSimpleName)
|
.appName(getClass.getSimpleName)
|
||||||
.master(master)
|
if (master != null)
|
||||||
.getOrCreate()
|
b.master(master)
|
||||||
|
b.getOrCreate()
|
||||||
|
}
|
||||||
|
|
||||||
|
def reportTotalSize(targetPath: String, outputBasePath: String): Unit = {
|
||||||
|
val total_items = spark.read.text(targetPath).count()
|
||||||
|
writeHdfsFile(
|
||||||
|
spark.sparkContext.hadoopConfiguration,
|
||||||
|
s"$total_items",
|
||||||
|
outputBasePath + Constants.MDSTORE_SIZE_PATH
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -65,7 +65,11 @@ object ScholixUtils extends Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
def generateScholixResourceFromResult(r: Result): ScholixResource = {
|
def generateScholixResourceFromResult(r: Result): ScholixResource = {
|
||||||
generateScholixResourceFromSummary(ScholixUtils.resultToSummary(r))
|
val sum = ScholixUtils.resultToSummary(r)
|
||||||
|
if (sum != null)
|
||||||
|
generateScholixResourceFromSummary(ScholixUtils.resultToSummary(r))
|
||||||
|
else
|
||||||
|
null
|
||||||
}
|
}
|
||||||
|
|
||||||
val statsAggregator: Aggregator[(String, String, Long), RelatedEntities, RelatedEntities] =
|
val statsAggregator: Aggregator[(String, String, Long), RelatedEntities, RelatedEntities] =
|
||||||
|
@ -153,6 +157,14 @@ object ScholixUtils extends Serializable {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def invRel(rel: String): String = {
|
||||||
|
val semanticRelation = relations.getOrElse(rel.toLowerCase, null)
|
||||||
|
if (semanticRelation != null)
|
||||||
|
semanticRelation.inverse
|
||||||
|
else
|
||||||
|
null
|
||||||
|
}
|
||||||
|
|
||||||
def extractCollectedFrom(summary: ScholixResource): List[ScholixEntityId] = {
|
def extractCollectedFrom(summary: ScholixResource): List[ScholixEntityId] = {
|
||||||
if (summary.getCollectedFrom != null && !summary.getCollectedFrom.isEmpty) {
|
if (summary.getCollectedFrom != null && !summary.getCollectedFrom.isEmpty) {
|
||||||
val l: List[ScholixEntityId] = summary.getCollectedFrom.asScala.map { d =>
|
val l: List[ScholixEntityId] = summary.getCollectedFrom.asScala.map { d =>
|
||||||
|
@ -377,10 +389,7 @@ object ScholixUtils extends Serializable {
|
||||||
if (persistentIdentifiers.isEmpty)
|
if (persistentIdentifiers.isEmpty)
|
||||||
return null
|
return null
|
||||||
s.setLocalIdentifier(persistentIdentifiers.asJava)
|
s.setLocalIdentifier(persistentIdentifiers.asJava)
|
||||||
if (r.isInstanceOf[Publication])
|
// s.setTypology(r.getResulttype.getClassid)
|
||||||
s.setTypology(Typology.publication)
|
|
||||||
else
|
|
||||||
s.setTypology(Typology.dataset)
|
|
||||||
|
|
||||||
s.setSubType(r.getInstance().get(0).getInstancetype.getClassname)
|
s.setSubType(r.getInstance().get(0).getInstancetype.getClassname)
|
||||||
|
|
||||||
|
|
|
@ -1,109 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.common.api;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileInputStream;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStream;
|
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
import org.junit.jupiter.api.Assertions;
|
|
||||||
import org.junit.jupiter.api.Disabled;
|
|
||||||
import org.junit.jupiter.api.Test;
|
|
||||||
|
|
||||||
@Disabled
|
|
||||||
class ZenodoAPIClientTest {
|
|
||||||
|
|
||||||
private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions";
|
|
||||||
private final String ACCESS_TOKEN = "";
|
|
||||||
|
|
||||||
private final String CONCEPT_REC_ID = "657113";
|
|
||||||
|
|
||||||
private final String depositionId = "674915";
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void testUploadOldDeposition() throws IOException, MissingConceptDoiException {
|
|
||||||
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
|
||||||
ACCESS_TOKEN);
|
|
||||||
Assertions.assertEquals(200, client.uploadOpenDeposition(depositionId));
|
|
||||||
|
|
||||||
File file = new File(getClass()
|
|
||||||
.getResource("/eu/dnetlib/dhp/common/api/COVID-19.json.gz")
|
|
||||||
.getPath());
|
|
||||||
|
|
||||||
InputStream is = new FileInputStream(file);
|
|
||||||
|
|
||||||
Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz"));
|
|
||||||
|
|
||||||
String metadata = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json"));
|
|
||||||
|
|
||||||
Assertions.assertEquals(200, client.sendMretadata(metadata));
|
|
||||||
|
|
||||||
Assertions.assertEquals(202, client.publish());
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void testNewDeposition() throws IOException {
|
|
||||||
|
|
||||||
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
|
||||||
ACCESS_TOKEN);
|
|
||||||
Assertions.assertEquals(201, client.newDeposition());
|
|
||||||
|
|
||||||
File file = new File(getClass()
|
|
||||||
.getResource("/eu/dnetlib/dhp/common/api/COVID-19.json.gz")
|
|
||||||
.getPath());
|
|
||||||
|
|
||||||
InputStream is = new FileInputStream(file);
|
|
||||||
|
|
||||||
Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz"));
|
|
||||||
|
|
||||||
String metadata = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json"));
|
|
||||||
|
|
||||||
Assertions.assertEquals(200, client.sendMretadata(metadata));
|
|
||||||
|
|
||||||
Assertions.assertEquals(202, client.publish());
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void testNewVersionNewName() throws IOException, MissingConceptDoiException {
|
|
||||||
|
|
||||||
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
|
||||||
ACCESS_TOKEN);
|
|
||||||
|
|
||||||
Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
|
|
||||||
|
|
||||||
File file = new File(getClass()
|
|
||||||
.getResource("/eu/dnetlib/dhp/common/api/newVersion")
|
|
||||||
.getPath());
|
|
||||||
|
|
||||||
InputStream is = new FileInputStream(file);
|
|
||||||
|
|
||||||
Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition"));
|
|
||||||
|
|
||||||
Assertions.assertEquals(202, client.publish());
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void testNewVersionOldName() throws IOException, MissingConceptDoiException {
|
|
||||||
|
|
||||||
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
|
||||||
ACCESS_TOKEN);
|
|
||||||
|
|
||||||
Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
|
|
||||||
|
|
||||||
File file = new File(getClass()
|
|
||||||
.getResource("/eu/dnetlib/dhp/common/api/newVersion2")
|
|
||||||
.getPath());
|
|
||||||
|
|
||||||
InputStream is = new FileInputStream(file);
|
|
||||||
|
|
||||||
Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition"));
|
|
||||||
|
|
||||||
Assertions.assertEquals(202, client.publish());
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -0,0 +1,21 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Assertions;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
class BlackListProviderTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void blackListTest() {
|
||||||
|
|
||||||
|
Assertions.assertNotNull(PidBlacklistProvider.getBlacklist());
|
||||||
|
Assertions.assertNotNull(PidBlacklistProvider.getBlacklist().get("doi"));
|
||||||
|
Assertions.assertTrue(PidBlacklistProvider.getBlacklist().get("doi").size() > 0);
|
||||||
|
final Set<String> xxx = PidBlacklistProvider.getBlacklist("xxx");
|
||||||
|
Assertions.assertNotNull(xxx);
|
||||||
|
Assertions.assertEquals(0, xxx.size());
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,87 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.DeserializationFeature;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||||
|
|
||||||
|
class IdentifierFactoryTest {
|
||||||
|
|
||||||
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
|
||||||
|
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testCreateIdentifierForPublication() throws IOException {
|
||||||
|
|
||||||
|
verifyIdentifier(
|
||||||
|
"publication_doi1.json", "50|doi_________::79dbc7a2a56dc1532659f9038843256e", true);
|
||||||
|
|
||||||
|
verifyIdentifier(
|
||||||
|
"publication_doi2.json", "50|doi_________::79dbc7a2a56dc1532659f9038843256e", true);
|
||||||
|
|
||||||
|
verifyIdentifier(
|
||||||
|
"publication_doi3.json", "50|pmc_________::e2a339e0e11bfbf55462e14a07f1b304", true);
|
||||||
|
|
||||||
|
verifyIdentifier(
|
||||||
|
"publication_doi4.json", "50|od______2852::38861c44e6052a8d49f59a4c39ba5e66", true);
|
||||||
|
|
||||||
|
verifyIdentifier(
|
||||||
|
"publication_doi5.json", "50|doi_________::3bef95c0ca26dd55451fc8839ea69d27", true);
|
||||||
|
|
||||||
|
verifyIdentifier(
|
||||||
|
"publication_pmc1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", true);
|
||||||
|
|
||||||
|
verifyIdentifier(
|
||||||
|
"publication_pmc2.json", "50|pmc_________::e2a339e0e11bfbf55462e14a07f1b304", true);
|
||||||
|
|
||||||
|
verifyIdentifier(
|
||||||
|
"publication_openapc.json", "50|doi_________::79dbc7a2a56dc1532659f9038843256e", true);
|
||||||
|
|
||||||
|
final String defaultID = "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f";
|
||||||
|
verifyIdentifier("publication_3.json", defaultID, true);
|
||||||
|
verifyIdentifier("publication_4.json", defaultID, true);
|
||||||
|
verifyIdentifier("publication_5.json", defaultID, true);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testCreateIdentifierForPublicationNoHash() throws IOException {
|
||||||
|
|
||||||
|
verifyIdentifier("publication_doi1.json", "50|doi_________::10.1016/j.cmet.2010.03.013", false);
|
||||||
|
verifyIdentifier("publication_doi2.json", "50|doi_________::10.1016/j.cmet.2010.03.013", false);
|
||||||
|
verifyIdentifier("publication_pmc1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", false);
|
||||||
|
verifyIdentifier(
|
||||||
|
"publication_urn1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", false);
|
||||||
|
|
||||||
|
final String defaultID = "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f";
|
||||||
|
verifyIdentifier("publication_3.json", defaultID, false);
|
||||||
|
verifyIdentifier("publication_4.json", defaultID, false);
|
||||||
|
verifyIdentifier("publication_5.json", defaultID, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testCreateIdentifierForROHub() throws IOException {
|
||||||
|
verifyIdentifier(
|
||||||
|
"orp-rohub.json", "50|w3id________::afc7592914ae190a50570db90f55f9c2", true);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void verifyIdentifier(String filename, String expectedID, boolean md5) throws IOException {
|
||||||
|
final String json = IOUtils.toString(getClass().getResourceAsStream(filename));
|
||||||
|
final Publication pub = OBJECT_MAPPER.readValue(json, Publication.class);
|
||||||
|
|
||||||
|
String id = IdentifierFactory.createIdentifier(pub, md5);
|
||||||
|
System.out.println(id);
|
||||||
|
assertNotNull(id);
|
||||||
|
assertEquals(expectedID, id);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,130 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.lang.reflect.InvocationTargetException;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.commons.beanutils.BeanUtils;
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.DeserializationFeature;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
|
||||||
|
public class MergeUtilsTest {
|
||||||
|
|
||||||
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
|
||||||
|
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testMergePubs_new() throws IOException {
|
||||||
|
Publication pt = read("publication_test.json", Publication.class);
|
||||||
|
Publication p1 = read("publication_test.json", Publication.class);
|
||||||
|
|
||||||
|
assertEquals(1, pt.getCollectedfrom().size());
|
||||||
|
assertEquals(ModelConstants.CROSSREF_ID, pt.getCollectedfrom().get(0).getKey());
|
||||||
|
|
||||||
|
Instance i = new Instance();
|
||||||
|
i.setUrl(Lists.newArrayList("https://..."));
|
||||||
|
p1.getInstance().add(i);
|
||||||
|
|
||||||
|
Publication ptp1 = MergeUtils.mergePublication(pt, p1);
|
||||||
|
|
||||||
|
assertNotNull(ptp1.getInstance());
|
||||||
|
assertEquals(2, ptp1.getInstance().size());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testMergePubs() throws IOException {
|
||||||
|
Publication p1 = read("publication_1.json", Publication.class);
|
||||||
|
Publication p2 = read("publication_2.json", Publication.class);
|
||||||
|
Dataset d1 = read("dataset_1.json", Dataset.class);
|
||||||
|
Dataset d2 = read("dataset_2.json", Dataset.class);
|
||||||
|
|
||||||
|
assertEquals(1, p1.getCollectedfrom().size());
|
||||||
|
assertEquals(ModelConstants.CROSSREF_ID, p1.getCollectedfrom().get(0).getKey());
|
||||||
|
assertEquals(1, d2.getCollectedfrom().size());
|
||||||
|
assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
|
||||||
|
|
||||||
|
assertEquals(1, p2.getCollectedfrom().size());
|
||||||
|
assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
|
||||||
|
assertEquals(1, d1.getCollectedfrom().size());
|
||||||
|
assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
|
||||||
|
|
||||||
|
final Result p1d2 = MergeUtils.checkedMerge(p1, d2, true);
|
||||||
|
assertEquals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID, p1d2.getResulttype().getClassid());
|
||||||
|
assertTrue(p1d2 instanceof Publication);
|
||||||
|
assertEquals(p1.getId(), p1d2.getId());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testMergePubs_1() throws IOException {
|
||||||
|
Publication p2 = read("publication_2.json", Publication.class);
|
||||||
|
Dataset d1 = read("dataset_1.json", Dataset.class);
|
||||||
|
|
||||||
|
final Result p2d1 = MergeUtils.checkedMerge(p2, d1, true);
|
||||||
|
assertEquals((ModelConstants.DATASET_RESULTTYPE_CLASSID), p2d1.getResulttype().getClassid());
|
||||||
|
assertTrue(p2d1 instanceof Dataset);
|
||||||
|
assertEquals(d1.getId(), p2d1.getId());
|
||||||
|
assertEquals(2, p2d1.getCollectedfrom().size());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testMergePubs_2() throws IOException {
|
||||||
|
Publication p1 = read("publication_1.json", Publication.class);
|
||||||
|
Publication p2 = read("publication_2.json", Publication.class);
|
||||||
|
|
||||||
|
Result p1p2 = MergeUtils.checkedMerge(p1, p2, true);
|
||||||
|
assertTrue(p1p2 instanceof Publication);
|
||||||
|
assertEquals(p1.getId(), p1p2.getId());
|
||||||
|
assertEquals(2, p1p2.getCollectedfrom().size());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testDelegatedAuthority_1() throws IOException {
|
||||||
|
Dataset d1 = read("dataset_2.json", Dataset.class);
|
||||||
|
Dataset d2 = read("dataset_delegated.json", Dataset.class);
|
||||||
|
|
||||||
|
assertEquals(1, d2.getCollectedfrom().size());
|
||||||
|
assertTrue(cfId(d2.getCollectedfrom()).contains(ModelConstants.ZENODO_OD_ID));
|
||||||
|
|
||||||
|
Result res = (Result) MergeUtils.merge(d1, d2, true);
|
||||||
|
|
||||||
|
assertEquals(d2, res);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testDelegatedAuthority_2() throws IOException {
|
||||||
|
Dataset p1 = read("publication_1.json", Dataset.class);
|
||||||
|
Dataset d2 = read("dataset_delegated.json", Dataset.class);
|
||||||
|
|
||||||
|
assertEquals(1, d2.getCollectedfrom().size());
|
||||||
|
assertTrue(cfId(d2.getCollectedfrom()).contains(ModelConstants.ZENODO_OD_ID));
|
||||||
|
|
||||||
|
Result res = (Result) MergeUtils.merge(p1, d2, true);
|
||||||
|
|
||||||
|
assertEquals(d2, res);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected HashSet<String> cfId(List<KeyValue> collectedfrom) {
|
||||||
|
return collectedfrom.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new));
|
||||||
|
}
|
||||||
|
|
||||||
|
protected <T extends Result> T read(String filename, Class<T> clazz) throws IOException {
|
||||||
|
final String json = IOUtils.toString(getClass().getResourceAsStream(filename));
|
||||||
|
return OBJECT_MAPPER.readValue(json, clazz);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -149,7 +149,7 @@ class OafMapperUtilsTest {
|
||||||
void testDate() {
|
void testDate() {
|
||||||
final String date = GraphCleaningFunctions.cleanDate("23-FEB-1998");
|
final String date = GraphCleaningFunctions.cleanDate("23-FEB-1998");
|
||||||
assertNotNull(date);
|
assertNotNull(date);
|
||||||
System.out.println(date);
|
assertEquals("1998-02-23", date);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -166,8 +166,8 @@ class OafMapperUtilsTest {
|
||||||
|
|
||||||
assertEquals(
|
assertEquals(
|
||||||
ModelConstants.PUBLICATION_RESULTTYPE_CLASSID,
|
ModelConstants.PUBLICATION_RESULTTYPE_CLASSID,
|
||||||
OafMapperUtils
|
MergeUtils
|
||||||
.mergeResults(p1, d2)
|
.mergeResult(p1, d2)
|
||||||
.getResulttype()
|
.getResulttype()
|
||||||
.getClassid());
|
.getClassid());
|
||||||
|
|
||||||
|
@ -177,11 +177,11 @@ class OafMapperUtilsTest {
|
||||||
assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
|
assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
|
||||||
|
|
||||||
assertEquals(
|
assertEquals(
|
||||||
ModelConstants.DATASET_RESULTTYPE_CLASSID,
|
ModelConstants.PUBLICATION_RESULTTYPE_CLASSID,
|
||||||
OafMapperUtils
|
((Result) MergeUtils
|
||||||
.mergeResults(p2, d1)
|
.merge(p2, d1))
|
||||||
.getResulttype()
|
.getResulttype()
|
||||||
.getClassid());
|
.getClassid());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -192,7 +192,7 @@ class OafMapperUtilsTest {
|
||||||
assertEquals(1, d2.getCollectedfrom().size());
|
assertEquals(1, d2.getCollectedfrom().size());
|
||||||
assertTrue(cfId(d2.getCollectedfrom()).contains(ModelConstants.ZENODO_OD_ID));
|
assertTrue(cfId(d2.getCollectedfrom()).contains(ModelConstants.ZENODO_OD_ID));
|
||||||
|
|
||||||
Result res = OafMapperUtils.mergeResults(d1, d2);
|
Result res = MergeUtils.mergeResult(d1, d2);
|
||||||
|
|
||||||
assertEquals(d2, res);
|
assertEquals(d2, res);
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,12 @@
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0000/ra.v2i3.114::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"4.65008652949e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0000/ra.v2i3.114"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0001/(aj).v3i6.458::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"4.01810569717e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0001/(aj).v3i6.458"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0001/1587::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.39172290649e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0001/1587"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0001/462::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"6.33235333753e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.36"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.00285265116e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0001/462"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0001/731::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"4.01810569717e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0001/731"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0001/ijllis.v9i4.2066.g2482::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"8.48190886761e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0001/ijllis.v9i4.2066.g2482"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0118/alfahim.v3i1.140::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"9.88840807598e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0118/alfahim.v3i1.140"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0166/fk2.stagefigshare.6442896.v3::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"7.28336930301e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0166/fk2.stagefigshare.6442896.v3"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0301/jttb.v2i1.64::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"7.28336930301e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0301/jttb.v2i1.64"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0809/seruni.v1i1.567::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"2.62959564033e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0809/seruni.v1i1.567"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0809/seruni.v2i1.765::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"9.40178571921e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0559872"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"3.67659957614e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0809/seruni.v2i1.765"}]}]}
|
||||||
|
{"dataInfo":{"deletedbyinference":false,"inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:enrich","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"id":"unresolved::10.0901/jkip.v7i3.485::doi","instance":[{"measures":[{"id":"influence","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"6.26204125721e-09"}]}],"pid":[{"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.0901/jkip.v7i3.485"}]}]}
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1 @@
|
||||||
|
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[{"qualifier":{"classid":"scp-number"},"value":"79953761260"}]}
|
|
@ -0,0 +1 @@
|
||||||
|
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[]}
|
|
@ -0,0 +1 @@
|
||||||
|
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f"}
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,33 @@
|
||||||
|
{
|
||||||
|
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
|
||||||
|
"instance": [
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2",
|
||||||
|
"value": "Crossref"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "doi"},
|
||||||
|
"value": "10.1016/j.cmet.2010.03.013"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "urn"},
|
||||||
|
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "scp-number"},
|
||||||
|
"value": "79953761260"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "pmc"},
|
||||||
|
"value": "21459329"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -0,0 +1,37 @@
|
||||||
|
{
|
||||||
|
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
|
||||||
|
"instance": [
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2",
|
||||||
|
"value": "Crossref"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "doi"},
|
||||||
|
"value": "10.1016/j.cmet.2010.03.013"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c",
|
||||||
|
"value": "Europe PubMed Central"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "urn"},
|
||||||
|
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "scp-number"},
|
||||||
|
"value": "79953761260"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "pmc"},
|
||||||
|
"value": "21459329"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -0,0 +1,37 @@
|
||||||
|
{
|
||||||
|
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
|
||||||
|
"instance": [
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
|
||||||
|
"value": "Zenodo"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "doi"},
|
||||||
|
"value": "10.1016/j.cmet.2010.03.013"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c",
|
||||||
|
"value": "Europe PubMed Central"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "urn"},
|
||||||
|
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "scp-number"},
|
||||||
|
"value": "79953761260"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "pmc"},
|
||||||
|
"value": "PMC21459329"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -0,0 +1,37 @@
|
||||||
|
{
|
||||||
|
"id": "50|od______2852::38861c44e6052a8d49f59a4c39ba5e66",
|
||||||
|
"instance": [
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
|
||||||
|
"value": "Zenodo"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "doi"},
|
||||||
|
"value": "10.1016/j.cmet.2010.03.013"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "handle"},
|
||||||
|
"value": "11012/83840"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|opendoar____::2852",
|
||||||
|
"value": "Digital library of Brno University of Technology"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "pmc"},
|
||||||
|
"value": "21459329"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "handle"},
|
||||||
|
"value": "11012/83840"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -0,0 +1,37 @@
|
||||||
|
{
|
||||||
|
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
|
||||||
|
"instance": [
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
|
||||||
|
"value": "Zenodo"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "doi"},
|
||||||
|
"value": "10.5281/zenodo.5121485"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c",
|
||||||
|
"value": "Europe PubMed Central"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "urn"},
|
||||||
|
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "scp-number"},
|
||||||
|
"value": "79953761260"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "pmc"},
|
||||||
|
"value": "21459329"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -0,0 +1,3 @@
|
||||||
|
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", "resulttype" : { "classid" : "publication" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value" : "Crossref"} ], "isGreen": null, "openAccessColor": "gold", "isInDiamondJournal": null, "publiclyFunded": null}
|
||||||
|
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resulttype" : { "classid" : "publication" }, "isGreen": true, "openAccessColor": "gold", "isInDiamondJournal": true, "publiclyFunded": false }
|
||||||
|
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1h", "resulttype" : { "classid" : "publication" }, "isGreen": false, "openAccessColor": null, "isInDiamondJournal": true, "publiclyFunded": false }
|
|
@ -0,0 +1,3 @@
|
||||||
|
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", "resulttype" : { "classid" : "publication" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value" : "Crossref"} ], "isGreen": null, "openAccessColor": "gold", "isInDiamondJournal": null, "publiclyFunded": null}
|
||||||
|
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resulttype" : { "classid" : "publication" }, "isGreen": true, "openAccessColor": "bronze", "isInDiamondJournal": true, "publiclyFunded": false }
|
||||||
|
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1h", "resulttype" : { "classid" : "publication" }, "isGreen": false, "openAccessColor": null, "isInDiamondJournal": true, "publiclyFunded": false }
|
|
@ -0,0 +1,31 @@
|
||||||
|
{
|
||||||
|
"id": "50|openapc_____::000023f9cb6e3a247c764daec4273cbc",
|
||||||
|
"resuttype": {
|
||||||
|
"classid": "publication"
|
||||||
|
},
|
||||||
|
"instance": [
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|apc_________::e2b1600b229fc30663c8a1f662debddf",
|
||||||
|
"value": "OpenAPC Global Initiative"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "doi"},
|
||||||
|
"value": "10.1016/j.cmet.2010.03.013"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "pmc"},
|
||||||
|
"value": "21459329"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "pmid"},
|
||||||
|
"value": "25811027"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"url":["https://doi.org/10.1155/2015/439379"]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,17 @@
|
||||||
|
{
|
||||||
|
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "urn"},
|
||||||
|
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "scp-number"},
|
||||||
|
"value": "79953761260"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "pmc"},
|
||||||
|
"value": "21459329"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -0,0 +1,21 @@
|
||||||
|
{
|
||||||
|
"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
|
||||||
|
"instance": [
|
||||||
|
{
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c",
|
||||||
|
"value": "Europe PubMed Central"
|
||||||
|
},
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {"classid": "doi"},
|
||||||
|
"value": "10.1016/j.cmet.2010.03.013"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier":{"classid":"pmc"},
|
||||||
|
"value":"PMC21459329"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -0,0 +1,428 @@
|
||||||
|
{
|
||||||
|
"author": [
|
||||||
|
{
|
||||||
|
"affiliation": null,
|
||||||
|
"fullname": "Deymier, Ghislaine",
|
||||||
|
"name": "Ghislaine",
|
||||||
|
"pid": [],
|
||||||
|
"rank": 1,
|
||||||
|
"surname": "Deymier"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation": null,
|
||||||
|
"fullname": "Gaschet, Frédéric",
|
||||||
|
"name": "Frédéric",
|
||||||
|
"pid": [],
|
||||||
|
"rank": 2,
|
||||||
|
"surname": "Gaschet"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation": null,
|
||||||
|
"fullname": "Pouyanne, Guillaume",
|
||||||
|
"name": "Guillaume",
|
||||||
|
"pid": [],
|
||||||
|
"rank": 3,
|
||||||
|
"surname": "Pouyanne"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"bestaccessright": {
|
||||||
|
"classid": "OPEN",
|
||||||
|
"classname": "Open Access",
|
||||||
|
"schemeid": "dnet:access_modes",
|
||||||
|
"schemename": "dnet:access_modes"
|
||||||
|
},
|
||||||
|
"collectedfrom": [
|
||||||
|
{
|
||||||
|
"key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2",
|
||||||
|
"value" : "Crossref"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"context": [],
|
||||||
|
"contributor": [],
|
||||||
|
"country": [],
|
||||||
|
"coverage": [],
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"dateofacceptance": {
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"value": "2013-11-30"
|
||||||
|
},
|
||||||
|
"dateofcollection": "2024-02-28T00:22:13+0000",
|
||||||
|
"dateoftransformation": "2024-03-06T08:43:13.253Z",
|
||||||
|
"description": [
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"value": "For analyzing the reciprocal interaction between urban sprawl and car use, research has first focused on the link between urban density and mobility. By looking for a reduction in energy consumption, cities have favoured a compact planning development. Then reflection has broadened from the simple density to the wider, multi-dimensional concept of urban form. This controversy has led to a renewal of analysis in term of the costs of urban growth, notably by comparing the costs of \"compact\" and \"sprawled\" development. The idea is to compare the mobility costs of different urban forms. However, most often because of a lack of data, such studies are scarce. This paper suggests an innovative method to compute mobility costs at an infra-urban scale : The Spatialized Travel Account (STA). It is based on the CERTU's travel account methodology at a metropolitan scale. It puts forward an accurate estimate of the mobility costs for each transport mode (individual and public) and for each type of payer (households, firms, local authorities...). In order to test the relationships between mobility costs and urban form, we link the computed costs to morphological characteristics of infra-urban zones, taking in account sociodemographic characteristics of households."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"value": "L'interaction réciproque entre étalement urbain et usage de l'automobile a conduit la recherche à se focaliser sur le lien entre les densités urbaines et la mobilité. En cherchant à réduire leur consommation d'énergie pour les transports, et donc leurs émissions de Gaz à Effet de Serre, les villes ont alors cherché à planifier la \" ville compacte \", privilégiant notamment la reconstruction de la ville sur elle-même et la densification. Par la suite, la réflexion s'est élargie de la simple densité à la notion de forme urbaine et à toutes ses dimensions. Cette controverse devait conduire à un renouveau des analyses en termes de coûts de la croissance urbaine : le débat reste vif, encore aujourd'hui, sur les coûts comparés de la ville étalée et de la ville compacte. Plus largement, il s'agit d'explorer les coûts des différentes formes urbaines en termes de mobilité. Malgré cela, généralement pour des raisons de disponibilité de données, les études sur le sujet restent extrêmement rares. Cet article propose un outil novateur pour mesurer les coûts de la mobilité à l'échelle intraurbaine : le Compte Déplacements Territorialisé (CDT). Il s'inspire de la méthode développée par le CERTU pour l'établissement des Comptes Déplacements Voyageurs à l'échelle métropolitaine. Le CDT propose, pour chacune des zones de l'agglomération, une estimation précise de l'ensemble des coûts liés aux déplacements de personnes, ventilés par mode de transport (individuels et collectifs) et par type de financeurs (ménages, entreprises, collectivités territoriales, etc.). Nous proposons une application de cette méthode à la controverse sur le lien entre forme urbaine et coûts de la mobilité. Les coûts sont reliés aux caractéristiques morphologiques des zones (en termes de densité et de diversité, notamment), en prenant soin de contrôler les facteurs socio-économiques qui influent traditionnellement sur les comportements de mobilité (taille du ménage, revenu, etc.)."
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"eoscifguidelines": [],
|
||||||
|
"externalReference": [],
|
||||||
|
"extraInfo": [],
|
||||||
|
"format": [
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"value": "application/pdf"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"fulltext": [],
|
||||||
|
"id": "50|06cdd3ff4700::4826ac62a11a957fe332e2c291dcfcca",
|
||||||
|
"instance": [
|
||||||
|
{
|
||||||
|
"accessright": {
|
||||||
|
"classid": "OPEN",
|
||||||
|
"classname": "Open Access",
|
||||||
|
"schemeid": "dnet:access_modes",
|
||||||
|
"schemename": "dnet:access_modes"
|
||||||
|
},
|
||||||
|
"alternateIdentifier": [
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "doi",
|
||||||
|
"classname": "Digital Object Identifier",
|
||||||
|
"schemeid": "dnet:pid_types",
|
||||||
|
"schemename": "dnet:pid_types"
|
||||||
|
},
|
||||||
|
"value": "10.46298/cst.12132"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|openaire____::6824b298c96ba906a3e6a70593affbf5",
|
||||||
|
"value": "Episciences"
|
||||||
|
},
|
||||||
|
"dateofacceptance": {
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"value": "2013-11-30"
|
||||||
|
},
|
||||||
|
"hostedby": {
|
||||||
|
"key": "10|openaire____::6824b298c96ba906a3e6a70593affbf5",
|
||||||
|
"value": "Episciences"
|
||||||
|
},
|
||||||
|
"instanceTypeMapping": [
|
||||||
|
{
|
||||||
|
"originalType": "http://purl.org/coar/resource_type/c_6501",
|
||||||
|
"typeCode": "http://purl.org/coar/resource_type/c_6501",
|
||||||
|
"typeLabel": "journal article",
|
||||||
|
"vocabularyName": "openaire::coar_resource_types_3_1"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"originalType": "http://purl.org/coar/resource_type/c_6501",
|
||||||
|
"typeCode": "Article",
|
||||||
|
"typeLabel": "Article",
|
||||||
|
"vocabularyName": "openaire::user_resource_types"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"instancetype": {
|
||||||
|
"classid": "0001",
|
||||||
|
"classname": "Article",
|
||||||
|
"schemeid": "dnet:publication_resource",
|
||||||
|
"schemename": "dnet:publication_resource"
|
||||||
|
},
|
||||||
|
"license": {
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"value": "CC BY NC SA"
|
||||||
|
},
|
||||||
|
"pid": [],
|
||||||
|
"refereed": {
|
||||||
|
"classid": "0002",
|
||||||
|
"classname": "nonPeerReviewed",
|
||||||
|
"schemeid": "dnet:review_levels",
|
||||||
|
"schemename": "dnet:review_levels"
|
||||||
|
},
|
||||||
|
"url": [
|
||||||
|
"https://doi.org/10.46298/cst.12132",
|
||||||
|
"https://cst.episciences.org/12132"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"language": {
|
||||||
|
"classid": "fra/fre",
|
||||||
|
"classname": "French",
|
||||||
|
"schemeid": "dnet:languages",
|
||||||
|
"schemename": "dnet:languages"
|
||||||
|
},
|
||||||
|
"lastupdatetimestamp": 1710636106633,
|
||||||
|
"metaResourceType": {
|
||||||
|
"classid": "Research Literature",
|
||||||
|
"classname": "Research Literature",
|
||||||
|
"schemeid": "openaire::meta_resource_types",
|
||||||
|
"schemename": "openaire::meta_resource_types"
|
||||||
|
},
|
||||||
|
"originalId": [
|
||||||
|
"oai:episciences.org:cst:12132",
|
||||||
|
"50|06cdd3ff4700::4826ac62a11a957fe332e2c291dcfcca"
|
||||||
|
],
|
||||||
|
"pid": [],
|
||||||
|
"publisher": {
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"value": "episciences.org"
|
||||||
|
},
|
||||||
|
"relevantdate": [
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "Accepted",
|
||||||
|
"classname": "Accepted",
|
||||||
|
"schemeid": "dnet:dataCite_date",
|
||||||
|
"schemename": "dnet:dataCite_date"
|
||||||
|
},
|
||||||
|
"value": "2024-02-11"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "issued",
|
||||||
|
"classname": "issued",
|
||||||
|
"schemeid": "dnet:dataCite_date",
|
||||||
|
"schemename": "dnet:dataCite_date"
|
||||||
|
},
|
||||||
|
"value": "2013-11-30"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "available",
|
||||||
|
"classname": "available",
|
||||||
|
"schemeid": "dnet:dataCite_date",
|
||||||
|
"schemename": "dnet:dataCite_date"
|
||||||
|
},
|
||||||
|
"value": "2013-11-30"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"resourcetype": {
|
||||||
|
"classid": "journal article",
|
||||||
|
"classname": "journal article",
|
||||||
|
"schemeid": "dnet:dataCite_resource",
|
||||||
|
"schemename": "dnet:dataCite_resource"
|
||||||
|
},
|
||||||
|
"resulttype": {
|
||||||
|
"classid": "publication",
|
||||||
|
"classname": "publication",
|
||||||
|
"schemeid": "dnet:result_typologies",
|
||||||
|
"schemename": "dnet:result_typologies"
|
||||||
|
},
|
||||||
|
"source": [],
|
||||||
|
"subject": [
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "keyword",
|
||||||
|
"classname": "keyword",
|
||||||
|
"schemeid": "dnet:subject_classification_typologies",
|
||||||
|
"schemename": "dnet:subject_classification_typologies"
|
||||||
|
},
|
||||||
|
"value": "JEL: H - Public Economics/H.H7 - State and Local Government • Intergovernmental Relations/H.H7.H72 - State and Local Budget and Expenditures"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "keyword",
|
||||||
|
"classname": "keyword",
|
||||||
|
"schemeid": "dnet:subject_classification_typologies",
|
||||||
|
"schemename": "dnet:subject_classification_typologies"
|
||||||
|
},
|
||||||
|
"value": "Local public finance"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "keyword",
|
||||||
|
"classname": "keyword",
|
||||||
|
"schemeid": "dnet:subject_classification_typologies",
|
||||||
|
"schemename": "dnet:subject_classification_typologies"
|
||||||
|
},
|
||||||
|
"value": "JEL: R - Urban, Rural, Regional, Real Estate, and Transportation Economics/R.R5 - Regional Government Analysis/R.R5.R51 - Finance in Urban and Rural Economies"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": [
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:repository",
|
||||||
|
"classname": "Harvested",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "main title",
|
||||||
|
"classname": "main title",
|
||||||
|
"schemeid": "dnet:dataCite_title",
|
||||||
|
"schemename": "dnet:dataCite_title"
|
||||||
|
},
|
||||||
|
"value": "Urban form and the costs of daily mobility. The spatialized travel account tool and its application to the Bordeaux metropolitan area"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -0,0 +1,23 @@
|
||||||
|
{
|
||||||
|
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "urn"
|
||||||
|
},
|
||||||
|
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "scp-number"
|
||||||
|
},
|
||||||
|
"value": "79953761260"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "pmcid"
|
||||||
|
},
|
||||||
|
"value": "21459329"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
File diff suppressed because one or more lines are too long
|
@ -24,7 +24,7 @@
|
||||||
<executions>
|
<executions>
|
||||||
<execution>
|
<execution>
|
||||||
<id>scala-compile-first</id>
|
<id>scala-compile-first</id>
|
||||||
<phase>initialize</phase>
|
<phase>process-resources</phase>
|
||||||
<goals>
|
<goals>
|
||||||
<goal>add-source</goal>
|
<goal>add-source</goal>
|
||||||
<goal>compile</goal>
|
<goal>compile</goal>
|
||||||
|
@ -49,18 +49,16 @@
|
||||||
</build>
|
</build>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
|
<artifactId>dhp-common</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>edu.cmu</groupId>
|
<groupId>edu.cmu</groupId>
|
||||||
<artifactId>secondstring</artifactId>
|
<artifactId>secondstring</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
|
||||||
<groupId>com.google.guava</groupId>
|
|
||||||
<artifactId>guava</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.google.code.gson</groupId>
|
|
||||||
<artifactId>gson</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.commons</groupId>
|
<groupId>org.apache.commons</groupId>
|
||||||
<artifactId>commons-lang3</artifactId>
|
<artifactId>commons-lang3</artifactId>
|
||||||
|
@ -85,10 +83,6 @@
|
||||||
<groupId>com.fasterxml.jackson.core</groupId>
|
<groupId>com.fasterxml.jackson.core</groupId>
|
||||||
<artifactId>jackson-databind</artifactId>
|
<artifactId>jackson-databind</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.commons</groupId>
|
|
||||||
<artifactId>commons-math3</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.jayway.jsonpath</groupId>
|
<groupId>com.jayway.jsonpath</groupId>
|
||||||
<artifactId>json-path</artifactId>
|
<artifactId>json-path</artifactId>
|
||||||
|
@ -107,4 +101,90 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
|
<profiles>
|
||||||
|
<profile>
|
||||||
|
<id>spark-24</id>
|
||||||
|
<activation>
|
||||||
|
<activeByDefault>true</activeByDefault>
|
||||||
|
</activation>
|
||||||
|
|
||||||
|
<build>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.codehaus.mojo</groupId>
|
||||||
|
<artifactId>build-helper-maven-plugin</artifactId>
|
||||||
|
<version>3.4.0</version>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<phase>generate-sources</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>add-source</goal>
|
||||||
|
</goals>
|
||||||
|
<configuration>
|
||||||
|
<sources>
|
||||||
|
<source>src/main/spark-2</source>
|
||||||
|
</sources>
|
||||||
|
</configuration>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
|
</profile>
|
||||||
|
|
||||||
|
<profile>
|
||||||
|
<id>spark-34</id>
|
||||||
|
|
||||||
|
<build>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.codehaus.mojo</groupId>
|
||||||
|
<artifactId>build-helper-maven-plugin</artifactId>
|
||||||
|
<version>3.4.0</version>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<phase>generate-sources</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>add-source</goal>
|
||||||
|
</goals>
|
||||||
|
<configuration>
|
||||||
|
<sources>
|
||||||
|
<source>src/main/spark-2</source>
|
||||||
|
</sources>
|
||||||
|
</configuration>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
|
</profile>
|
||||||
|
|
||||||
|
<profile>
|
||||||
|
<id>spark-35</id>
|
||||||
|
|
||||||
|
<build>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.codehaus.mojo</groupId>
|
||||||
|
<artifactId>build-helper-maven-plugin</artifactId>
|
||||||
|
<version>3.4.0</version>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<phase>generate-sources</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>add-source</goal>
|
||||||
|
</goals>
|
||||||
|
<configuration>
|
||||||
|
<sources>
|
||||||
|
<source>src/main/spark-35</source>
|
||||||
|
</sources>
|
||||||
|
</configuration>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
|
</profile>
|
||||||
|
</profiles>
|
||||||
|
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -14,9 +14,9 @@ import eu.dnetlib.pace.config.Config;
|
||||||
|
|
||||||
public abstract class AbstractClusteringFunction extends AbstractPaceFunctions implements ClusteringFunction {
|
public abstract class AbstractClusteringFunction extends AbstractPaceFunctions implements ClusteringFunction {
|
||||||
|
|
||||||
protected Map<String, Integer> params;
|
protected Map<String, Object> params;
|
||||||
|
|
||||||
public AbstractClusteringFunction(final Map<String, Integer> params) {
|
public AbstractClusteringFunction(final Map<String, Object> params) {
|
||||||
this.params = params;
|
this.params = params;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -27,7 +27,7 @@ public abstract class AbstractClusteringFunction extends AbstractPaceFunctions i
|
||||||
return fields
|
return fields
|
||||||
.stream()
|
.stream()
|
||||||
.filter(f -> !f.isEmpty())
|
.filter(f -> !f.isEmpty())
|
||||||
.map(this::normalize)
|
.map(s -> normalize(s))
|
||||||
.map(s -> filterAllStopWords(s))
|
.map(s -> filterAllStopWords(s))
|
||||||
.map(s -> doApply(conf, s))
|
.map(s -> doApply(conf, s))
|
||||||
.map(c -> filterBlacklisted(c, ngramBlacklist))
|
.map(c -> filterBlacklisted(c, ngramBlacklist))
|
||||||
|
@ -36,11 +36,24 @@ public abstract class AbstractClusteringFunction extends AbstractPaceFunctions i
|
||||||
.collect(Collectors.toCollection(HashSet::new));
|
.collect(Collectors.toCollection(HashSet::new));
|
||||||
}
|
}
|
||||||
|
|
||||||
public Map<String, Integer> getParams() {
|
public Map<String, Object> getParams() {
|
||||||
return params;
|
return params;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Integer param(String name) {
|
protected Integer param(String name) {
|
||||||
return params.get(name);
|
Object val = params.get(name);
|
||||||
|
if (val == null)
|
||||||
|
return null;
|
||||||
|
if (val instanceof Number) {
|
||||||
|
return ((Number) val).intValue();
|
||||||
|
}
|
||||||
|
return Integer.parseInt(val.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
protected int paramOrDefault(String name, int i) {
|
||||||
|
Integer res = param(name);
|
||||||
|
if (res == null)
|
||||||
|
res = i;
|
||||||
|
return res;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,7 +13,7 @@ import eu.dnetlib.pace.config.Config;
|
||||||
@ClusteringClass("acronyms")
|
@ClusteringClass("acronyms")
|
||||||
public class Acronyms extends AbstractClusteringFunction {
|
public class Acronyms extends AbstractClusteringFunction {
|
||||||
|
|
||||||
public Acronyms(Map<String, Integer> params) {
|
public Acronyms(Map<String, Object> params) {
|
||||||
super(params);
|
super(params);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -11,6 +11,6 @@ public interface ClusteringFunction {
|
||||||
|
|
||||||
public Collection<String> apply(Config config, List<String> fields);
|
public Collection<String> apply(Config config, List<String> fields);
|
||||||
|
|
||||||
public Map<String, Integer> getParams();
|
public Map<String, Object> getParams();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -12,7 +12,7 @@ import eu.dnetlib.pace.config.Config;
|
||||||
@ClusteringClass("immutablefieldvalue")
|
@ClusteringClass("immutablefieldvalue")
|
||||||
public class ImmutableFieldValue extends AbstractClusteringFunction {
|
public class ImmutableFieldValue extends AbstractClusteringFunction {
|
||||||
|
|
||||||
public ImmutableFieldValue(final Map<String, Integer> params) {
|
public ImmutableFieldValue(final Map<String, Object> params) {
|
||||||
super(params);
|
super(params);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,69 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.pace.clustering;
|
||||||
|
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
import com.jayway.jsonpath.Configuration;
|
||||||
|
import com.jayway.jsonpath.DocumentContext;
|
||||||
|
import com.jayway.jsonpath.JsonPath;
|
||||||
|
import com.jayway.jsonpath.Option;
|
||||||
|
|
||||||
|
import eu.dnetlib.pace.common.AbstractPaceFunctions;
|
||||||
|
import eu.dnetlib.pace.config.Config;
|
||||||
|
import eu.dnetlib.pace.util.MapDocumentUtil;
|
||||||
|
|
||||||
|
@ClusteringClass("jsonlistclustering")
|
||||||
|
public class JSONListClustering extends AbstractPaceFunctions implements ClusteringFunction {
|
||||||
|
|
||||||
|
private Map<String, Object> params;
|
||||||
|
|
||||||
|
public JSONListClustering(Map<String, Object> params) {
|
||||||
|
this.params = params;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Map<String, Object> getParams() {
|
||||||
|
return params;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Collection<String> apply(Config conf, List<String> fields) {
|
||||||
|
return fields
|
||||||
|
.stream()
|
||||||
|
.filter(f -> !f.isEmpty())
|
||||||
|
.map(s -> doApply(conf, s))
|
||||||
|
.filter(StringUtils::isNotBlank)
|
||||||
|
.collect(Collectors.toCollection(HashSet::new));
|
||||||
|
}
|
||||||
|
|
||||||
|
private String doApply(Config conf, String json) {
|
||||||
|
StringBuilder st = new StringBuilder(); // to build the string used for comparisons basing on the jpath into
|
||||||
|
// parameters
|
||||||
|
final DocumentContext documentContext = JsonPath
|
||||||
|
.using(Configuration.defaultConfiguration().addOptions(Option.SUPPRESS_EXCEPTIONS))
|
||||||
|
.parse(json);
|
||||||
|
|
||||||
|
// for each path in the param list
|
||||||
|
for (String key : params.keySet().stream().filter(k -> k.contains("jpath")).collect(Collectors.toList())) {
|
||||||
|
String path = params.get(key).toString();
|
||||||
|
String value = MapDocumentUtil.getJPathString(path, documentContext);
|
||||||
|
if (value == null || value.isEmpty())
|
||||||
|
value = "";
|
||||||
|
st.append(value);
|
||||||
|
st.append(" ");
|
||||||
|
}
|
||||||
|
|
||||||
|
st.setLength(st.length() - 1);
|
||||||
|
|
||||||
|
if (StringUtils.isBlank(st)) {
|
||||||
|
return "1";
|
||||||
|
}
|
||||||
|
return st.toString();
|
||||||
|
}
|
||||||
|
}
|
|
@ -16,7 +16,7 @@ public class LastNameFirstInitial extends AbstractClusteringFunction {
|
||||||
|
|
||||||
private boolean DEFAULT_AGGRESSIVE = true;
|
private boolean DEFAULT_AGGRESSIVE = true;
|
||||||
|
|
||||||
public LastNameFirstInitial(final Map<String, Integer> params) {
|
public LastNameFirstInitial(final Map<String, Object> params) {
|
||||||
super(params);
|
super(params);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -25,7 +25,7 @@ public class LastNameFirstInitial extends AbstractClusteringFunction {
|
||||||
return fields
|
return fields
|
||||||
.stream()
|
.stream()
|
||||||
.filter(f -> !f.isEmpty())
|
.filter(f -> !f.isEmpty())
|
||||||
.map(this::normalize)
|
.map(LastNameFirstInitial::normalize)
|
||||||
.map(s -> doApply(conf, s))
|
.map(s -> doApply(conf, s))
|
||||||
.map(c -> filterBlacklisted(c, ngramBlacklist))
|
.map(c -> filterBlacklisted(c, ngramBlacklist))
|
||||||
.flatMap(c -> c.stream())
|
.flatMap(c -> c.stream())
|
||||||
|
@ -33,8 +33,7 @@ public class LastNameFirstInitial extends AbstractClusteringFunction {
|
||||||
.collect(Collectors.toCollection(HashSet::new));
|
.collect(Collectors.toCollection(HashSet::new));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
public static String normalize(final String s) {
|
||||||
protected String normalize(final String s) {
|
|
||||||
return fixAliases(transliterate(nfd(unicodeNormalization(s))))
|
return fixAliases(transliterate(nfd(unicodeNormalization(s))))
|
||||||
// do not compact the regexes in a single expression, would cause StackOverflowError in case of large input
|
// do not compact the regexes in a single expression, would cause StackOverflowError in case of large input
|
||||||
// strings
|
// strings
|
||||||
|
|
|
@ -2,33 +2,43 @@
|
||||||
package eu.dnetlib.pace.clustering;
|
package eu.dnetlib.pace.clustering;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import eu.dnetlib.pace.config.Config;
|
import eu.dnetlib.pace.config.Config;
|
||||||
|
|
||||||
@ClusteringClass("keywordsclustering")
|
@ClusteringClass("legalnameclustering")
|
||||||
public class KeywordsClustering extends AbstractClusteringFunction {
|
public class LegalnameClustering extends AbstractClusteringFunction {
|
||||||
|
|
||||||
public KeywordsClustering(Map<String, Integer> params) {
|
private static final Pattern CITY_CODE_PATTERN = Pattern.compile("city::\\d+");
|
||||||
|
private static final Pattern KEYWORD_CODE_PATTERN = Pattern.compile("key::\\d+");
|
||||||
|
|
||||||
|
public LegalnameClustering(Map<String, Object> params) {
|
||||||
super(params);
|
super(params);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Set<String> getRegexList(String input, Pattern codeRegex) {
|
||||||
|
Matcher matcher = codeRegex.matcher(input);
|
||||||
|
Set<String> cities = new HashSet<>();
|
||||||
|
while (matcher.find()) {
|
||||||
|
cities.add(matcher.group());
|
||||||
|
}
|
||||||
|
return cities;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Collection<String> doApply(final Config conf, String s) {
|
protected Collection<String> doApply(final Config conf, String s) {
|
||||||
|
|
||||||
// takes city codes and keywords codes without duplicates
|
|
||||||
Set<String> keywords = getKeywords(s, conf.translationMap(), params.getOrDefault("windowSize", 4));
|
|
||||||
Set<String> cities = getCities(s, params.getOrDefault("windowSize", 4));
|
|
||||||
|
|
||||||
// list of combination to return as result
|
// list of combination to return as result
|
||||||
final Collection<String> combinations = new LinkedHashSet<String>();
|
final Collection<String> combinations = new LinkedHashSet<String>();
|
||||||
|
|
||||||
for (String keyword : keywordsToCodes(keywords, conf.translationMap())) {
|
for (String keyword : getRegexList(s, KEYWORD_CODE_PATTERN)) {
|
||||||
for (String city : citiesToCodes(cities)) {
|
for (String city : getRegexList(s, CITY_CODE_PATTERN)) {
|
||||||
combinations.add(keyword + "-" + city);
|
combinations.add(keyword + "-" + city);
|
||||||
if (combinations.size() >= params.getOrDefault("max", 2)) {
|
if (combinations.size() >= paramOrDefault("max", 2)) {
|
||||||
return combinations;
|
return combinations;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -42,9 +52,6 @@ public class KeywordsClustering extends AbstractClusteringFunction {
|
||||||
return fields
|
return fields
|
||||||
.stream()
|
.stream()
|
||||||
.filter(f -> !f.isEmpty())
|
.filter(f -> !f.isEmpty())
|
||||||
.map(this::cleanup)
|
|
||||||
.map(this::normalize)
|
|
||||||
.map(s -> filterAllStopWords(s))
|
|
||||||
.map(s -> doApply(conf, s))
|
.map(s -> doApply(conf, s))
|
||||||
.map(c -> filterBlacklisted(c, ngramBlacklist))
|
.map(c -> filterBlacklisted(c, ngramBlacklist))
|
||||||
.flatMap(c -> c.stream())
|
.flatMap(c -> c.stream())
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue