forked from D-Net/dnet-hadoop
Compare commits
675 Commits
serializeL
...
beta
Author | SHA1 | Date |
---|---|---|
Andreas Czerniak | 1622b40842 | |
Andreas Czerniak | 854da6c0e7 | |
Andreas Czerniak | 2d0a4d5b85 | |
Andreas Czerniak | c777b50995 | |
Claudio Atzori | 97ad94d7d9 | |
Sandro La Bruzzo | 7a3819144d | |
Claudio Atzori | f0ea2410e5 | |
Claudio Atzori | b37bc277c4 | |
Miriam Baglioni | 42e8f76778 | |
Miriam Baglioni | a7c4d0d16d | |
Miriam Baglioni | 7bf12ad24a | |
Miriam Baglioni | a75fb8c47a | |
Miriam Baglioni | 4d517ed9ec | |
Miriam Baglioni | e7d5a39c03 | |
Claudio Atzori | dbd6fa1d65 | |
Miriam Baglioni | 4993666d73 | |
Sandro La Bruzzo | 1b9e8378b3 | |
Sandro La Bruzzo | 57e2c4b749 | |
Sandro La Bruzzo | b78d2b71f0 | |
Claudio Atzori | 0f2144b5e0 | |
Claudio Atzori | dcd282977c | |
Claudio Atzori | 4f212652ca | |
Sandro La Bruzzo | 0163dadb7f | |
Miriam Baglioni | 904e1c2667 | |
Miriam Baglioni | 064f9bbd87 | |
Miriam Baglioni | 93f26fb742 | |
Miriam Baglioni | b7e450070b | |
Miriam Baglioni | af8a33638d | |
Miriam Baglioni | 639190370a | |
Miriam Baglioni | adccc2346a | |
Claudio Atzori | 908294d86e | |
Claudio Atzori | 58f8998e3d | |
Claudio Atzori | 174c3037e1 | |
Claudio Atzori | 045d767013 | |
Claudio Atzori | cb30770a0b | |
Claudio Atzori | bd59b58efb | |
Claudio Atzori | a6977197b3 | |
Miriam Baglioni | 4c60ee1718 | |
Miriam Baglioni | 92fd69e25d | |
Claudio Atzori | fe7e5f4748 | |
Claudio Atzori | bcea4e3a9b | |
Miriam Baglioni | a706ba0c08 | |
Antonis Lempesis | 81ee654271 | |
Antonis Lempesis | 7551e52e95 | |
Miriam Baglioni | 7a1b440413 | |
Claudio Atzori | 278cf08421 | |
Claudio Atzori | cccb16900c | |
Miriam Baglioni | 2a67ee13ec | |
Miriam Baglioni | 5c4fee3533 | |
Miriam Baglioni | 69e9ea9eeb | |
Miriam Baglioni | 31b26d48ac | |
Miriam Baglioni | bf3a9505e0 | |
Miriam Baglioni | 10579c0dd0 | |
Miriam Baglioni | 6116fc5d40 | |
Miriam Baglioni | b81efb6a9d | |
Miriam Baglioni | 73175ba086 | |
Miriam Baglioni | de6c4c8968 | |
Miriam Baglioni | b352fbe453 | |
Miriam Baglioni | 34ac56565d | |
Miriam Baglioni | 20ef1d657f | |
Miriam Baglioni | 813f856d3f | |
Miriam Baglioni | 2c126ed014 | |
Miriam Baglioni | bf52a1847b | |
Miriam Baglioni | 0807fdb65a | |
Miriam Baglioni | b5e11a3a0a | |
Miriam Baglioni | c5739c4266 | |
Miriam Baglioni | da5f6260aa | |
Miriam Baglioni | 4849270c55 | |
Claudio Atzori | 8d18500069 | |
Miriam Baglioni | 9d19b057b8 | |
Miriam Baglioni | be0acccf42 | |
Miriam Baglioni | 89ea9fa0e1 | |
Antonis Lempesis | 16539d7360 | |
Antonis Lempesis | 3edd661608 | |
Antonis Lempesis | a4c0cbb98c | |
Miriam Baglioni | e24a7f3496 | |
Miriam Baglioni | d1ae219cb4 | |
Miriam Baglioni | 460e6b95d6 | |
Sandro La Bruzzo | 3920d68992 | |
Antonis Lempesis | 58996972d9 | |
dimitrispie | c1cdec09a9 | |
Miriam Baglioni | 3cc1b7b153 | |
Miriam Baglioni | 5e5dfd619c | |
Miriam Baglioni | 63b648b0dd | |
Sandro La Bruzzo | b881ee5ef8 | |
Sandro La Bruzzo | 63952018c0 | |
Sandro La Bruzzo | e5bff64f2e | |
Claudio Atzori | e30e5ac8a8 | |
Claudio Atzori | 1790fa2d44 | |
Miriam Baglioni | 56409d1281 | |
Miriam Baglioni | a3592b463a | |
Miriam Baglioni | 22d4b5619b | |
Miriam Baglioni | 6fb6236cd4 | |
Claudio Atzori | aff3ddc8d2 | |
Miriam Baglioni | 573bd17cbb | |
Miriam Baglioni | 4eb8276493 | |
Miriam Baglioni | 936578aaf1 | |
Miriam Baglioni | 8d755cca80 | |
Claudio Atzori | 98eb292c59 | |
Claudio Atzori | 5e17247bb6 | |
Claudio Atzori | b70ecccea0 | |
Claudio Atzori | 25dc7929a9 | |
Claudio Atzori | eb43eda42a | |
Claudio Atzori | 41c70c607d | |
Alessia Bardi | 8f1e018ceb | |
Alessia Bardi | cba63e9f82 | |
Alessia Bardi | e53228401b | |
Claudio Atzori | adf17452b0 | |
Claudio Atzori | e6e177dda0 | |
Alessia Bardi | 6b5d7688a4 | |
Miriam Baglioni | b113586207 | |
Sandro La Bruzzo | 5d51b3dd4a | |
Miriam Baglioni | d9836f0cf3 | |
Miriam Baglioni | d1df01ff1e | |
Sandro La Bruzzo | ed0c352799 | |
Miriam Baglioni | 96a7d46278 | |
Sandro La Bruzzo | e9f285ec4d | |
Sandro La Bruzzo | bf880e2508 | |
Sandro La Bruzzo | 81bf604059 | |
Sandro La Bruzzo | 7af0bbd0b1 | |
Claudio Atzori | 9132727793 | |
Claudio Atzori | 08795cbd30 | |
Miriam Baglioni | f430688ff7 | |
Miriam Baglioni | 4bb1d43afc | |
Sandro La Bruzzo | 0fa0ce33d6 | |
Sandro La Bruzzo | f7011b90d8 | |
Claudio Atzori | 372633880f | |
Claudio Atzori | dd0b2e5244 | |
Claudio Atzori | c4c705aa46 | |
Claudio Atzori | 863a2f9db3 | |
Claudio Atzori | 9cac283bec | |
Miriam Baglioni | d9f80488cc | |
Miriam Baglioni | 58bc3f223a | |
Miriam Baglioni | 8905a39bf3 | |
Miriam Baglioni | 87eedad898 | |
Claudio Atzori | 3b19821f3c | |
Claudio Atzori | cfa4560769 | |
Claudio Atzori | d85af6fc25 | |
Claudio Atzori | 4fe7888817 | |
Claudio Atzori | 01e5e0142a | |
Claudio Atzori | 0df9574a6f | |
Claudio Atzori | 014e872ae1 | |
Claudio Atzori | 5c6d328537 | |
dimitrispie | 09fc2afdca | |
dimitrispie | 8750a71502 | |
dimitrispie | 25fc8abf77 | |
Antonis Lempesis | 0b4163ee0b | |
dimitrispie | 29f69f2f89 | |
Sandro La Bruzzo | bb7f556eff | |
Sandro La Bruzzo | 1e1f5e4fe0 | |
Miriam Baglioni | ac07ed8251 | |
Miriam Baglioni | 5fd0e610bf | |
Sandro La Bruzzo | feea154e89 | |
Sandro La Bruzzo | 028a8acad8 | |
Sandro La Bruzzo | 2164a2a889 | |
Sandro La Bruzzo | a7cf277d98 | |
Sandro La Bruzzo | 483d3039d1 | |
Sandro La Bruzzo | 93fe8ce8b2 | |
Sandro La Bruzzo | 35e20b0647 | |
Miriam Baglioni | fdb75b180e | |
Miriam Baglioni | 0506fa2654 | |
Sandro La Bruzzo | 6110a2b984 | |
Sandro La Bruzzo | 65ebe1019b | |
Sandro La Bruzzo | 155d8bf83f | |
Sandro La Bruzzo | 3426451d3f | |
Sandro La Bruzzo | 75298ec442 | |
Sandro La Bruzzo | 4542a2338b | |
Claudio Atzori | 90c2a4987e | |
Claudio Atzori | e5a2c596b2 | |
Claudio Atzori | f4538f3c4c | |
Claudio Atzori | 2b46b87f56 | |
Miriam Baglioni | 9fae872181 | |
Sandro La Bruzzo | fc03c99805 | |
Sandro La Bruzzo | 8a7c7d36db | |
Sandro La Bruzzo | 0c0d561bc4 | |
Claudio Atzori | 62fa61f3cf | |
Claudio Atzori | bd9a43cefd | |
Claudio Atzori | 3974fa7dc1 | |
Claudio Atzori | a24b9f8268 | |
Claudio Atzori | c0750fb17c | |
Claudio Atzori | bb5dca7979 | |
Miriam Baglioni | 793b5a8e5f | |
Miriam Baglioni | 5dc5792722 | |
Miriam Baglioni | 0136a8c266 | |
Miriam Baglioni | 1b79c0ee79 | |
Claudio Atzori | 10a32f287f | |
Antonis Lempesis | cb3adb90f4 | |
Antonis Lempesis | c283406829 | |
Claudio Atzori | e0395719d7 | |
Claudio Atzori | 82a4e4efae | |
Miriam Baglioni | 6d4a1c57ee | |
Sandro La Bruzzo | 9c82d670b8 | |
Sandro La Bruzzo | 1f5ee116ed | |
Sandro La Bruzzo | 2fd9ceac13 | |
Sandro La Bruzzo | 60ae874dcb | |
Sandro La Bruzzo | 2506d7a679 | |
Sandro La Bruzzo | cded363b55 | |
Miriam Baglioni | 4094f2bb9a | |
Miriam Baglioni | ec8b0219ff | |
Miriam Baglioni | 2bbece2ca5 | |
Sandro La Bruzzo | 2d67020c59 | |
Claudio Atzori | 49f897ef29 | |
Miriam Baglioni | 28ea532ece | |
Sandro La Bruzzo | 18c1d70ef4 | |
Sandro La Bruzzo | a1cafaf2e3 | |
Miriam Baglioni | 7c96e3fd46 | |
Miriam Baglioni | c7c0c3187b | |
Miriam Baglioni | c6a9f0a1a8 | |
Miriam Baglioni | 99d86134f5 | |
Claudio Atzori | 0a727d325d | |
Claudio Atzori | bafa2990f3 | |
Claudio Atzori | 668ac25224 | |
Claudio Atzori | 7d0a03f607 | |
Claudio Atzori | 941a50a2fc | |
Claudio Atzori | 7c804acda8 | |
Sandro La Bruzzo | efa09057db | |
Sandro La Bruzzo | 48923e46a1 | |
Claudio Atzori | d2c787d416 | |
Claudio Atzori | 975b10b711 | |
Claudio Atzori | 1ecceea788 | |
Miriam Baglioni | 4ec88c718c | |
Miriam Baglioni | 6f1a434e90 | |
Miriam Baglioni | 157d33ebf9 | |
Claudio Atzori | 7b81607035 | |
Miriam Baglioni | 6595135a1a | |
Miriam Baglioni | 43cae4ad88 | |
Miriam Baglioni | b3f9370125 | |
Miriam Baglioni | 92d0e18b55 | |
Miriam Baglioni | 881113743f | |
Miriam Baglioni | 47ccb53c4f | |
Miriam Baglioni | ffb0ce1d59 | |
Miriam Baglioni | 716021546e | |
Claudio Atzori | 1f2a3d1af0 | |
Sandro La Bruzzo | 3469cc2b1d | |
Sandro La Bruzzo | a7763d2492 | |
Miriam Baglioni | b8bdabfae9 | |
Miriam Baglioni | e5498052e8 | |
Miriam Baglioni | 935062edec | |
Antonis Lempesis | 26f086dd64 | |
Claudio Atzori | 8bdca3413f | |
Claudio Atzori | 148289150f | |
Sandro La Bruzzo | 2ca0a436ad | |
Sandro La Bruzzo | 9cb195314f | |
Miriam Baglioni | 6d3c4c4abe | |
Miriam Baglioni | 8cc50ecee0 | |
Miriam Baglioni | 88b73f4f49 | |
Miriam Baglioni | c371b23077 | |
Miriam Baglioni | 9e214ce0eb | |
Sandro La Bruzzo | 6477a40670 | |
Miriam Baglioni | 6f7ca539c6 | |
Miriam Baglioni | a7d50c499b | |
Antonis Lempesis | 91354c6068 | |
Miriam Baglioni | 94918a673c | |
Miriam Baglioni | 4c70201412 | |
Miriam Baglioni | 8442efd8d1 | |
Claudio Atzori | 5681e89544 | |
Miriam Baglioni | a22c29fba1 | |
Miriam Baglioni | c10ff6928c | |
Miriam Baglioni | 0857849a86 | |
Miriam Baglioni | df7ee77c7a | |
Miriam Baglioni | de63d29b6f | |
Miriam Baglioni | d50057b2d9 | |
Miriam Baglioni | edf55395e9 | |
Miriam Baglioni | d97ea82a29 | |
Miriam Baglioni | 96769b4481 | |
Miriam Baglioni | 683fe093cf | |
Miriam Baglioni | b2bb8d9d79 | |
Miriam Baglioni | 779318961c | |
Miriam Baglioni | 2480e590d1 | |
Miriam Baglioni | b9d124bb7c | |
Sandro La Bruzzo | 7bd224f051 | |
Claudio Atzori | 7fa49f6956 | |
Antonis Lempesis | f78afb5ef9 | |
Miriam Baglioni | 2aca6bfa0a | |
Miriam Baglioni | 09f36cffb8 | |
Claudio Atzori | 1225ba0b92 | |
Sandro La Bruzzo | d9cbca83f7 | |
Sandro La Bruzzo | 1be9aa0a5f | |
Sandro La Bruzzo | 4acfa8fa2e | |
Miriam Baglioni | d0ef7d91c5 | |
Sandro La Bruzzo | aafdffa6b3 | |
Sandro La Bruzzo | 034304b33a | |
Claudio Atzori | 6b34ba737e | |
Claudio Atzori | d147295c2f | |
Claudio Atzori | 3702fe478d | |
Sandro La Bruzzo | ac36aa7d1c | |
Sandro La Bruzzo | aeeebd573b | |
Sandro La Bruzzo | ab3a99d3e9 | |
Sandro La Bruzzo | ae4e99a471 | |
Claudio Atzori | 4f8970f8ed | |
Claudio Atzori | 00b78b9c58 | |
Claudio Atzori | c01dd0c925 | |
Miriam Baglioni | 652114c641 | |
Claudio Atzori | d0cf2963f0 | |
Claudio Atzori | 59f76b50d4 | |
Claudio Atzori | bc3372093e | |
Antonis Lempesis | 241dcf6df1 | |
Claudio Atzori | 515e068a78 | |
Claudio Atzori | 512e7b0170 | |
Claudio Atzori | d517c71458 | |
Claudio Atzori | e9157c67aa | |
Claudio Atzori | 98f37c8d81 | |
Claudio Atzori | c8850456e9 | |
Sandro La Bruzzo | c9870c5122 | |
Sandro La Bruzzo | f8329bc110 | |
Claudio Atzori | 7a73010acd | |
Miriam Baglioni | c7f6cd2591 | |
Sandro La Bruzzo | a894d7adf3 | |
miconis | 5f780a6ba1 | |
Miriam Baglioni | 1315952702 | |
Miriam Baglioni | 1cc09adfaa | |
Miriam Baglioni | 76d41602be | |
Miriam Baglioni | 46f82c7c8f | |
Sandro La Bruzzo | 7b15b88d4c | |
Antonis Lempesis | 41ecb1eb61 | |
Antonis Lempesis | 4b7c8dff2d | |
Sandro La Bruzzo | 51a03c0a50 | |
Claudio Atzori | dd568ec88b | |
Claudio Atzori | 14fbf92ad6 | |
Claudio Atzori | b292e4a700 | |
miconis | 995c1eddaf | |
Miriam Baglioni | 5d9cc2452d | |
miconis | 326bf63775 | |
Miriam Baglioni | 16b28494a9 | |
Miriam Baglioni | 63933808d4 | |
Sandro La Bruzzo | f2c8356ccf | |
Sandro La Bruzzo | 7387416e90 | |
Claudio Atzori | 914b3e92cb | |
Sandro La Bruzzo | 511da98d0c | |
Miriam Baglioni | fec40bdd95 | |
Miriam Baglioni | 83f51f1812 | |
Sandro La Bruzzo | 5606014b17 | |
Claudio Atzori | 2f61054cd1 | |
Claudio Atzori | 83c90c7180 | |
Serafeim Chatzopoulos | 201ce71cc1 | |
Serafeim Chatzopoulos | e468a7b96b | |
Serafeim Chatzopoulos | de81007302 | |
Sandro La Bruzzo | 8f99d2af86 | |
Alessia Bardi | c48c43fa9e | |
Alessia Bardi | 8d3b60f446 | |
miconis | 611ca511db | |
miconis | 9646b9fd98 | |
Sandro La Bruzzo | 2557bb41f5 | |
Sandro La Bruzzo | b84e0cabeb | |
Sandro La Bruzzo | f258bbb927 | |
Sandro La Bruzzo | 991b06bd0b | |
Claudio Atzori | cb7efe12ac | |
Miriam Baglioni | e653756e3d | |
dimitrispie | 3f25d2efb2 | |
dimitrispie | 13687fd887 | |
Miriam Baglioni | 9814c3e700 | |
Miriam Baglioni | c4ccd7b32c | |
Miriam Baglioni | c8321ad31a | |
Claudio Atzori | 60a6a9a583 | |
Sandro La Bruzzo | 66702b1973 | |
Sandro La Bruzzo | 477cb10715 | |
Sandro La Bruzzo | be79d74e3d | |
Claudio Atzori | 35619b93ee | |
Claudio Atzori | 474117c2e8 | |
Miriam Baglioni | 476a4708d6 | |
Miriam Baglioni | 5ec69889db | |
Claudio Atzori | a53acfbc06 | |
Alessia Bardi | b924276e18 | |
Antonis Lempesis | a1e1cf32d7 | |
Antonis Lempesis | f358cabb2b | |
Miriam Baglioni | eedf7c3310 | |
Miriam Baglioni | f2118d771a | |
Claudio Atzori | df15a4dc9f | |
Claudio Atzori | 7fa60e166e | |
Antonis Lempesis | 421d55265d | |
Enrico Ottonello | 92a63f78fe | |
Enrico Ottonello | 0c74f5667e | |
miconis | 853333bdde | |
Antonis Lempesis | 8b681dcf1b | |
Claudio Atzori | 71cfa386bc | |
Antonis Lempesis | 2943287d10 | |
Antonis Lempesis | dd2329849f | |
Claudio Atzori | 09c2eb7f62 | |
Claudio Atzori | 954a16c213 | |
Miriam Baglioni | e9ccdf853f | |
Claudio Atzori | 12766bf5f2 | |
Claudio Atzori | 663b1556d7 | |
Claudio Atzori | ebf53a1616 | |
Enrico Ottonello | 8b804e7fe1 | |
Enrico Ottonello | aefa36c54b | |
Antonis Lempesis | de9bf3a161 | |
Antonis Lempesis | 9b1936701c | |
Antonis Lempesis | 8fc89ae822 | |
Antonis Lempesis | 461bf90ca6 | |
Antonis Lempesis | 43852bac0e | |
Antonis Lempesis | f13cca7e83 | |
Antonis Lempesis | c6ada217a1 | |
Antonis Lempesis | 1250ae197f | |
Antonis Lempesis | ccee451dde | |
Sandro La Bruzzo | aed29156c7 | |
Sandro La Bruzzo | 370dddb2fa | |
Sandro La Bruzzo | 3c6fc2096c | |
Sandro La Bruzzo | d4dadf6d77 | |
Sandro La Bruzzo | 9f8a80deb7 | |
Alessia Bardi | 3762b17f7b | |
Sandro La Bruzzo | e8b3cb9147 | |
Alessia Bardi | ccf4103a25 | |
Sandro La Bruzzo | 45898c71ac | |
Alessia Bardi | 00a28c0080 | |
Alessia Bardi | f19b04d41b | |
Alessia Bardi | 412d2cb16a | |
Alessia Bardi | 3bcac7e88c | |
Alessia Bardi | 931f430129 | |
Alessia Bardi | 4c1474e693 | |
Miriam Baglioni | 5f8ccbc365 | |
Miriam Baglioni | 882abb40e4 | |
Miriam Baglioni | 45c62609af | |
Miriam Baglioni | 35880c0e7b | |
Miriam Baglioni | f3b6c392c1 | |
Miriam Baglioni | 65822400ce | |
Alessia Bardi | a053e1513c | |
Alessia Bardi | 812bd54c57 | |
Miriam Baglioni | a65d3caaea | |
Miriam Baglioni | e5cf11d088 | |
Claudio Atzori | 7c0c67bdd6 | |
Claudio Atzori | 82086f3422 | |
Claudio Atzori | bc7068106c | |
Claudio Atzori | 2c0a05f11a | |
Claudio Atzori | d43667d857 | |
Miriam Baglioni | 5856ca8a7b | |
Miriam Baglioni | 6fec71e8d2 | |
Miriam Baglioni | ed7e28490a | |
Claudio Atzori | 7743d0f919 | |
Miriam Baglioni | 6eb7508995 | |
Claudio Atzori | f74adc4752 | |
Claudio Atzori | 5f0903d50d | |
Claudio Atzori | 17cefe6a97 | |
Claudio Atzori | 7ee2757fcd | |
Claudio Atzori | c3ad4ab701 | |
Claudio Atzori | baed5e3337 | |
Claudio Atzori | 3359f73fcf | |
Claudio Atzori | 4e6575a428 | |
Miriam Baglioni | f4ec81c92c | |
Miriam Baglioni | dc8b05b39e | |
Miriam Baglioni | 32fd75691f | |
Miriam Baglioni | dfd1e53c69 | |
Miriam Baglioni | 01db1f8bc4 | |
Miriam Baglioni | 964a46ca21 | |
Miriam Baglioni | eaf077fc34 | |
Miriam Baglioni | 5f674efb0c | |
Miriam Baglioni | 5cd5714530 | |
Miriam Baglioni | 58f241f4a2 | |
Miriam Baglioni | f3d575f749 | |
Miriam Baglioni | a5f6edfa6c | |
Miriam Baglioni | ed183d878e | |
Miriam Baglioni | 8769dd8eef | |
Miriam Baglioni | 6b9e1bf2e3 | |
Miriam Baglioni | d57b2bb927 | |
Miriam Baglioni | 9da74b544a | |
Miriam Baglioni | ab8abd61bb | |
Miriam Baglioni | 335a824e34 | |
Miriam Baglioni | f0845e9865 | |
Miriam Baglioni | 7a789423aa | |
Miriam Baglioni | e9fc3ef3bc | |
Miriam Baglioni | 4317211a2b | |
Miriam Baglioni | b62cd656a7 | |
Miriam Baglioni | d36e925277 | |
Miriam Baglioni | 7402daf51a | |
Miriam Baglioni | 733bcaecf6 | |
Miriam Baglioni | bfe8f5335c | |
Miriam Baglioni | 6e84b3951f | |
Claudio Atzori | e91ffcd2f3 | |
Claudio Atzori | 9587d4aee8 | |
Claudio Atzori | 86d940044c | |
Claudio Atzori | 8cdce59e0e | |
Miriam Baglioni | 08dd2b2102 | |
Miriam Baglioni | ac417ca798 | |
Miriam Baglioni | e33daaeee8 | |
Miriam Baglioni | 9650eea497 | |
Miriam Baglioni | 785db1d5b2 | |
Miriam Baglioni | 95e5482bbb | |
Miriam Baglioni | cc3d72df0e | |
Miriam Baglioni | b966329833 | |
Miriam Baglioni | 8ad7c71417 | |
Miriam Baglioni | 0e1a6bec20 | |
Miriam Baglioni | c6a2a780a9 | |
Miriam Baglioni | b6b58bba28 | |
Miriam Baglioni | 804589eb30 | |
Miriam Baglioni | d688749ad9 | |
Miriam Baglioni | 524c06e028 | |
Miriam Baglioni | 7aa3260729 | |
Miriam Baglioni | 55fc500d8d | |
Miriam Baglioni | f9b6b45d85 | |
Miriam Baglioni | 8229632839 | |
Miriam Baglioni | b1c6140ebf | |
Miriam Baglioni | 52c18c2697 | |
Miriam Baglioni | 8da3a25cf6 | |
Claudio Atzori | 9f4db73f30 | |
Claudio Atzori | 61d811ba53 | |
Claudio Atzori | 2ee21da43b | |
Miriam Baglioni | b954fe9ba8 | |
Miriam Baglioni | b688567db5 | |
Miriam Baglioni | 9731a6144a | |
Miriam Baglioni | a90bac3bc9 | |
Miriam Baglioni | bd0d7bfba7 | |
Miriam Baglioni | 8daaa32e90 | |
Miriam Baglioni | bc9e3a06ba | |
Claudio Atzori | d64a942a76 | |
Miriam Baglioni | 2efa5abda5 | |
Claudio Atzori | 577f3b1ac8 | |
Miriam Baglioni | da20fceaf7 | |
Claudio Atzori | 964f97ed4d | |
Miriam Baglioni | 54a6cbb244 | |
Miriam Baglioni | b7079804cb | |
Miriam Baglioni | a5f82f442b | |
Miriam Baglioni | b6dcf89d22 | |
Miriam Baglioni | eff499af9f | |
Claudio Atzori | a45b95ccc1 | |
Miriam Baglioni | 5d70f842eb | |
Miriam Baglioni | c3931557e3 | |
Claudio Atzori | 66f398fe6f | |
Miriam Baglioni | 6bd1eca7e0 | |
Miriam Baglioni | 73dc082927 | |
Miriam Baglioni | ee13da9258 | |
Miriam Baglioni | bd096f5170 | |
Miriam Baglioni | 5faeefbda8 | |
Miriam Baglioni | 1965e4eece | |
Claudio Atzori | 83c04e5d28 | |
Miriam Baglioni | b4eb026c8b | |
Miriam Baglioni | c7b71647c6 | |
Miriam Baglioni | eb8c3f8594 | |
Miriam Baglioni | e94ae0b1de | |
Miriam Baglioni | 67ba4c40e0 | |
Miriam Baglioni | eccf3851b0 | |
Sandro La Bruzzo | 74afe43c3a | |
Miriam Baglioni | 1e952cccf6 | |
Miriam Baglioni | 8ba8c77f92 | |
Miriam Baglioni | 8f7623e77a | |
Sandro La Bruzzo | 3fc820203b | |
Miriam Baglioni | a7bf314fd2 | |
Miriam Baglioni | 9831725073 | |
Miriam Baglioni | 100e54e6c8 | |
Miriam Baglioni | 461b8a29a0 | |
Miriam Baglioni | 327cddde33 | |
Miriam Baglioni | 17292c6641 | |
Miriam Baglioni | ee7ccb98dc | |
Miriam Baglioni | 90e91486e2 | |
Miriam Baglioni | 1e859706a3 | |
Miriam Baglioni | 72df8f9232 | |
Miriam Baglioni | ff1ce75e33 | |
Claudio Atzori | e826aae848 | |
Claudio Atzori | fd55c77d97 | |
Antonis Lempesis | 117c3d5c67 | |
Miriam Baglioni | 1695d45bd4 | |
Miriam Baglioni | 7c6ea2f4c7 | |
Miriam Baglioni | d8b9b0553b | |
Miriam Baglioni | 613bd3bde0 | |
Miriam Baglioni | d1807781c0 | |
Miriam Baglioni | 1d6ac3715b | |
Claudio Atzori | e244f73165 | |
Claudio Atzori | 11e26c020a | |
Claudio Atzori | 19620eed46 | |
Claudio Atzori | 5219d56be5 | |
Claudio Atzori | 4f78565c04 | |
Claudio Atzori | a6a38cca9e | |
Miriam Baglioni | 9bc4fd3b69 | |
Miriam Baglioni | 2fc89fc9b5 | |
Claudio Atzori | 081fe92a21 | |
Claudio Atzori | 576693d782 | |
Claudio Atzori | 55e6470f44 | |
Sandro La Bruzzo | 6358f92c3a | |
Antonis Lempesis | 26af0320d0 | |
Claudio Atzori | 7b172e7cd9 | |
Claudio Atzori | c53d106e80 | |
Claudio Atzori | 6e3554a45e | |
Sandro La Bruzzo | b1b0cc3f15 | |
Miriam Baglioni | baad01cadc | |
Claudio Atzori | e725c88ebb | |
Claudio Atzori | 5d08ad86ae | |
Claudio Atzori | e87e1805c4 | |
Claudio Atzori | f83dd70e1c | |
Claudio Atzori | 5f7330d407 | |
Claudio Atzori | 1923c1ce21 | |
Claudio Atzori | dc55ed4acd | |
Claudio Atzori | 908f57a475 | |
Sandro La Bruzzo | 3721df7aa6 | |
Michele Artini | 6aef3e8f46 | |
Antonis Lempesis | 4afa5215a9 | |
Antonis Lempesis | 3d1580fa9b | |
Claudio Atzori | 4c5a71ba2f | |
Claudio Atzori | a9961a1835 | |
Claudio Atzori | e1797c0a42 | |
Claudio Atzori | 6dddad86ee | |
Sandro La Bruzzo | 3d8f0f629b | |
Antonis Lempesis | 9b181ffa73 | |
Michele Artini | 3e2a2d6e71 | |
Alessia Bardi | 9594343725 | |
Michele Artini | 9f1c7b8e17 | |
Antonis Lempesis | 4a9741825d | |
Miriam Baglioni | 3d2bba3d5d | |
Miriam Baglioni | cc0d3d8a7b | |
Michele Artini | e6f1773d63 | |
Michele Artini | c72c960ffb | |
Michele Artini | 1fb572a33a | |
Miriam Baglioni | 708d0ade34 | |
Miriam Baglioni | 0424f47494 | |
Michele Artini | 52e2315ba2 | |
Claudio Atzori | d267dce520 | |
Claudio Atzori | 998b66855a | |
Antonis Lempesis | 1a28a69cac | |
Miriam Baglioni | 74f801b689 | |
Miriam Baglioni | 35e395eae8 | |
Miriam Baglioni | eb07f7f40f | |
Antonis Lempesis | ed185fd7ed | |
Antonis Lempesis | f3b9570354 | |
Antonis Lempesis | f9fbb0f261 | |
Claudio Atzori | 5b6844b969 | |
Claudio Atzori | ffdb2a3ea3 | |
Miriam Baglioni | 63553a76b3 | |
Alessia Bardi | 9069958479 | |
Claudio Atzori | 77e8c6c7f7 | |
Claudio Atzori | 5947cddafc | |
Miriam Baglioni | 13cf444f85 | |
Claudio Atzori | 5e5f65a3c3 | |
Miriam Baglioni | c4b18e6ccb | |
Miriam Baglioni | acd6056330 | |
Claudio Atzori | 9913b6073c | |
Enrico Ottonello | 2dc50c0999 | |
Enrico Ottonello | 66604bb2b4 | |
Enrico Ottonello | 7840cc6526 | |
Enrico Ottonello | a65667d217 | |
Sandro La Bruzzo | 10068c00ea | |
Miriam Baglioni | 1cdd09cd8e | |
Sandro La Bruzzo | 4cb65bc64a | |
Miriam Baglioni | 774cdb190e | |
Miriam Baglioni | 886617afd0 | |
Miriam Baglioni | 320cf02d96 | |
Miriam Baglioni | 52ce35d57b | |
Miriam Baglioni | 970b387b8d | |
Miriam Baglioni | eae10c5894 | |
Miriam Baglioni | c028feef4f | |
Miriam Baglioni | d70f8c96fd | |
Miriam Baglioni | 5e38c7f42d | |
Claudio Atzori | 734de62474 | |
Miriam Baglioni | d418c309f5 | |
Miriam Baglioni | 618d2de2da | |
Miriam Baglioni | 59615da65e | |
Miriam Baglioni | 084b4ef999 | |
Claudio Atzori | fa720c1da4 | |
Miriam Baglioni | 8f322a73cb | |
Miriam Baglioni | 72397ea1ba | |
Miriam Baglioni | 5295d10691 | |
Claudio Atzori | 9629569e22 | |
Claudio Atzori | f13e11e3f7 | |
Miriam Baglioni | e9a17ec899 | |
Miriam Baglioni | 8429aed6c6 | |
Miriam Baglioni | 39b1a6edf6 | |
Miriam Baglioni | 9a58f1b93d | |
Miriam Baglioni | 13c66e16be | |
Miriam Baglioni | 6410ab71d8 | |
Miriam Baglioni | 65a242646d | |
Miriam Baglioni | 4b432fbee8 | |
Miriam Baglioni | 87a6e2b967 | |
Miriam Baglioni | 69fd40fd30 | |
Miriam Baglioni | 86e50f7311 | |
Miriam Baglioni | da88c850c6 | |
Miriam Baglioni | 2f66fedfec | |
Miriam Baglioni | f5486ffb14 | |
Claudio Atzori | e0061232e9 | |
Claudio Atzori | 28a66af425 | |
Claudio Atzori | 783988af06 | |
Enrico Ottonello | abdd0ade1f | |
Enrico Ottonello | d0945c3c78 | |
Enrico Ottonello | 1265dadc90 | |
Enrico Ottonello | 0821d8e97d | |
Enrico Ottonello | ae7bd24d79 | |
Enrico Ottonello | 4d6c473bf1 | |
Claudio Atzori | ea9b00ce56 | |
Claudio Atzori | 2e70aa43f0 | |
Enrico Ottonello | e13926cdd0 |
|
@ -3,8 +3,6 @@
|
||||||
*.iws
|
*.iws
|
||||||
*.ipr
|
*.ipr
|
||||||
*.iml
|
*.iml
|
||||||
*.ipr
|
|
||||||
*.iws
|
|
||||||
*~
|
*~
|
||||||
.vscode
|
.vscode
|
||||||
.metals
|
.metals
|
||||||
|
@ -27,4 +25,4 @@ spark-warehouse
|
||||||
/**/job-override.properties
|
/**/job-override.properties
|
||||||
/**/*.log
|
/**/*.log
|
||||||
/**/.factorypath
|
/**/.factorypath
|
||||||
|
/**/.scalafmt.conf
|
||||||
|
|
|
@ -1,2 +1,2 @@
|
||||||
# dnet-hadoop
|
# dnet-hadoop
|
||||||
Dnet-hadoop is a tool for
|
Dnet-hadoop is the project that defined all the OOZIE workflows for the OpenAIRE Graph construction, processing, provisioning.
|
|
@ -8,8 +8,6 @@ import java.util.List;
|
||||||
import org.apache.commons.lang.ArrayUtils;
|
import org.apache.commons.lang.ArrayUtils;
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
import org.apache.maven.plugin.AbstractMojo;
|
import org.apache.maven.plugin.AbstractMojo;
|
||||||
import org.apache.maven.plugin.MojoExecutionException;
|
|
||||||
import org.apache.maven.plugin.MojoFailureException;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generates oozie properties which were not provided from commandline.
|
* Generates oozie properties which were not provided from commandline.
|
||||||
|
@ -27,7 +25,7 @@ public class GenerateOoziePropertiesMojo extends AbstractMojo {
|
||||||
};
|
};
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void execute() throws MojoExecutionException, MojoFailureException {
|
public void execute() {
|
||||||
if (System.getProperties().containsKey(PROPERTY_NAME_WF_SOURCE_DIR)
|
if (System.getProperties().containsKey(PROPERTY_NAME_WF_SOURCE_DIR)
|
||||||
&& !System.getProperties().containsKey(PROPERTY_NAME_SANDBOX_NAME)) {
|
&& !System.getProperties().containsKey(PROPERTY_NAME_SANDBOX_NAME)) {
|
||||||
String generatedSandboxName = generateSandboxName(
|
String generatedSandboxName = generateSandboxName(
|
||||||
|
@ -46,24 +44,24 @@ public class GenerateOoziePropertiesMojo extends AbstractMojo {
|
||||||
/**
|
/**
|
||||||
* Generates sandbox name from workflow source directory.
|
* Generates sandbox name from workflow source directory.
|
||||||
*
|
*
|
||||||
* @param wfSourceDir
|
* @param wfSourceDir workflow source directory
|
||||||
* @return generated sandbox name
|
* @return generated sandbox name
|
||||||
*/
|
*/
|
||||||
private String generateSandboxName(String wfSourceDir) {
|
private String generateSandboxName(String wfSourceDir) {
|
||||||
// utilize all dir names until finding one of the limiters
|
// utilize all dir names until finding one of the limiters
|
||||||
List<String> sandboxNameParts = new ArrayList<String>();
|
List<String> sandboxNameParts = new ArrayList<>();
|
||||||
String[] tokens = StringUtils.split(wfSourceDir, File.separatorChar);
|
String[] tokens = StringUtils.split(wfSourceDir, File.separatorChar);
|
||||||
ArrayUtils.reverse(tokens);
|
ArrayUtils.reverse(tokens);
|
||||||
if (tokens.length > 0) {
|
if (tokens.length > 0) {
|
||||||
for (String token : tokens) {
|
for (String token : tokens) {
|
||||||
for (String limiter : limiters) {
|
for (String limiter : limiters) {
|
||||||
if (limiter.equals(token)) {
|
if (limiter.equals(token)) {
|
||||||
return sandboxNameParts.size() > 0
|
return !sandboxNameParts.isEmpty()
|
||||||
? StringUtils.join(sandboxNameParts.toArray())
|
? StringUtils.join(sandboxNameParts.toArray())
|
||||||
: null;
|
: null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (sandboxNameParts.size() > 0) {
|
if (!sandboxNameParts.isEmpty()) {
|
||||||
sandboxNameParts.add(0, File.separator);
|
sandboxNameParts.add(0, File.separator);
|
||||||
}
|
}
|
||||||
sandboxNameParts.add(0, token);
|
sandboxNameParts.add(0, token);
|
||||||
|
|
|
@ -16,6 +16,7 @@ import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
@ -289,7 +290,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
|
||||||
*/
|
*/
|
||||||
protected List<String> getEscapeChars(String escapeChars) {
|
protected List<String> getEscapeChars(String escapeChars) {
|
||||||
List<String> tokens = getListFromCSV(escapeChars);
|
List<String> tokens = getListFromCSV(escapeChars);
|
||||||
List<String> realTokens = new ArrayList<String>();
|
List<String> realTokens = new ArrayList<>();
|
||||||
for (String token : tokens) {
|
for (String token : tokens) {
|
||||||
String realToken = getRealToken(token);
|
String realToken = getRealToken(token);
|
||||||
realTokens.add(realToken);
|
realTokens.add(realToken);
|
||||||
|
@ -324,7 +325,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
|
||||||
* @return content
|
* @return content
|
||||||
*/
|
*/
|
||||||
protected String getContent(String comment, Properties properties, List<String> escapeTokens) {
|
protected String getContent(String comment, Properties properties, List<String> escapeTokens) {
|
||||||
List<String> names = new ArrayList<String>(properties.stringPropertyNames());
|
List<String> names = new ArrayList<>(properties.stringPropertyNames());
|
||||||
Collections.sort(names);
|
Collections.sort(names);
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
if (!StringUtils.isBlank(comment)) {
|
if (!StringUtils.isBlank(comment)) {
|
||||||
|
@ -352,7 +353,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
|
||||||
throws MojoExecutionException {
|
throws MojoExecutionException {
|
||||||
try {
|
try {
|
||||||
String content = getContent(comment, properties, escapeTokens);
|
String content = getContent(comment, properties, escapeTokens);
|
||||||
FileUtils.writeStringToFile(file, content, ENCODING_UTF8);
|
FileUtils.writeStringToFile(file, content, StandardCharsets.UTF_8);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new MojoExecutionException("Error creating properties file", e);
|
throw new MojoExecutionException("Error creating properties file", e);
|
||||||
}
|
}
|
||||||
|
@ -399,9 +400,9 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
|
||||||
*/
|
*/
|
||||||
protected static final List<String> getListFromCSV(String csv) {
|
protected static final List<String> getListFromCSV(String csv) {
|
||||||
if (StringUtils.isBlank(csv)) {
|
if (StringUtils.isBlank(csv)) {
|
||||||
return new ArrayList<String>();
|
return new ArrayList<>();
|
||||||
}
|
}
|
||||||
List<String> list = new ArrayList<String>();
|
List<String> list = new ArrayList<>();
|
||||||
String[] tokens = StringUtils.split(csv, ",");
|
String[] tokens = StringUtils.split(csv, ",");
|
||||||
for (String token : tokens) {
|
for (String token : tokens) {
|
||||||
list.add(token.trim());
|
list.add(token.trim());
|
||||||
|
|
|
@ -9,18 +9,18 @@ import org.junit.jupiter.api.BeforeEach;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
/** @author mhorst, claudio.atzori */
|
/** @author mhorst, claudio.atzori */
|
||||||
public class GenerateOoziePropertiesMojoTest {
|
class GenerateOoziePropertiesMojoTest {
|
||||||
|
|
||||||
private final GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo();
|
private final GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo();
|
||||||
|
|
||||||
@BeforeEach
|
@BeforeEach
|
||||||
public void clearSystemProperties() {
|
void clearSystemProperties() {
|
||||||
System.clearProperty(PROPERTY_NAME_SANDBOX_NAME);
|
System.clearProperty(PROPERTY_NAME_SANDBOX_NAME);
|
||||||
System.clearProperty(PROPERTY_NAME_WF_SOURCE_DIR);
|
System.clearProperty(PROPERTY_NAME_WF_SOURCE_DIR);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteEmpty() throws Exception {
|
void testExecuteEmpty() throws Exception {
|
||||||
// execute
|
// execute
|
||||||
mojo.execute();
|
mojo.execute();
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@ public class GenerateOoziePropertiesMojoTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteSandboxNameAlreadySet() throws Exception {
|
void testExecuteSandboxNameAlreadySet() throws Exception {
|
||||||
// given
|
// given
|
||||||
String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers";
|
String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers";
|
||||||
String sandboxName = "originalSandboxName";
|
String sandboxName = "originalSandboxName";
|
||||||
|
@ -44,7 +44,7 @@ public class GenerateOoziePropertiesMojoTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteEmptyWorkflowSourceDir() throws Exception {
|
void testExecuteEmptyWorkflowSourceDir() throws Exception {
|
||||||
// given
|
// given
|
||||||
String workflowSourceDir = "";
|
String workflowSourceDir = "";
|
||||||
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
|
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
|
||||||
|
@ -57,7 +57,7 @@ public class GenerateOoziePropertiesMojoTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteNullSandboxNameGenerated() throws Exception {
|
void testExecuteNullSandboxNameGenerated() throws Exception {
|
||||||
// given
|
// given
|
||||||
String workflowSourceDir = "eu/dnetlib/dhp/";
|
String workflowSourceDir = "eu/dnetlib/dhp/";
|
||||||
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
|
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
|
||||||
|
@ -70,7 +70,7 @@ public class GenerateOoziePropertiesMojoTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecute() throws Exception {
|
void testExecute() throws Exception {
|
||||||
// given
|
// given
|
||||||
String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers";
|
String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers";
|
||||||
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
|
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
|
||||||
|
@ -83,7 +83,7 @@ public class GenerateOoziePropertiesMojoTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteWithoutRoot() throws Exception {
|
void testExecuteWithoutRoot() throws Exception {
|
||||||
// given
|
// given
|
||||||
String workflowSourceDir = "wf/transformers";
|
String workflowSourceDir = "wf/transformers";
|
||||||
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
|
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
|
||||||
|
|
|
@ -20,7 +20,7 @@ import org.mockito.junit.jupiter.MockitoExtension;
|
||||||
|
|
||||||
/** @author mhorst, claudio.atzori */
|
/** @author mhorst, claudio.atzori */
|
||||||
@ExtendWith(MockitoExtension.class)
|
@ExtendWith(MockitoExtension.class)
|
||||||
public class WritePredefinedProjectPropertiesTest {
|
class WritePredefinedProjectPropertiesTest {
|
||||||
|
|
||||||
@Mock
|
@Mock
|
||||||
private MavenProject mavenProject;
|
private MavenProject mavenProject;
|
||||||
|
@ -39,7 +39,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
// ----------------------------------- TESTS ---------------------------------------------
|
// ----------------------------------- TESTS ---------------------------------------------
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteEmpty() throws Exception {
|
void testExecuteEmpty() throws Exception {
|
||||||
// execute
|
// execute
|
||||||
mojo.execute();
|
mojo.execute();
|
||||||
|
|
||||||
|
@ -50,7 +50,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteWithProjectProperties() throws Exception {
|
void testExecuteWithProjectProperties() throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
String value = "projectPropertyValue";
|
String value = "projectPropertyValue";
|
||||||
|
@ -70,7 +70,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test()
|
@Test()
|
||||||
public void testExecuteWithProjectPropertiesAndInvalidOutputFile(@TempDir File testFolder) {
|
void testExecuteWithProjectPropertiesAndInvalidOutputFile(@TempDir File testFolder) {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
String value = "projectPropertyValue";
|
String value = "projectPropertyValue";
|
||||||
|
@ -84,7 +84,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder) throws Exception {
|
void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder) throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
String value = "projectPropertyValue";
|
String value = "projectPropertyValue";
|
||||||
|
@ -108,7 +108,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder) throws Exception {
|
void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder) throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
String value = "projectPropertyValue";
|
String value = "projectPropertyValue";
|
||||||
|
@ -132,7 +132,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder) throws Exception {
|
void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder) throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
String value = "projectPropertyValue";
|
String value = "projectPropertyValue";
|
||||||
|
@ -164,7 +164,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder)
|
void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
|
@ -194,7 +194,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteIncludingPropertyKeysFromBlankLocation() {
|
void testExecuteIncludingPropertyKeysFromBlankLocation() {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
String value = "projectPropertyValue";
|
String value = "projectPropertyValue";
|
||||||
|
@ -214,7 +214,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder)
|
void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
|
@ -247,7 +247,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder)
|
void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
|
@ -273,7 +273,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteWithQuietModeOn(@TempDir File testFolder) throws Exception {
|
void testExecuteWithQuietModeOn(@TempDir File testFolder) throws Exception {
|
||||||
// given
|
// given
|
||||||
mojo.setQuiet(true);
|
mojo.setQuiet(true);
|
||||||
mojo.setIncludePropertyKeysFromFiles(new String[] {
|
mojo.setIncludePropertyKeysFromFiles(new String[] {
|
||||||
|
@ -290,7 +290,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteIncludingPropertyKeysFromInvalidFile() {
|
void testExecuteIncludingPropertyKeysFromInvalidFile() {
|
||||||
// given
|
// given
|
||||||
mojo.setIncludePropertyKeysFromFiles(new String[] {
|
mojo.setIncludePropertyKeysFromFiles(new String[] {
|
||||||
"invalid location"
|
"invalid location"
|
||||||
|
@ -301,7 +301,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteWithEnvironmentProperties(@TempDir File testFolder) throws Exception {
|
void testExecuteWithEnvironmentProperties(@TempDir File testFolder) throws Exception {
|
||||||
// given
|
// given
|
||||||
mojo.setIncludeEnvironmentVariables(true);
|
mojo.setIncludeEnvironmentVariables(true);
|
||||||
|
|
||||||
|
@ -318,7 +318,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteWithSystemProperties(@TempDir File testFolder) throws Exception {
|
void testExecuteWithSystemProperties(@TempDir File testFolder) throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "systemPropertyKey";
|
String key = "systemPropertyKey";
|
||||||
String value = "systemPropertyValue";
|
String value = "systemPropertyValue";
|
||||||
|
@ -337,7 +337,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder)
|
void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "systemPropertyKey ";
|
String key = "systemPropertyKey ";
|
||||||
|
|
|
@ -22,9 +22,20 @@
|
||||||
<id>dnet45-releases</id>
|
<id>dnet45-releases</id>
|
||||||
<url>https://maven.d4science.org/nexus/content/repositories/dnet45-releases</url>
|
<url>https://maven.d4science.org/nexus/content/repositories/dnet45-releases</url>
|
||||||
</repository>
|
</repository>
|
||||||
|
<site>
|
||||||
|
<id>DHPSite</id>
|
||||||
|
<url>${dhp.site.stage.path}/dhp-build/dhp-code-style</url>
|
||||||
|
</site>
|
||||||
</distributionManagement>
|
</distributionManagement>
|
||||||
|
|
||||||
<build>
|
<build>
|
||||||
|
<extensions>
|
||||||
|
<extension>
|
||||||
|
<groupId>org.apache.maven.wagon</groupId>
|
||||||
|
<artifactId>wagon-ssh</artifactId>
|
||||||
|
<version>2.10</version>
|
||||||
|
</extension>
|
||||||
|
</extensions>
|
||||||
<pluginManagement>
|
<pluginManagement>
|
||||||
<plugins>
|
<plugins>
|
||||||
<plugin>
|
<plugin>
|
||||||
|
@ -35,7 +46,7 @@
|
||||||
<plugin>
|
<plugin>
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
<artifactId>maven-site-plugin</artifactId>
|
<artifactId>maven-site-plugin</artifactId>
|
||||||
<version>3.7.1</version>
|
<version>3.9.1</version>
|
||||||
</plugin>
|
</plugin>
|
||||||
</plugins>
|
</plugins>
|
||||||
</pluginManagement>
|
</pluginManagement>
|
||||||
|
@ -43,6 +54,7 @@
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||||
|
<dhp.site.stage.path>sftp://dnet-hadoop@static-web.d4science.org/dnet-hadoop</dhp.site.stage.path>
|
||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
</project>
|
</project>
|
|
@ -0,0 +1,21 @@
|
||||||
|
style = defaultWithAlign
|
||||||
|
|
||||||
|
align.openParenCallSite = false
|
||||||
|
align.openParenDefnSite = false
|
||||||
|
align.tokens = [{code = "->"}, {code = "<-"}, {code = "=>", owner = "Case"}]
|
||||||
|
continuationIndent.callSite = 2
|
||||||
|
continuationIndent.defnSite = 2
|
||||||
|
danglingParentheses = true
|
||||||
|
indentOperator = spray
|
||||||
|
maxColumn = 120
|
||||||
|
newlines.alwaysBeforeTopLevelStatements = true
|
||||||
|
project.excludeFilters = [".*\\.sbt"]
|
||||||
|
rewrite.rules = [AvoidInfix]
|
||||||
|
rewrite.rules = [ExpandImportSelectors]
|
||||||
|
rewrite.rules = [RedundantBraces]
|
||||||
|
rewrite.rules = [RedundantParens]
|
||||||
|
rewrite.rules = [SortImports]
|
||||||
|
rewrite.rules = [SortModifiers]
|
||||||
|
rewrite.rules = [PreferCurlyFors]
|
||||||
|
spaces.inImportCurlyBraces = false
|
||||||
|
unindentTopLevelOperators = true
|
|
@ -0,0 +1,21 @@
|
||||||
|
<?xml version="1.0" encoding="ISO-8859-1"?>
|
||||||
|
<project xmlns="http://maven.apache.org/DECORATION/1.8.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/DECORATION/1.8.0 https://maven.apache.org/xsd/decoration-1.8.0.xsd"
|
||||||
|
name="DHP-Aggregation">
|
||||||
|
<skin>
|
||||||
|
<groupId>org.apache.maven.skins</groupId>
|
||||||
|
<artifactId>maven-fluido-skin</artifactId>
|
||||||
|
<version>1.8</version>
|
||||||
|
</skin>
|
||||||
|
<poweredBy>
|
||||||
|
<logo name="OpenAIRE Research Graph" href="https://graph.openaire.eu/"
|
||||||
|
img="https://graph.openaire.eu/assets/common-assets/logo-large-graph.png"/>
|
||||||
|
</poweredBy>
|
||||||
|
<body>
|
||||||
|
<links>
|
||||||
|
<item name="Code" href="https://code-repo.d4science.org/" />
|
||||||
|
</links>
|
||||||
|
<menu ref="modules" />
|
||||||
|
<menu ref="reports"/>
|
||||||
|
</body>
|
||||||
|
</project>
|
|
@ -10,6 +10,9 @@
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
|
|
||||||
<description>This module is a container for the build tools used in dnet-hadoop</description>
|
<description>This module is a container for the build tools used in dnet-hadoop</description>
|
||||||
|
<properties>
|
||||||
|
<maven.javadoc.skip>true</maven.javadoc.skip>
|
||||||
|
</properties>
|
||||||
|
|
||||||
<modules>
|
<modules>
|
||||||
<module>dhp-code-style</module>
|
<module>dhp-code-style</module>
|
||||||
|
@ -17,4 +20,12 @@
|
||||||
<module>dhp-build-properties-maven-plugin</module>
|
<module>dhp-build-properties-maven-plugin</module>
|
||||||
</modules>
|
</modules>
|
||||||
|
|
||||||
|
|
||||||
|
<distributionManagement>
|
||||||
|
<site>
|
||||||
|
<id>DHPSite</id>
|
||||||
|
<url>${dhp.site.stage.path}/dhp-build/</url>
|
||||||
|
</site>
|
||||||
|
</distributionManagement>
|
||||||
|
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -0,0 +1,22 @@
|
||||||
|
<?xml version="1.0" encoding="ISO-8859-1"?>
|
||||||
|
<project xmlns="http://maven.apache.org/DECORATION/1.8.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/DECORATION/1.8.0 https://maven.apache.org/xsd/decoration-1.8.0.xsd"
|
||||||
|
name="DHP-Aggregation">
|
||||||
|
<skin>
|
||||||
|
<groupId>org.apache.maven.skins</groupId>
|
||||||
|
<artifactId>maven-fluido-skin</artifactId>
|
||||||
|
<version>1.8</version>
|
||||||
|
</skin>
|
||||||
|
<poweredBy>
|
||||||
|
<logo name="OpenAIRE Research Graph" href="https://graph.openaire.eu/"
|
||||||
|
img="https://graph.openaire.eu/assets/common-assets/logo-large-graph.png"/>
|
||||||
|
</poweredBy>
|
||||||
|
<body>
|
||||||
|
<links>
|
||||||
|
<item name="Code" href="https://code-repo.d4science.org/" />
|
||||||
|
</links>
|
||||||
|
|
||||||
|
<menu ref="modules" />
|
||||||
|
<menu ref="reports"/>
|
||||||
|
</body>
|
||||||
|
</project>
|
|
@ -13,7 +13,51 @@
|
||||||
<artifactId>dhp-common</artifactId>
|
<artifactId>dhp-common</artifactId>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
|
<distributionManagement>
|
||||||
|
<site>
|
||||||
|
<id>DHPSite</id>
|
||||||
|
<url>${dhp.site.stage.path}/dhp-common</url>
|
||||||
|
</site>
|
||||||
|
</distributionManagement>
|
||||||
|
|
||||||
<description>This module contains common utilities meant to be used across the dnet-hadoop submodules</description>
|
<description>This module contains common utilities meant to be used across the dnet-hadoop submodules</description>
|
||||||
|
<build>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<groupId>net.alchim31.maven</groupId>
|
||||||
|
<artifactId>scala-maven-plugin</artifactId>
|
||||||
|
<version>${net.alchim31.maven.version}</version>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<id>scala-compile-first</id>
|
||||||
|
<phase>initialize</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>add-source</goal>
|
||||||
|
<goal>compile</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
<execution>
|
||||||
|
<id>scala-test-compile</id>
|
||||||
|
<phase>process-test-resources</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>testCompile</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
<execution>
|
||||||
|
<id>scala-doc</id>
|
||||||
|
<phase>process-resources</phase> <!-- or wherever -->
|
||||||
|
<goals>
|
||||||
|
<goal>doc</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
<configuration>
|
||||||
|
<scalaVersion>${scala.version}</scalaVersion>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
|
||||||
|
</build>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|
||||||
|
@ -25,6 +69,11 @@
|
||||||
<groupId>com.github.sisyphsu</groupId>
|
<groupId>com.github.sisyphsu</groupId>
|
||||||
<artifactId>dateparser</artifactId>
|
<artifactId>dateparser</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>me.xuender</groupId>
|
||||||
|
<artifactId>unidecode</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-core_2.11</artifactId>
|
<artifactId>spark-core_2.11</artifactId>
|
||||||
|
@ -112,6 +161,11 @@
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<artifactId>dhp-schemas</artifactId>
|
<artifactId>dhp-schemas</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.opencsv</groupId>
|
||||||
|
<artifactId>opencsv</artifactId>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -1,14 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.application;
|
|
||||||
|
|
||||||
import java.io.*;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Properties;
|
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
|
|
||||||
import com.google.common.collect.Maps;
|
|
||||||
|
|
||||||
public class ApplicationUtils {
|
|
||||||
|
|
||||||
}
|
|
|
@ -56,13 +56,13 @@ public class ArgumentApplicationParser implements Serializable {
|
||||||
final StringWriter stringWriter = new StringWriter();
|
final StringWriter stringWriter = new StringWriter();
|
||||||
IOUtils.copy(gis, stringWriter);
|
IOUtils.copy(gis, stringWriter);
|
||||||
return stringWriter.toString();
|
return stringWriter.toString();
|
||||||
} catch (Throwable e) {
|
} catch (IOException e) {
|
||||||
log.error("Wrong value to decompress:" + abstractCompressed);
|
log.error("Wrong value to decompress: {}", abstractCompressed);
|
||||||
throw new RuntimeException(e);
|
throw new IllegalArgumentException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String compressArgument(final String value) throws Exception {
|
public static String compressArgument(final String value) throws IOException {
|
||||||
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||||
GZIPOutputStream gzip = new GZIPOutputStream(out);
|
GZIPOutputStream gzip = new GZIPOutputStream(out);
|
||||||
gzip.write(value.getBytes());
|
gzip.write(value.getBytes());
|
||||||
|
|
|
@ -9,9 +9,6 @@ public class OptionsParameter {
|
||||||
private boolean paramRequired;
|
private boolean paramRequired;
|
||||||
private boolean compressed;
|
private boolean compressed;
|
||||||
|
|
||||||
public OptionsParameter() {
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getParamName() {
|
public String getParamName() {
|
||||||
return paramName;
|
return paramName;
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,7 +34,7 @@ public class ApiDescriptor {
|
||||||
return params;
|
return params;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setParams(final HashMap<String, String> params) {
|
public void setParams(final Map<String, String> params) {
|
||||||
this.params = params;
|
this.params = params;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,9 @@ public class Constants {
|
||||||
|
|
||||||
public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/";
|
public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/";
|
||||||
|
|
||||||
|
private Constants() {
|
||||||
|
}
|
||||||
|
|
||||||
static {
|
static {
|
||||||
accessRightsCoarMap.put("OPEN", "c_abf2");
|
accessRightsCoarMap.put("OPEN", "c_abf2");
|
||||||
accessRightsCoarMap.put("RESTRICTED", "c_16ec");
|
accessRightsCoarMap.put("RESTRICTED", "c_16ec");
|
||||||
|
@ -49,4 +52,10 @@ public class Constants {
|
||||||
public static final String CONTENT_INVALIDRECORDS = "InvalidRecords";
|
public static final String CONTENT_INVALIDRECORDS = "InvalidRecords";
|
||||||
public static final String CONTENT_TRANSFORMEDRECORDS = "transformedItems";
|
public static final String CONTENT_TRANSFORMEDRECORDS = "transformedItems";
|
||||||
|
|
||||||
|
// IETF Draft and used by Repositories like ZENODO , not included in APACHE HTTP java packages
|
||||||
|
// see https://ietf-wg-httpapi.github.io/ratelimit-headers/draft-ietf-httpapi-ratelimit-headers.html
|
||||||
|
public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT = "X-RateLimit-Limit";
|
||||||
|
public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING = "X-RateLimit-Remaining";
|
||||||
|
public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_RESET = "X-RateLimit-Reset";
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,412 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.common;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.util.*;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
|
||||||
import eu.dnetlib.dhp.schema.dump.oaf.*;
|
|
||||||
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityInstance;
|
|
||||||
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Field;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Journal;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
|
||||||
|
|
||||||
public class GraphResultMapper implements Serializable {
|
|
||||||
|
|
||||||
public static <E extends eu.dnetlib.dhp.schema.oaf.OafEntity> Result map(
|
|
||||||
E in) {
|
|
||||||
|
|
||||||
CommunityResult out = new CommunityResult();
|
|
||||||
|
|
||||||
eu.dnetlib.dhp.schema.oaf.Result input = (eu.dnetlib.dhp.schema.oaf.Result) in;
|
|
||||||
Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> ort = Optional.ofNullable(input.getResulttype());
|
|
||||||
if (ort.isPresent()) {
|
|
||||||
switch (ort.get().getClassid()) {
|
|
||||||
case "publication":
|
|
||||||
Optional<Journal> journal = Optional
|
|
||||||
.ofNullable(((eu.dnetlib.dhp.schema.oaf.Publication) input).getJournal());
|
|
||||||
if (journal.isPresent()) {
|
|
||||||
Journal j = journal.get();
|
|
||||||
Container c = new Container();
|
|
||||||
c.setConferencedate(j.getConferencedate());
|
|
||||||
c.setConferenceplace(j.getConferenceplace());
|
|
||||||
c.setEdition(j.getEdition());
|
|
||||||
c.setEp(j.getEp());
|
|
||||||
c.setIss(j.getIss());
|
|
||||||
c.setIssnLinking(j.getIssnLinking());
|
|
||||||
c.setIssnOnline(j.getIssnOnline());
|
|
||||||
c.setIssnPrinted(j.getIssnPrinted());
|
|
||||||
c.setName(j.getName());
|
|
||||||
c.setSp(j.getSp());
|
|
||||||
c.setVol(j.getVol());
|
|
||||||
out.setContainer(c);
|
|
||||||
out.setType(ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE.getClassname());
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case "dataset":
|
|
||||||
eu.dnetlib.dhp.schema.oaf.Dataset id = (eu.dnetlib.dhp.schema.oaf.Dataset) input;
|
|
||||||
Optional.ofNullable(id.getSize()).ifPresent(v -> out.setSize(v.getValue()));
|
|
||||||
Optional.ofNullable(id.getVersion()).ifPresent(v -> out.setVersion(v.getValue()));
|
|
||||||
|
|
||||||
out
|
|
||||||
.setGeolocation(
|
|
||||||
Optional
|
|
||||||
.ofNullable(id.getGeolocation())
|
|
||||||
.map(
|
|
||||||
igl -> igl
|
|
||||||
.stream()
|
|
||||||
.filter(Objects::nonNull)
|
|
||||||
.map(gli -> {
|
|
||||||
GeoLocation gl = new GeoLocation();
|
|
||||||
gl.setBox(gli.getBox());
|
|
||||||
gl.setPlace(gli.getPlace());
|
|
||||||
gl.setPoint(gli.getPoint());
|
|
||||||
return gl;
|
|
||||||
})
|
|
||||||
.collect(Collectors.toList()))
|
|
||||||
.orElse(null));
|
|
||||||
|
|
||||||
out.setType(ModelConstants.DATASET_DEFAULT_RESULTTYPE.getClassname());
|
|
||||||
break;
|
|
||||||
case "software":
|
|
||||||
|
|
||||||
eu.dnetlib.dhp.schema.oaf.Software is = (eu.dnetlib.dhp.schema.oaf.Software) input;
|
|
||||||
Optional
|
|
||||||
.ofNullable(is.getCodeRepositoryUrl())
|
|
||||||
.ifPresent(value -> out.setCodeRepositoryUrl(value.getValue()));
|
|
||||||
Optional
|
|
||||||
.ofNullable(is.getDocumentationUrl())
|
|
||||||
.ifPresent(
|
|
||||||
value -> out
|
|
||||||
.setDocumentationUrl(
|
|
||||||
value
|
|
||||||
.stream()
|
|
||||||
.map(v -> v.getValue())
|
|
||||||
.collect(Collectors.toList())));
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(is.getProgrammingLanguage())
|
|
||||||
.ifPresent(value -> out.setProgrammingLanguage(value.getClassid()));
|
|
||||||
|
|
||||||
out.setType(ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE.getClassname());
|
|
||||||
break;
|
|
||||||
case "other":
|
|
||||||
|
|
||||||
eu.dnetlib.dhp.schema.oaf.OtherResearchProduct ir = (eu.dnetlib.dhp.schema.oaf.OtherResearchProduct) input;
|
|
||||||
out
|
|
||||||
.setContactgroup(
|
|
||||||
Optional
|
|
||||||
.ofNullable(ir.getContactgroup())
|
|
||||||
.map(value -> value.stream().map(cg -> cg.getValue()).collect(Collectors.toList()))
|
|
||||||
.orElse(null));
|
|
||||||
|
|
||||||
out
|
|
||||||
.setContactperson(
|
|
||||||
Optional
|
|
||||||
.ofNullable(ir.getContactperson())
|
|
||||||
.map(value -> value.stream().map(cp -> cp.getValue()).collect(Collectors.toList()))
|
|
||||||
.orElse(null));
|
|
||||||
out
|
|
||||||
.setTool(
|
|
||||||
Optional
|
|
||||||
.ofNullable(ir.getTool())
|
|
||||||
.map(value -> value.stream().map(t -> t.getValue()).collect(Collectors.toList()))
|
|
||||||
.orElse(null));
|
|
||||||
|
|
||||||
out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname());
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(input.getAuthor())
|
|
||||||
.ifPresent(ats -> out.setAuthor(ats.stream().map(at -> getAuthor(at)).collect(Collectors.toList())));
|
|
||||||
|
|
||||||
// I do not map Access Right UNKNOWN or OTHER
|
|
||||||
|
|
||||||
Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> oar = Optional.ofNullable(input.getBestaccessright());
|
|
||||||
if (oar.isPresent()) {
|
|
||||||
if (Constants.accessRightsCoarMap.containsKey(oar.get().getClassid())) {
|
|
||||||
String code = Constants.accessRightsCoarMap.get(oar.get().getClassid());
|
|
||||||
out
|
|
||||||
.setBestaccessright(
|
|
||||||
AccessRight
|
|
||||||
.newInstance(
|
|
||||||
code,
|
|
||||||
Constants.coarCodeLabelMap.get(code),
|
|
||||||
Constants.COAR_ACCESS_RIGHT_SCHEMA));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
final List<String> contributorList = new ArrayList<>();
|
|
||||||
Optional
|
|
||||||
.ofNullable(input.getContributor())
|
|
||||||
.ifPresent(value -> value.stream().forEach(c -> contributorList.add(c.getValue())));
|
|
||||||
out.setContributor(contributorList);
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(input.getCountry())
|
|
||||||
.ifPresent(
|
|
||||||
value -> out
|
|
||||||
.setCountry(
|
|
||||||
value
|
|
||||||
.stream()
|
|
||||||
.map(
|
|
||||||
c -> {
|
|
||||||
if (c.getClassid().equals((ModelConstants.UNKNOWN))) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
Country country = new Country();
|
|
||||||
country.setCode(c.getClassid());
|
|
||||||
country.setLabel(c.getClassname());
|
|
||||||
Optional
|
|
||||||
.ofNullable(c.getDataInfo())
|
|
||||||
.ifPresent(
|
|
||||||
provenance -> country
|
|
||||||
.setProvenance(
|
|
||||||
Provenance
|
|
||||||
.newInstance(
|
|
||||||
provenance
|
|
||||||
.getProvenanceaction()
|
|
||||||
.getClassname(),
|
|
||||||
c.getDataInfo().getTrust())));
|
|
||||||
return country;
|
|
||||||
})
|
|
||||||
.filter(Objects::nonNull)
|
|
||||||
.collect(Collectors.toList())));
|
|
||||||
|
|
||||||
final List<String> coverageList = new ArrayList<>();
|
|
||||||
Optional
|
|
||||||
.ofNullable(input.getCoverage())
|
|
||||||
.ifPresent(value -> value.stream().forEach(c -> coverageList.add(c.getValue())));
|
|
||||||
out.setCoverage(coverageList);
|
|
||||||
|
|
||||||
out.setDateofcollection(input.getDateofcollection());
|
|
||||||
|
|
||||||
final List<String> descriptionList = new ArrayList<>();
|
|
||||||
Optional
|
|
||||||
.ofNullable(input.getDescription())
|
|
||||||
.ifPresent(value -> value.forEach(d -> descriptionList.add(d.getValue())));
|
|
||||||
out.setDescription(descriptionList);
|
|
||||||
Optional<Field<String>> oStr = Optional.ofNullable(input.getEmbargoenddate());
|
|
||||||
if (oStr.isPresent()) {
|
|
||||||
out.setEmbargoenddate(oStr.get().getValue());
|
|
||||||
}
|
|
||||||
|
|
||||||
final List<String> formatList = new ArrayList<>();
|
|
||||||
Optional
|
|
||||||
.ofNullable(input.getFormat())
|
|
||||||
.ifPresent(value -> value.stream().forEach(f -> formatList.add(f.getValue())));
|
|
||||||
out.setFormat(formatList);
|
|
||||||
out.setId(input.getId());
|
|
||||||
out.setOriginalId(input.getOriginalId());
|
|
||||||
|
|
||||||
Optional<List<eu.dnetlib.dhp.schema.oaf.Instance>> oInst = Optional
|
|
||||||
.ofNullable(input.getInstance());
|
|
||||||
|
|
||||||
if (oInst.isPresent()) {
|
|
||||||
out
|
|
||||||
.setInstance(
|
|
||||||
oInst.get().stream().map(i -> getInstance(i)).collect(Collectors.toList()));
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> oL = Optional.ofNullable(input.getLanguage());
|
|
||||||
if (oL.isPresent()) {
|
|
||||||
eu.dnetlib.dhp.schema.oaf.Qualifier language = oL.get();
|
|
||||||
out.setLanguage(Qualifier.newInstance(language.getClassid(), language.getClassname()));
|
|
||||||
}
|
|
||||||
Optional<Long> oLong = Optional.ofNullable(input.getLastupdatetimestamp());
|
|
||||||
if (oLong.isPresent()) {
|
|
||||||
out.setLastupdatetimestamp(oLong.get());
|
|
||||||
}
|
|
||||||
Optional<List<StructuredProperty>> otitle = Optional.ofNullable(input.getTitle());
|
|
||||||
if (otitle.isPresent()) {
|
|
||||||
List<StructuredProperty> iTitle = otitle
|
|
||||||
.get()
|
|
||||||
.stream()
|
|
||||||
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title"))
|
|
||||||
.collect(Collectors.toList());
|
|
||||||
if (iTitle.size() > 0) {
|
|
||||||
out.setMaintitle(iTitle.get(0).getValue());
|
|
||||||
}
|
|
||||||
|
|
||||||
iTitle = otitle
|
|
||||||
.get()
|
|
||||||
.stream()
|
|
||||||
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle"))
|
|
||||||
.collect(Collectors.toList());
|
|
||||||
if (iTitle.size() > 0) {
|
|
||||||
out.setSubtitle(iTitle.get(0).getValue());
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
List<ControlledField> pids = new ArrayList<>();
|
|
||||||
Optional
|
|
||||||
.ofNullable(input.getPid())
|
|
||||||
.ifPresent(
|
|
||||||
value -> value
|
|
||||||
.stream()
|
|
||||||
.forEach(
|
|
||||||
p -> pids
|
|
||||||
.add(
|
|
||||||
ControlledField
|
|
||||||
.newInstance(p.getQualifier().getClassid(), p.getValue()))));
|
|
||||||
out.setPid(pids);
|
|
||||||
oStr = Optional.ofNullable(input.getDateofacceptance());
|
|
||||||
if (oStr.isPresent()) {
|
|
||||||
out.setPublicationdate(oStr.get().getValue());
|
|
||||||
}
|
|
||||||
oStr = Optional.ofNullable(input.getPublisher());
|
|
||||||
if (oStr.isPresent()) {
|
|
||||||
out.setPublisher(oStr.get().getValue());
|
|
||||||
}
|
|
||||||
|
|
||||||
List<String> sourceList = new ArrayList<>();
|
|
||||||
Optional
|
|
||||||
.ofNullable(input.getSource())
|
|
||||||
.ifPresent(value -> value.stream().forEach(s -> sourceList.add(s.getValue())));
|
|
||||||
// out.setSource(input.getSource().stream().map(s -> s.getValue()).collect(Collectors.toList()));
|
|
||||||
List<Subject> subjectList = new ArrayList<>();
|
|
||||||
Optional
|
|
||||||
.ofNullable(input.getSubject())
|
|
||||||
.ifPresent(
|
|
||||||
value -> value
|
|
||||||
.forEach(s -> subjectList.add(getSubject(s))));
|
|
||||||
|
|
||||||
out.setSubjects(subjectList);
|
|
||||||
|
|
||||||
out.setType(input.getResulttype().getClassid());
|
|
||||||
}
|
|
||||||
|
|
||||||
out
|
|
||||||
.setCollectedfrom(
|
|
||||||
input
|
|
||||||
.getCollectedfrom()
|
|
||||||
.stream()
|
|
||||||
.map(cf -> KeyValue.newInstance(cf.getKey(), cf.getValue()))
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
|
|
||||||
return out;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private static CommunityInstance getInstance(eu.dnetlib.dhp.schema.oaf.Instance i) {
|
|
||||||
CommunityInstance instance = new CommunityInstance();
|
|
||||||
|
|
||||||
setCommonValue(i, instance);
|
|
||||||
|
|
||||||
instance
|
|
||||||
.setCollectedfrom(
|
|
||||||
KeyValue
|
|
||||||
.newInstance(i.getCollectedfrom().getKey(), i.getCollectedfrom().getValue()));
|
|
||||||
|
|
||||||
instance
|
|
||||||
.setHostedby(
|
|
||||||
KeyValue.newInstance(i.getHostedby().getKey(), i.getHostedby().getValue()));
|
|
||||||
|
|
||||||
return instance;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private static <I extends Instance> void setCommonValue(eu.dnetlib.dhp.schema.oaf.Instance i, I instance) {
|
|
||||||
Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> opAr = Optional
|
|
||||||
.ofNullable(i.getAccessright());
|
|
||||||
if (opAr.isPresent()) {
|
|
||||||
if (Constants.accessRightsCoarMap.containsKey(opAr.get().getClassid())) {
|
|
||||||
String code = Constants.accessRightsCoarMap.get(opAr.get().getClassid());
|
|
||||||
instance
|
|
||||||
.setAccessright(
|
|
||||||
AccessRight
|
|
||||||
.newInstance(
|
|
||||||
code,
|
|
||||||
Constants.coarCodeLabelMap.get(code),
|
|
||||||
Constants.COAR_ACCESS_RIGHT_SCHEMA));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Optional
|
|
||||||
.ofNullable(i.getLicense())
|
|
||||||
.ifPresent(value -> instance.setLicense(value.getValue()));
|
|
||||||
Optional
|
|
||||||
.ofNullable(i.getDateofacceptance())
|
|
||||||
.ifPresent(value -> instance.setPublicationdate(value.getValue()));
|
|
||||||
Optional
|
|
||||||
.ofNullable(i.getRefereed())
|
|
||||||
.ifPresent(value -> instance.setRefereed(value.getClassname()));
|
|
||||||
Optional
|
|
||||||
.ofNullable(i.getInstancetype())
|
|
||||||
.ifPresent(value -> instance.setType(value.getClassname()));
|
|
||||||
Optional.ofNullable(i.getUrl()).ifPresent(value -> instance.setUrl(value));
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Subject getSubject(StructuredProperty s) {
|
|
||||||
Subject subject = new Subject();
|
|
||||||
subject.setSubject(ControlledField.newInstance(s.getQualifier().getClassid(), s.getValue()));
|
|
||||||
Optional<DataInfo> di = Optional.ofNullable(s.getDataInfo());
|
|
||||||
if (di.isPresent()) {
|
|
||||||
Provenance p = new Provenance();
|
|
||||||
p.setProvenance(di.get().getProvenanceaction().getClassname());
|
|
||||||
p.setTrust(di.get().getTrust());
|
|
||||||
subject.setProvenance(p);
|
|
||||||
}
|
|
||||||
|
|
||||||
return subject;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Author getAuthor(eu.dnetlib.dhp.schema.oaf.Author oa) {
|
|
||||||
Author a = new Author();
|
|
||||||
a.setFullname(oa.getFullname());
|
|
||||||
a.setName(oa.getName());
|
|
||||||
a.setSurname(oa.getSurname());
|
|
||||||
a.setRank(oa.getRank());
|
|
||||||
|
|
||||||
Optional<List<StructuredProperty>> oPids = Optional
|
|
||||||
.ofNullable(oa.getPid());
|
|
||||||
if (oPids.isPresent()) {
|
|
||||||
Pid pid = getOrcid(oPids.get());
|
|
||||||
if (pid != null) {
|
|
||||||
a.setPid(pid);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Pid getOrcid(List<StructuredProperty> p) {
|
|
||||||
for (StructuredProperty pid : p) {
|
|
||||||
if (pid.getQualifier().getClassid().equals(ModelConstants.ORCID)) {
|
|
||||||
Optional<DataInfo> di = Optional.ofNullable(pid.getDataInfo());
|
|
||||||
if (di.isPresent()) {
|
|
||||||
return Pid
|
|
||||||
.newInstance(
|
|
||||||
ControlledField
|
|
||||||
.newInstance(
|
|
||||||
pid.getQualifier().getClassid(),
|
|
||||||
pid.getValue()),
|
|
||||||
Provenance
|
|
||||||
.newInstance(
|
|
||||||
di.get().getProvenanceaction().getClassname(),
|
|
||||||
di.get().getTrust()));
|
|
||||||
} else {
|
|
||||||
return Pid
|
|
||||||
.newInstance(
|
|
||||||
ControlledField
|
|
||||||
.newInstance(
|
|
||||||
pid.getQualifier().getClassid(),
|
|
||||||
pid.getValue())
|
|
||||||
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -28,7 +28,7 @@ public class HdfsSupport {
|
||||||
* @param configuration Configuration of hadoop env
|
* @param configuration Configuration of hadoop env
|
||||||
*/
|
*/
|
||||||
public static boolean exists(String path, Configuration configuration) {
|
public static boolean exists(String path, Configuration configuration) {
|
||||||
logger.info("Removing path: {}", path);
|
logger.info("Checking existence for path: {}", path);
|
||||||
return rethrowAsRuntimeException(
|
return rethrowAsRuntimeException(
|
||||||
() -> {
|
() -> {
|
||||||
Path f = new Path(path);
|
Path f = new Path(path);
|
||||||
|
|
|
@ -14,38 +14,33 @@ public class MakeTarArchive implements Serializable {
|
||||||
|
|
||||||
private static TarArchiveOutputStream getTar(FileSystem fileSystem, String outputPath) throws IOException {
|
private static TarArchiveOutputStream getTar(FileSystem fileSystem, String outputPath) throws IOException {
|
||||||
Path hdfsWritePath = new Path(outputPath);
|
Path hdfsWritePath = new Path(outputPath);
|
||||||
FSDataOutputStream fsDataOutputStream = null;
|
|
||||||
if (fileSystem.exists(hdfsWritePath)) {
|
if (fileSystem.exists(hdfsWritePath)) {
|
||||||
fileSystem.delete(hdfsWritePath, true);
|
fileSystem.delete(hdfsWritePath, true);
|
||||||
|
|
||||||
}
|
}
|
||||||
fsDataOutputStream = fileSystem.create(hdfsWritePath);
|
return new TarArchiveOutputStream(fileSystem.create(hdfsWritePath).getWrappedStream());
|
||||||
|
|
||||||
return new TarArchiveOutputStream(fsDataOutputStream.getWrappedStream());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dir_name)
|
private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dir_name)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
|
||||||
Path hdfsWritePath = new Path(outputPath);
|
Path hdfsWritePath = new Path(outputPath);
|
||||||
FSDataOutputStream fsDataOutputStream = null;
|
|
||||||
if (fileSystem.exists(hdfsWritePath)) {
|
if (fileSystem.exists(hdfsWritePath)) {
|
||||||
fileSystem.delete(hdfsWritePath, true);
|
fileSystem.delete(hdfsWritePath, true);
|
||||||
|
|
||||||
}
|
}
|
||||||
fsDataOutputStream = fileSystem.create(hdfsWritePath);
|
try (TarArchiveOutputStream ar = new TarArchiveOutputStream(
|
||||||
|
fileSystem.create(hdfsWritePath).getWrappedStream())) {
|
||||||
|
|
||||||
TarArchiveOutputStream ar = new TarArchiveOutputStream(fsDataOutputStream.getWrappedStream());
|
RemoteIterator<LocatedFileStatus> iterator = fileSystem
|
||||||
|
|
||||||
RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
|
|
||||||
.listFiles(
|
.listFiles(
|
||||||
new Path(inputPath), true);
|
new Path(inputPath), true);
|
||||||
|
|
||||||
while (fileStatusListIterator.hasNext()) {
|
while (iterator.hasNext()) {
|
||||||
writeCurrentFile(fileSystem, dir_name, fileStatusListIterator, ar, 0);
|
writeCurrentFile(fileSystem, dir_name, iterator, ar, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
ar.close();
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void tarMaxSize(FileSystem fileSystem, String inputPath, String outputPath, String dir_name,
|
public static void tarMaxSize(FileSystem fileSystem, String inputPath, String outputPath, String dir_name,
|
||||||
|
@ -90,6 +85,13 @@ public class MakeTarArchive implements Serializable {
|
||||||
String p_string = p.toString();
|
String p_string = p.toString();
|
||||||
if (!p_string.endsWith("_SUCCESS")) {
|
if (!p_string.endsWith("_SUCCESS")) {
|
||||||
String name = p_string.substring(p_string.lastIndexOf("/") + 1);
|
String name = p_string.substring(p_string.lastIndexOf("/") + 1);
|
||||||
|
if (name.startsWith("part-") & name.length() > 10) {
|
||||||
|
String tmp = name.substring(0, 10);
|
||||||
|
if (name.contains(".")) {
|
||||||
|
tmp += name.substring(name.indexOf("."));
|
||||||
|
}
|
||||||
|
name = tmp;
|
||||||
|
}
|
||||||
TarArchiveEntry entry = new TarArchiveEntry(dir_name + "/" + name);
|
TarArchiveEntry entry = new TarArchiveEntry(dir_name + "/" + name);
|
||||||
entry.setSize(fileStatus.getLen());
|
entry.setSize(fileStatus.getLen());
|
||||||
current_size += fileStatus.getLen();
|
current_size += fileStatus.getLen();
|
||||||
|
|
|
@ -10,8 +10,6 @@ import java.util.Optional;
|
||||||
import java.util.stream.StreamSupport;
|
import java.util.stream.StreamSupport;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.bson.Document;
|
import org.bson.Document;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
@ -21,6 +19,7 @@ import com.mongodb.BasicDBObject;
|
||||||
import com.mongodb.MongoClient;
|
import com.mongodb.MongoClient;
|
||||||
import com.mongodb.MongoClientURI;
|
import com.mongodb.MongoClientURI;
|
||||||
import com.mongodb.QueryBuilder;
|
import com.mongodb.QueryBuilder;
|
||||||
|
import com.mongodb.client.FindIterable;
|
||||||
import com.mongodb.client.MongoCollection;
|
import com.mongodb.client.MongoCollection;
|
||||||
import com.mongodb.client.MongoDatabase;
|
import com.mongodb.client.MongoDatabase;
|
||||||
|
|
||||||
|
@ -46,7 +45,7 @@ public class MdstoreClient implements Closeable {
|
||||||
|
|
||||||
final String currentId = Optional
|
final String currentId = Optional
|
||||||
.ofNullable(getColl(db, COLL_METADATA_MANAGER, true).find(query))
|
.ofNullable(getColl(db, COLL_METADATA_MANAGER, true).find(query))
|
||||||
.map(r -> r.first())
|
.map(FindIterable::first)
|
||||||
.map(d -> d.getString("currentId"))
|
.map(d -> d.getString("currentId"))
|
||||||
.orElseThrow(() -> new IllegalArgumentException("cannot find current mdstore id for: " + mdId));
|
.orElseThrow(() -> new IllegalArgumentException("cannot find current mdstore id for: " + mdId));
|
||||||
|
|
||||||
|
@ -84,7 +83,7 @@ public class MdstoreClient implements Closeable {
|
||||||
if (!Iterables.contains(client.listDatabaseNames(), dbName)) {
|
if (!Iterables.contains(client.listDatabaseNames(), dbName)) {
|
||||||
final String err = String.format("Database '%s' not found in %s", dbName, client.getAddress());
|
final String err = String.format("Database '%s' not found in %s", dbName, client.getAddress());
|
||||||
log.warn(err);
|
log.warn(err);
|
||||||
throw new RuntimeException(err);
|
throw new IllegalArgumentException(err);
|
||||||
}
|
}
|
||||||
return client.getDatabase(dbName);
|
return client.getDatabase(dbName);
|
||||||
}
|
}
|
||||||
|
@ -97,7 +96,7 @@ public class MdstoreClient implements Closeable {
|
||||||
String.format("Missing collection '%s' in database '%s'", collName, db.getName()));
|
String.format("Missing collection '%s' in database '%s'", collName, db.getName()));
|
||||||
log.warn(err);
|
log.warn(err);
|
||||||
if (abortIfMissing) {
|
if (abortIfMissing) {
|
||||||
throw new RuntimeException(err);
|
throw new IllegalArgumentException(err);
|
||||||
} else {
|
} else {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,7 +24,6 @@ import com.google.common.hash.Hashing;
|
||||||
*/
|
*/
|
||||||
public class PacePerson {
|
public class PacePerson {
|
||||||
|
|
||||||
private static final String UTF8 = "UTF-8";
|
|
||||||
private List<String> name = Lists.newArrayList();
|
private List<String> name = Lists.newArrayList();
|
||||||
private List<String> surname = Lists.newArrayList();
|
private List<String> surname = Lists.newArrayList();
|
||||||
private List<String> fullname = Lists.newArrayList();
|
private List<String> fullname = Lists.newArrayList();
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.aggregation.common;
|
package eu.dnetlib.dhp.common.aggregation;
|
||||||
|
|
||||||
import java.io.Closeable;
|
import java.io.Closeable;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -11,8 +11,6 @@ import java.util.Objects;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.message.MessageSender;
|
import eu.dnetlib.dhp.message.MessageSender;
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
|
|
||||||
|
@ -20,12 +18,12 @@ public class AggregatorReport extends LinkedHashMap<String, String> implements C
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(AggregatorReport.class);
|
private static final Logger log = LoggerFactory.getLogger(AggregatorReport.class);
|
||||||
|
|
||||||
private MessageSender messageSender;
|
private transient MessageSender messageSender;
|
||||||
|
|
||||||
public AggregatorReport() {
|
public AggregatorReport() {
|
||||||
}
|
}
|
||||||
|
|
||||||
public AggregatorReport(MessageSender messageSender) throws IOException {
|
public AggregatorReport(MessageSender messageSender) {
|
||||||
this.messageSender = messageSender;
|
this.messageSender = messageSender;
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,6 +5,9 @@ import java.io.*;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
import org.apache.http.HttpHeaders;
|
||||||
|
import org.apache.http.entity.ContentType;
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
import com.google.gson.Gson;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel;
|
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel;
|
||||||
|
@ -43,7 +46,7 @@ public class ZenodoAPIClient implements Serializable {
|
||||||
this.deposition_id = deposition_id;
|
this.deposition_id = deposition_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
public ZenodoAPIClient(String urlString, String access_token) throws IOException {
|
public ZenodoAPIClient(String urlString, String access_token) {
|
||||||
|
|
||||||
this.urlString = urlString;
|
this.urlString = urlString;
|
||||||
this.access_token = access_token;
|
this.access_token = access_token;
|
||||||
|
@ -63,8 +66,8 @@ public class ZenodoAPIClient implements Serializable {
|
||||||
|
|
||||||
Request request = new Request.Builder()
|
Request request = new Request.Builder()
|
||||||
.url(urlString)
|
.url(urlString)
|
||||||
.addHeader("Content-Type", "application/json") // add request headers
|
.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
|
||||||
.addHeader("Authorization", "Bearer " + access_token)
|
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
|
||||||
.post(body)
|
.post(body)
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
@ -103,8 +106,8 @@ public class ZenodoAPIClient implements Serializable {
|
||||||
|
|
||||||
Request request = new Request.Builder()
|
Request request = new Request.Builder()
|
||||||
.url(bucket + "/" + file_name)
|
.url(bucket + "/" + file_name)
|
||||||
.addHeader("Content-Type", "application/zip") // add request headers
|
.addHeader(HttpHeaders.CONTENT_TYPE, "application/zip") // add request headers
|
||||||
.addHeader("Authorization", "Bearer " + access_token)
|
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
|
||||||
.put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len))
|
.put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len))
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
@ -130,8 +133,8 @@ public class ZenodoAPIClient implements Serializable {
|
||||||
|
|
||||||
Request request = new Request.Builder()
|
Request request = new Request.Builder()
|
||||||
.url(urlString + "/" + deposition_id)
|
.url(urlString + "/" + deposition_id)
|
||||||
.addHeader("Content-Type", "application/json") // add request headers
|
.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
|
||||||
.addHeader("Authorization", "Bearer " + access_token)
|
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
|
||||||
.put(body)
|
.put(body)
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
@ -197,7 +200,7 @@ public class ZenodoAPIClient implements Serializable {
|
||||||
|
|
||||||
Request request = new Request.Builder()
|
Request request = new Request.Builder()
|
||||||
.url(urlString + "/" + deposition_id + "/actions/newversion")
|
.url(urlString + "/" + deposition_id + "/actions/newversion")
|
||||||
.addHeader("Authorization", "Bearer " + access_token)
|
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
|
||||||
.post(body)
|
.post(body)
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
@ -270,8 +273,8 @@ public class ZenodoAPIClient implements Serializable {
|
||||||
|
|
||||||
Request request = new Request.Builder()
|
Request request = new Request.Builder()
|
||||||
.url(urlString)
|
.url(urlString)
|
||||||
.addHeader("Content-Type", "application/json") // add request headers
|
.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
|
||||||
.addHeader("Authorization", "Bearer " + access_token)
|
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
|
||||||
.get()
|
.get()
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
@ -293,8 +296,8 @@ public class ZenodoAPIClient implements Serializable {
|
||||||
|
|
||||||
Request request = new Request.Builder()
|
Request request = new Request.Builder()
|
||||||
.url(url)
|
.url(url)
|
||||||
.addHeader("Content-Type", "application/json") // add request headers
|
.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
|
||||||
.addHeader("Authorization", "Bearer " + access_token)
|
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
|
||||||
.get()
|
.get()
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
|
|
@ -32,13 +32,13 @@ public class Creator {
|
||||||
|
|
||||||
public static Creator newInstance(String name, String affiliation, String orcid) {
|
public static Creator newInstance(String name, String affiliation, String orcid) {
|
||||||
Creator c = new Creator();
|
Creator c = new Creator();
|
||||||
if (!(name == null)) {
|
if (name != null) {
|
||||||
c.name = name;
|
c.name = name;
|
||||||
}
|
}
|
||||||
if (!(affiliation == null)) {
|
if (affiliation != null) {
|
||||||
c.affiliation = affiliation;
|
c.affiliation = affiliation;
|
||||||
}
|
}
|
||||||
if (!(orcid == null)) {
|
if (orcid != null) {
|
||||||
c.orcid = orcid;
|
c.orcid = orcid;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -3,17 +3,12 @@ package eu.dnetlib.dhp.common.api.zenodo;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
||||||
import net.minidev.json.annotate.JsonIgnore;
|
|
||||||
|
|
||||||
public class File implements Serializable {
|
public class File implements Serializable {
|
||||||
private String checksum;
|
private String checksum;
|
||||||
private String filename;
|
private String filename;
|
||||||
private long filesize;
|
private long filesize;
|
||||||
private String id;
|
private String id;
|
||||||
|
|
||||||
@JsonIgnore
|
|
||||||
// private Links links;
|
|
||||||
|
|
||||||
public String getChecksum() {
|
public String getChecksum() {
|
||||||
return checksum;
|
return checksum;
|
||||||
}
|
}
|
||||||
|
@ -46,13 +41,4 @@ public class File implements Serializable {
|
||||||
this.id = id;
|
this.id = id;
|
||||||
}
|
}
|
||||||
|
|
||||||
// @JsonIgnore
|
|
||||||
// public Links getLinks() {
|
|
||||||
// return links;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// @JsonIgnore
|
|
||||||
// public void setLinks(Links links) {
|
|
||||||
// this.links = links;
|
|
||||||
// }
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.collection;
|
package eu.dnetlib.dhp.common.collection;
|
||||||
|
|
||||||
public class CollectorException extends Exception {
|
public class CollectorException extends Exception {
|
||||||
|
|
|
@ -0,0 +1,56 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.collection;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import com.opencsv.bean.CsvToBeanBuilder;
|
||||||
|
|
||||||
|
public class GetCSV {
|
||||||
|
|
||||||
|
public static final char DEFAULT_DELIMITER = ',';
|
||||||
|
|
||||||
|
private GetCSV() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void getCsv(FileSystem fileSystem, BufferedReader reader, String hdfsPath,
|
||||||
|
String modelClass) throws IOException, ClassNotFoundException {
|
||||||
|
getCsv(fileSystem, reader, hdfsPath, modelClass, DEFAULT_DELIMITER);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void getCsv(FileSystem fileSystem, Reader reader, String hdfsPath,
|
||||||
|
String modelClass, char delimiter) throws IOException, ClassNotFoundException {
|
||||||
|
|
||||||
|
Path hdfsWritePath = new Path(hdfsPath);
|
||||||
|
FSDataOutputStream fsDataOutputStream = null;
|
||||||
|
if (fileSystem.exists(hdfsWritePath)) {
|
||||||
|
fileSystem.delete(hdfsWritePath, false);
|
||||||
|
}
|
||||||
|
fsDataOutputStream = fileSystem.create(hdfsWritePath);
|
||||||
|
|
||||||
|
try (BufferedWriter writer = new BufferedWriter(
|
||||||
|
new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8))) {
|
||||||
|
|
||||||
|
final ObjectMapper mapper = new ObjectMapper();
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
final List lines = new CsvToBeanBuilder(reader)
|
||||||
|
.withType(Class.forName(modelClass))
|
||||||
|
.withSeparator(delimiter)
|
||||||
|
.build()
|
||||||
|
.parse();
|
||||||
|
|
||||||
|
for (Object line : lines) {
|
||||||
|
writer.write(mapper.writeValueAsString(line));
|
||||||
|
writer.newLine();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.collection;
|
package eu.dnetlib.dhp.common.collection;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Bundles the http connection parameters driving the client behaviour.
|
* Bundles the http connection parameters driving the client behaviour.
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.collection;
|
package eu.dnetlib.dhp.common.collection;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.utils.DHPUtils.*;
|
import static eu.dnetlib.dhp.utils.DHPUtils.*;
|
||||||
|
|
||||||
|
@ -15,12 +15,13 @@ import org.apache.http.HttpHeaders;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
|
import eu.dnetlib.dhp.common.Constants;
|
||||||
|
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Migrated from https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HttpConnector.java
|
* Migrated from https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HttpConnector.java
|
||||||
*
|
*
|
||||||
* @author jochen, michele, andrea, alessia, claudio
|
* @author jochen, michele, andrea, alessia, claudio, andreas
|
||||||
*/
|
*/
|
||||||
public class HttpConnector2 {
|
public class HttpConnector2 {
|
||||||
|
|
||||||
|
@ -32,7 +33,7 @@ public class HttpConnector2 {
|
||||||
|
|
||||||
private String responseType = null;
|
private String responseType = null;
|
||||||
|
|
||||||
private final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)";
|
private static final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)";
|
||||||
|
|
||||||
public HttpConnector2() {
|
public HttpConnector2() {
|
||||||
this(new HttpClientParams());
|
this(new HttpClientParams());
|
||||||
|
@ -112,6 +113,17 @@ public class HttpConnector2 {
|
||||||
}
|
}
|
||||||
|
|
||||||
int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
|
int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
|
||||||
|
String rateLimit = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT);
|
||||||
|
String rateRemaining = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING);
|
||||||
|
|
||||||
|
if ((rateLimit != null) && (rateRemaining != null) && (Integer.parseInt(rateRemaining) < 9)) {
|
||||||
|
if (retryAfter > 0) {
|
||||||
|
backoffAndSleep(retryAfter);
|
||||||
|
} else {
|
||||||
|
backoffAndSleep(1000);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (is2xx(urlConn.getResponseCode())) {
|
if (is2xx(urlConn.getResponseCode())) {
|
||||||
input = urlConn.getInputStream();
|
input = urlConn.getInputStream();
|
||||||
responseType = urlConn.getContentType();
|
responseType = urlConn.getContentType();
|
||||||
|
@ -120,7 +132,7 @@ public class HttpConnector2 {
|
||||||
if (is3xx(urlConn.getResponseCode())) {
|
if (is3xx(urlConn.getResponseCode())) {
|
||||||
// REDIRECTS
|
// REDIRECTS
|
||||||
final String newUrl = obtainNewLocation(urlConn.getHeaderFields());
|
final String newUrl = obtainNewLocation(urlConn.getHeaderFields());
|
||||||
log.info(String.format("The requested url has been moved to %s", newUrl));
|
log.info("The requested url has been moved to {}", newUrl);
|
||||||
report
|
report
|
||||||
.put(
|
.put(
|
||||||
REPORT_PREFIX + urlConn.getResponseCode(),
|
REPORT_PREFIX + urlConn.getResponseCode(),
|
||||||
|
@ -140,14 +152,14 @@ public class HttpConnector2 {
|
||||||
if (retryAfter > 0) {
|
if (retryAfter > 0) {
|
||||||
log
|
log
|
||||||
.warn(
|
.warn(
|
||||||
requestUrl + " - waiting and repeating request after suggested retry-after "
|
"{} - waiting and repeating request after suggested retry-after {} sec.",
|
||||||
+ retryAfter + " sec.");
|
requestUrl, retryAfter);
|
||||||
backoffAndSleep(retryAfter * 1000);
|
backoffAndSleep(retryAfter * 1000);
|
||||||
} else {
|
} else {
|
||||||
log
|
log
|
||||||
.warn(
|
.warn(
|
||||||
requestUrl + " - waiting and repeating request after default delay of "
|
"{} - waiting and repeating request after default delay of {} sec.",
|
||||||
+ getClientParams().getRetryDelay() + " sec.");
|
requestUrl, getClientParams().getRetryDelay());
|
||||||
backoffAndSleep(retryNumber * getClientParams().getRetryDelay() * 1000);
|
backoffAndSleep(retryNumber * getClientParams().getRetryDelay() * 1000);
|
||||||
}
|
}
|
||||||
report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl);
|
report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl);
|
||||||
|
@ -181,12 +193,12 @@ public class HttpConnector2 {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void logHeaderFields(final HttpURLConnection urlConn) throws IOException {
|
private void logHeaderFields(final HttpURLConnection urlConn) throws IOException {
|
||||||
log.debug("StatusCode: " + urlConn.getResponseMessage());
|
log.debug("StatusCode: {}", urlConn.getResponseMessage());
|
||||||
|
|
||||||
for (Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) {
|
for (Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) {
|
||||||
if (e.getKey() != null) {
|
if (e.getKey() != null) {
|
||||||
for (String v : e.getValue()) {
|
for (String v : e.getValue()) {
|
||||||
log.debug(" key: " + e.getKey() + " - value: " + v);
|
log.debug(" key: {} - value: {}", e.getKey(), v);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -204,7 +216,7 @@ public class HttpConnector2 {
|
||||||
|
|
||||||
private int obtainRetryAfter(final Map<String, List<String>> headerMap) {
|
private int obtainRetryAfter(final Map<String, List<String>> headerMap) {
|
||||||
for (String key : headerMap.keySet()) {
|
for (String key : headerMap.keySet()) {
|
||||||
if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (headerMap.get(key).size() > 0)
|
if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (!headerMap.get(key).isEmpty())
|
||||||
&& NumberUtils.isCreatable(headerMap.get(key).get(0))) {
|
&& NumberUtils.isCreatable(headerMap.get(key).get(0))) {
|
||||||
return Integer.parseInt(headerMap.get(key).get(0)) + 10;
|
return Integer.parseInt(headerMap.get(key).get(0)) + 10;
|
||||||
}
|
}
|
|
@ -1,11 +1,11 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.common.rest;
|
package eu.dnetlib.dhp.common.rest;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.http.client.methods.CloseableHttpResponse;
|
|
||||||
import org.apache.http.client.methods.HttpGet;
|
import org.apache.http.client.methods.HttpGet;
|
||||||
import org.apache.http.client.methods.HttpPost;
|
import org.apache.http.client.methods.HttpPost;
|
||||||
import org.apache.http.client.methods.HttpUriRequest;
|
import org.apache.http.client.methods.HttpUriRequest;
|
||||||
|
@ -23,17 +23,20 @@ public class DNetRestClient {
|
||||||
|
|
||||||
private static final ObjectMapper mapper = new ObjectMapper();
|
private static final ObjectMapper mapper = new ObjectMapper();
|
||||||
|
|
||||||
|
private DNetRestClient() {
|
||||||
|
}
|
||||||
|
|
||||||
public static <T> T doGET(final String url, Class<T> clazz) throws Exception {
|
public static <T> T doGET(final String url, Class<T> clazz) throws Exception {
|
||||||
final HttpGet httpGet = new HttpGet(url);
|
final HttpGet httpGet = new HttpGet(url);
|
||||||
return doHTTPRequest(httpGet, clazz);
|
return doHTTPRequest(httpGet, clazz);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String doGET(final String url) throws Exception {
|
public static String doGET(final String url) throws IOException {
|
||||||
final HttpGet httpGet = new HttpGet(url);
|
final HttpGet httpGet = new HttpGet(url);
|
||||||
return doHTTPRequest(httpGet);
|
return doHTTPRequest(httpGet);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static <V> String doPOST(final String url, V objParam) throws Exception {
|
public static <V> String doPOST(final String url, V objParam) throws IOException {
|
||||||
final HttpPost httpPost = new HttpPost(url);
|
final HttpPost httpPost = new HttpPost(url);
|
||||||
|
|
||||||
if (objParam != null) {
|
if (objParam != null) {
|
||||||
|
@ -45,12 +48,12 @@ public class DNetRestClient {
|
||||||
return doHTTPRequest(httpPost);
|
return doHTTPRequest(httpPost);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static <T, V> T doPOST(final String url, V objParam, Class<T> clazz) throws Exception {
|
public static <T, V> T doPOST(final String url, V objParam, Class<T> clazz) throws IOException {
|
||||||
return mapper.readValue(doPOST(url, objParam), clazz);
|
return mapper.readValue(doPOST(url, objParam), clazz);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String doHTTPRequest(final HttpUriRequest r) throws Exception {
|
private static String doHTTPRequest(final HttpUriRequest r) throws IOException {
|
||||||
CloseableHttpClient client = HttpClients.createDefault();
|
try (CloseableHttpClient client = HttpClients.createDefault()) {
|
||||||
|
|
||||||
log.info("performing HTTP request, method {} on URI {}", r.getMethod(), r.getURI().toString());
|
log.info("performing HTTP request, method {} on URI {}", r.getMethod(), r.getURI().toString());
|
||||||
log
|
log
|
||||||
|
@ -62,8 +65,8 @@ public class DNetRestClient {
|
||||||
.map(h -> h.getName() + ":" + h.getValue())
|
.map(h -> h.getName() + ":" + h.getValue())
|
||||||
.collect(Collectors.joining(",")));
|
.collect(Collectors.joining(",")));
|
||||||
|
|
||||||
CloseableHttpResponse response = client.execute(r);
|
return IOUtils.toString(client.execute(r).getEntity().getContent());
|
||||||
return IOUtils.toString(response.getEntity().getContent());
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <T> T doHTTPRequest(final HttpUriRequest r, Class<T> clazz) throws Exception {
|
private static <T> T doHTTPRequest(final HttpUriRequest r, Class<T> clazz) throws Exception {
|
||||||
|
|
|
@ -46,7 +46,7 @@ public class Vocabulary implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public VocabularyTerm getTerm(final String id) {
|
public VocabularyTerm getTerm(final String id) {
|
||||||
return Optional.ofNullable(id).map(s -> s.toLowerCase()).map(s -> terms.get(s)).orElse(null);
|
return Optional.ofNullable(id).map(String::toLowerCase).map(terms::get).orElse(null);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void addTerm(final String id, final String name) {
|
protected void addTerm(final String id, final String name) {
|
||||||
|
@ -81,7 +81,6 @@ public class Vocabulary implements Serializable {
|
||||||
.ofNullable(getTermBySynonym(syn))
|
.ofNullable(getTermBySynonym(syn))
|
||||||
.map(term -> getTermAsQualifier(term.getId()))
|
.map(term -> getTermAsQualifier(term.getId()))
|
||||||
.orElse(null);
|
.orElse(null);
|
||||||
// .orElse(OafMapperUtils.unknown(getId(), getName()));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -46,7 +46,6 @@ public class VocabularyGroup implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
vocs.addTerm(vocId, termId, termName);
|
vocs.addTerm(vocId, termId, termName);
|
||||||
// vocs.addSynonyms(vocId, termId, termId);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -58,10 +57,17 @@ public class VocabularyGroup implements Serializable {
|
||||||
final String syn = arr[2].trim();
|
final String syn = arr[2].trim();
|
||||||
|
|
||||||
vocs.addSynonyms(vocId, termId, syn);
|
vocs.addSynonyms(vocId, termId, syn);
|
||||||
// vocs.addSynonyms(vocId, termId, termId);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// add the term names as synonyms
|
||||||
|
vocs.vocs.values().forEach(voc -> {
|
||||||
|
voc.getTerms().values().forEach(term -> {
|
||||||
|
voc.addSynonym(term.getName().toLowerCase(), term.getId());
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
return vocs;
|
return vocs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -98,7 +104,7 @@ public class VocabularyGroup implements Serializable {
|
||||||
.getTerms()
|
.getTerms()
|
||||||
.values()
|
.values()
|
||||||
.stream()
|
.stream()
|
||||||
.map(t -> t.getId())
|
.map(VocabularyTerm::getId)
|
||||||
.collect(Collectors.toCollection(HashSet::new));
|
.collect(Collectors.toCollection(HashSet::new));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -154,16 +160,19 @@ public class VocabularyGroup implements Serializable {
|
||||||
return Optional
|
return Optional
|
||||||
.ofNullable(vocId)
|
.ofNullable(vocId)
|
||||||
.map(String::toLowerCase)
|
.map(String::toLowerCase)
|
||||||
.map(id -> vocs.containsKey(id))
|
.map(vocs::containsKey)
|
||||||
.orElse(false);
|
.orElse(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void addSynonyms(final String vocId, final String termId, final String syn) {
|
private void addSynonyms(final String vocId, final String termId, final String syn) {
|
||||||
String id = Optional
|
String id = Optional
|
||||||
.ofNullable(vocId)
|
.ofNullable(vocId)
|
||||||
.map(s -> s.toLowerCase())
|
.map(String::toLowerCase)
|
||||||
.orElseThrow(
|
.orElseThrow(
|
||||||
() -> new IllegalArgumentException(String.format("empty vocabulary id for [term:%s, synonym:%s]")));
|
() -> new IllegalArgumentException(
|
||||||
|
String
|
||||||
|
.format(
|
||||||
|
"empty vocabulary id for [term:%s, synonym:%s]", termId, syn)));
|
||||||
Optional
|
Optional
|
||||||
.ofNullable(vocs.get(id))
|
.ofNullable(vocs.get(id))
|
||||||
.orElseThrow(() -> new IllegalArgumentException("missing vocabulary id: " + vocId))
|
.orElseThrow(() -> new IllegalArgumentException("missing vocabulary id: " + vocId))
|
||||||
|
|
|
@ -2,7 +2,6 @@
|
||||||
package eu.dnetlib.dhp.message;
|
package eu.dnetlib.dhp.message;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.LinkedHashMap;
|
import java.util.LinkedHashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
@ -10,8 +9,8 @@ public class Message implements Serializable {
|
||||||
|
|
||||||
private static final long serialVersionUID = 401753881204524893L;
|
private static final long serialVersionUID = 401753881204524893L;
|
||||||
|
|
||||||
public static String CURRENT_PARAM = "current";
|
public static final String CURRENT_PARAM = "current";
|
||||||
public static String TOTAL_PARAM = "total";
|
public static final String TOTAL_PARAM = "total";
|
||||||
|
|
||||||
private MessageType messageType;
|
private MessageType messageType;
|
||||||
|
|
||||||
|
|
|
@ -4,7 +4,6 @@ package eu.dnetlib.dhp.oa.merge;
|
||||||
import java.text.Normalizer;
|
import java.text.Normalizer;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
@ -19,6 +18,9 @@ public class AuthorMerger {
|
||||||
|
|
||||||
private static final Double THRESHOLD = 0.95;
|
private static final Double THRESHOLD = 0.95;
|
||||||
|
|
||||||
|
private AuthorMerger() {
|
||||||
|
}
|
||||||
|
|
||||||
public static List<Author> merge(List<List<Author>> authors) {
|
public static List<Author> merge(List<List<Author>> authors) {
|
||||||
|
|
||||||
authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2)));
|
authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2)));
|
||||||
|
@ -36,7 +38,8 @@ public class AuthorMerger {
|
||||||
public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b, Double threshold) {
|
public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b, Double threshold) {
|
||||||
int pa = countAuthorsPids(a);
|
int pa = countAuthorsPids(a);
|
||||||
int pb = countAuthorsPids(b);
|
int pb = countAuthorsPids(b);
|
||||||
List<Author> base, enrich;
|
List<Author> base;
|
||||||
|
List<Author> enrich;
|
||||||
int sa = authorsSize(a);
|
int sa = authorsSize(a);
|
||||||
int sb = authorsSize(b);
|
int sb = authorsSize(b);
|
||||||
|
|
||||||
|
@ -62,7 +65,7 @@ public class AuthorMerger {
|
||||||
// <pidComparableString, Author> (if an Author has more than 1 pid, it appears 2 times in the list)
|
// <pidComparableString, Author> (if an Author has more than 1 pid, it appears 2 times in the list)
|
||||||
final Map<String, Author> basePidAuthorMap = base
|
final Map<String, Author> basePidAuthorMap = base
|
||||||
.stream()
|
.stream()
|
||||||
.filter(a -> a.getPid() != null && a.getPid().size() > 0)
|
.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
|
||||||
.flatMap(
|
.flatMap(
|
||||||
a -> a
|
a -> a
|
||||||
.getPid()
|
.getPid()
|
||||||
|
@ -74,7 +77,7 @@ public class AuthorMerger {
|
||||||
// <pid, Author> (list of pid that are missing in the other list)
|
// <pid, Author> (list of pid that are missing in the other list)
|
||||||
final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich
|
final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich
|
||||||
.stream()
|
.stream()
|
||||||
.filter(a -> a.getPid() != null && a.getPid().size() > 0)
|
.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
|
||||||
.flatMap(
|
.flatMap(
|
||||||
a -> a
|
a -> a
|
||||||
.getPid()
|
.getPid()
|
||||||
|
@ -117,9 +120,9 @@ public class AuthorMerger {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String pidToComparableString(StructuredProperty pid) {
|
public static String pidToComparableString(StructuredProperty pid) {
|
||||||
return (pid.getQualifier() != null
|
final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase()
|
||||||
? pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : ""
|
: "";
|
||||||
: "")
|
return (pid.getQualifier() != null ? classid : "")
|
||||||
+ (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
|
+ (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,9 @@ import com.ximpleware.VTDNav;
|
||||||
/** Created by sandro on 9/29/16. */
|
/** Created by sandro on 9/29/16. */
|
||||||
public class VtdUtilityParser {
|
public class VtdUtilityParser {
|
||||||
|
|
||||||
|
private VtdUtilityParser() {
|
||||||
|
}
|
||||||
|
|
||||||
public static List<Node> getTextValuesWithAttributes(
|
public static List<Node> getTextValuesWithAttributes(
|
||||||
final AutoPilot ap, final VTDNav vn, final String xpath, final List<String> attributes)
|
final AutoPilot ap, final VTDNav vn, final String xpath, final List<String> attributes)
|
||||||
throws VtdException {
|
throws VtdException {
|
||||||
|
|
|
@ -7,22 +7,21 @@ import java.time.format.DateTimeFormatter;
|
||||||
import java.time.format.DateTimeParseException;
|
import java.time.format.DateTimeParseException;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
import java.util.regex.Matcher;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.jetbrains.annotations.NotNull;
|
|
||||||
|
|
||||||
import com.github.sisyphsu.dateparser.DateParserUtils;
|
import com.github.sisyphsu.dateparser.DateParserUtils;
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
import com.google.common.collect.Maps;
|
|
||||||
import com.google.common.collect.Sets;
|
import com.google.common.collect.Sets;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.common.vocabulary.Vocabulary;
|
||||||
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import me.xuender.unidecode.Unidecode;
|
||||||
|
|
||||||
public class GraphCleaningFunctions extends CleaningFunctions {
|
public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
|
|
||||||
|
@ -30,8 +29,11 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
public static final int ORCID_LEN = 19;
|
public static final int ORCID_LEN = 19;
|
||||||
public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)";
|
public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)";
|
||||||
public static final String INVALID_AUTHOR_REGEX = ".*deactivated.*";
|
public static final String INVALID_AUTHOR_REGEX = ".*deactivated.*";
|
||||||
public static final String TITLE_FILTER_REGEX = "[.*test.*\\W\\d]";
|
|
||||||
public static final int TITLE_FILTER_RESIDUAL_LENGTH = 10;
|
public static final String TITLE_TEST = "test";
|
||||||
|
public static final String TITLE_FILTER_REGEX = String.format("(%s)|\\W|\\d", TITLE_TEST);
|
||||||
|
|
||||||
|
public static final int TITLE_FILTER_RESIDUAL_LENGTH = 5;
|
||||||
|
|
||||||
public static <T extends Oaf> T fixVocabularyNames(T value) {
|
public static <T extends Oaf> T fixVocabularyNames(T value) {
|
||||||
if (value instanceof Datasource) {
|
if (value instanceof Datasource) {
|
||||||
|
@ -86,6 +88,22 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static <T extends Oaf> boolean filter(T value) {
|
public static <T extends Oaf> boolean filter(T value) {
|
||||||
|
if (Boolean.TRUE
|
||||||
|
.equals(
|
||||||
|
Optional
|
||||||
|
.ofNullable(value)
|
||||||
|
.map(
|
||||||
|
o -> Optional
|
||||||
|
.ofNullable(o.getDataInfo())
|
||||||
|
.map(
|
||||||
|
d -> Optional
|
||||||
|
.ofNullable(d.getInvisible())
|
||||||
|
.orElse(true))
|
||||||
|
.orElse(true))
|
||||||
|
.orElse(true))) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
if (value instanceof Datasource) {
|
if (value instanceof Datasource) {
|
||||||
// nothing to evaluate here
|
// nothing to evaluate here
|
||||||
} else if (value instanceof Project) {
|
} else if (value instanceof Project) {
|
||||||
|
@ -115,7 +133,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static <T extends Oaf> T cleanup(T value) {
|
public static <T extends Oaf> T cleanup(T value, VocabularyGroup vocs) {
|
||||||
if (value instanceof Datasource) {
|
if (value instanceof Datasource) {
|
||||||
// nothing to clean here
|
// nothing to clean here
|
||||||
} else if (value instanceof Project) {
|
} else if (value instanceof Project) {
|
||||||
|
@ -194,11 +212,30 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.filter(sp -> StringUtils.isNotBlank(sp.getValue()))
|
.filter(sp -> StringUtils.isNotBlank(sp.getValue()))
|
||||||
.filter(
|
.filter(
|
||||||
sp -> sp
|
sp -> {
|
||||||
|
final String title = sp
|
||||||
.getValue()
|
.getValue()
|
||||||
.toLowerCase()
|
.toLowerCase();
|
||||||
|
final String decoded = Unidecode.decode(title);
|
||||||
|
|
||||||
|
if (StringUtils.contains(decoded, TITLE_TEST)) {
|
||||||
|
return decoded
|
||||||
.replaceAll(TITLE_FILTER_REGEX, "")
|
.replaceAll(TITLE_FILTER_REGEX, "")
|
||||||
.length() > TITLE_FILTER_RESIDUAL_LENGTH)
|
.length() > TITLE_FILTER_RESIDUAL_LENGTH;
|
||||||
|
}
|
||||||
|
return !decoded
|
||||||
|
.replaceAll("\\W|\\d", "")
|
||||||
|
.isEmpty();
|
||||||
|
})
|
||||||
|
.map(GraphCleaningFunctions::cleanValue)
|
||||||
|
.collect(Collectors.toList()));
|
||||||
|
}
|
||||||
|
if (Objects.nonNull(r.getFormat())) {
|
||||||
|
r
|
||||||
|
.setFormat(
|
||||||
|
r
|
||||||
|
.getFormat()
|
||||||
|
.stream()
|
||||||
.map(GraphCleaningFunctions::cleanValue)
|
.map(GraphCleaningFunctions::cleanValue)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
}
|
}
|
||||||
|
@ -224,6 +261,38 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
if (Objects.nonNull(r.getInstance())) {
|
if (Objects.nonNull(r.getInstance())) {
|
||||||
|
|
||||||
for (Instance i : r.getInstance()) {
|
for (Instance i : r.getInstance()) {
|
||||||
|
if (!vocs.termExists(ModelConstants.DNET_PUBLICATION_RESOURCE, i.getInstancetype().getClassid())) {
|
||||||
|
if (r instanceof Publication) {
|
||||||
|
i
|
||||||
|
.setInstancetype(
|
||||||
|
OafMapperUtils
|
||||||
|
.qualifier(
|
||||||
|
"0038", "Other literature type", ModelConstants.DNET_PUBLICATION_RESOURCE,
|
||||||
|
ModelConstants.DNET_PUBLICATION_RESOURCE));
|
||||||
|
} else if (r instanceof Dataset) {
|
||||||
|
i
|
||||||
|
.setInstancetype(
|
||||||
|
OafMapperUtils
|
||||||
|
.qualifier(
|
||||||
|
"0039", "Other dataset type", ModelConstants.DNET_PUBLICATION_RESOURCE,
|
||||||
|
ModelConstants.DNET_PUBLICATION_RESOURCE));
|
||||||
|
} else if (r instanceof Software) {
|
||||||
|
i
|
||||||
|
.setInstancetype(
|
||||||
|
OafMapperUtils
|
||||||
|
.qualifier(
|
||||||
|
"0040", "Other software type", ModelConstants.DNET_PUBLICATION_RESOURCE,
|
||||||
|
ModelConstants.DNET_PUBLICATION_RESOURCE));
|
||||||
|
} else if (r instanceof OtherResearchProduct) {
|
||||||
|
i
|
||||||
|
.setInstancetype(
|
||||||
|
OafMapperUtils
|
||||||
|
.qualifier(
|
||||||
|
"0020", "Other ORP type", ModelConstants.DNET_PUBLICATION_RESOURCE,
|
||||||
|
ModelConstants.DNET_PUBLICATION_RESOURCE));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (Objects.nonNull(i.getPid())) {
|
if (Objects.nonNull(i.getPid())) {
|
||||||
i.setPid(processPidCleaning(i.getPid()));
|
i.setPid(processPidCleaning(i.getPid()));
|
||||||
}
|
}
|
||||||
|
@ -283,7 +352,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
r
|
r
|
||||||
.getAuthor()
|
.getAuthor()
|
||||||
.stream()
|
.stream()
|
||||||
.filter(a -> Objects.nonNull(a))
|
.filter(Objects::nonNull)
|
||||||
.filter(a -> StringUtils.isNotBlank(a.getFullname()))
|
.filter(a -> StringUtils.isNotBlank(a.getFullname()))
|
||||||
.filter(a -> StringUtils.isNotBlank(a.getFullname().replaceAll("[\\W]", "")))
|
.filter(a -> StringUtils.isNotBlank(a.getFullname().replaceAll("[\\W]", "")))
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
|
|
|
@ -17,13 +17,16 @@ import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
|
||||||
public class OafMapperUtils {
|
public class OafMapperUtils {
|
||||||
|
|
||||||
|
private OafMapperUtils() {
|
||||||
|
}
|
||||||
|
|
||||||
public static Oaf merge(final Oaf left, final Oaf right) {
|
public static Oaf merge(final Oaf left, final Oaf right) {
|
||||||
if (ModelSupport.isSubClass(left, OafEntity.class)) {
|
if (ModelSupport.isSubClass(left, OafEntity.class)) {
|
||||||
return mergeEntities((OafEntity) left, (OafEntity) right);
|
return mergeEntities((OafEntity) left, (OafEntity) right);
|
||||||
} else if (ModelSupport.isSubClass(left, Relation.class)) {
|
} else if (ModelSupport.isSubClass(left, Relation.class)) {
|
||||||
((Relation) left).mergeFrom((Relation) right);
|
((Relation) left).mergeFrom((Relation) right);
|
||||||
} else {
|
} else {
|
||||||
throw new RuntimeException("invalid Oaf type:" + left.getClass().getCanonicalName());
|
throw new IllegalArgumentException("invalid Oaf type:" + left.getClass().getCanonicalName());
|
||||||
}
|
}
|
||||||
return left;
|
return left;
|
||||||
}
|
}
|
||||||
|
@ -38,7 +41,7 @@ public class OafMapperUtils {
|
||||||
} else if (ModelSupport.isSubClass(left, Project.class)) {
|
} else if (ModelSupport.isSubClass(left, Project.class)) {
|
||||||
left.mergeFrom(right);
|
left.mergeFrom(right);
|
||||||
} else {
|
} else {
|
||||||
throw new RuntimeException("invalid OafEntity subtype:" + left.getClass().getCanonicalName());
|
throw new IllegalArgumentException("invalid OafEntity subtype:" + left.getClass().getCanonicalName());
|
||||||
}
|
}
|
||||||
return left;
|
return left;
|
||||||
}
|
}
|
||||||
|
@ -62,7 +65,7 @@ public class OafMapperUtils {
|
||||||
|
|
||||||
public static List<KeyValue> listKeyValues(final String... s) {
|
public static List<KeyValue> listKeyValues(final String... s) {
|
||||||
if (s.length % 2 > 0) {
|
if (s.length % 2 > 0) {
|
||||||
throw new RuntimeException("Invalid number of parameters (k,v,k,v,....)");
|
throw new IllegalArgumentException("Invalid number of parameters (k,v,k,v,....)");
|
||||||
}
|
}
|
||||||
|
|
||||||
final List<KeyValue> list = new ArrayList<>();
|
final List<KeyValue> list = new ArrayList<>();
|
||||||
|
@ -88,7 +91,7 @@ public class OafMapperUtils {
|
||||||
.stream(values)
|
.stream(values)
|
||||||
.map(v -> field(v, info))
|
.map(v -> field(v, info))
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.filter(distinctByKey(f -> f.getValue()))
|
.filter(distinctByKey(Field::getValue))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -97,7 +100,7 @@ public class OafMapperUtils {
|
||||||
.stream()
|
.stream()
|
||||||
.map(v -> field(v, info))
|
.map(v -> field(v, info))
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.filter(distinctByKey(f -> f.getValue()))
|
.filter(distinctByKey(Field::getValue))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -342,10 +345,10 @@ public class OafMapperUtils {
|
||||||
if (instanceList != null) {
|
if (instanceList != null) {
|
||||||
final Optional<AccessRight> min = instanceList
|
final Optional<AccessRight> min = instanceList
|
||||||
.stream()
|
.stream()
|
||||||
.map(i -> i.getAccessright())
|
.map(Instance::getAccessright)
|
||||||
.min(new AccessRightComparator<>());
|
.min(new AccessRightComparator<>());
|
||||||
|
|
||||||
final Qualifier rights = min.isPresent() ? qualifier(min.get()) : new Qualifier();
|
final Qualifier rights = min.map(OafMapperUtils::qualifier).orElseGet(Qualifier::new);
|
||||||
|
|
||||||
if (StringUtils.isBlank(rights.getClassid())) {
|
if (StringUtils.isBlank(rights.getClassid())) {
|
||||||
rights.setClassid(UNKNOWN);
|
rights.setClassid(UNKNOWN);
|
||||||
|
|
|
@ -4,19 +4,19 @@ package eu.dnetlib.dhp.utils;
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.security.MessageDigest;
|
import java.security.MessageDigest;
|
||||||
import java.util.List;
|
import java.util.*;
|
||||||
import java.util.Map;
|
import java.util.stream.Collectors;
|
||||||
import java.util.Properties;
|
|
||||||
import java.util.zip.GZIPInputStream;
|
|
||||||
import java.util.zip.GZIPOutputStream;
|
|
||||||
|
|
||||||
import org.apache.commons.codec.binary.Base64;
|
|
||||||
import org.apache.commons.codec.binary.Base64OutputStream;
|
|
||||||
import org.apache.commons.codec.binary.Hex;
|
import org.apache.commons.codec.binary.Hex;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||||
|
import org.apache.http.client.methods.HttpGet;
|
||||||
|
import org.apache.http.impl.client.CloseableHttpClient;
|
||||||
|
import org.apache.http.impl.client.HttpClients;
|
||||||
import org.apache.spark.sql.Dataset;
|
import org.apache.spark.sql.Dataset;
|
||||||
import org.apache.spark.sql.SaveMode;
|
import org.apache.spark.sql.SaveMode;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
|
@ -26,6 +26,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import com.google.common.collect.Maps;
|
import com.google.common.collect.Maps;
|
||||||
import com.jayway.jsonpath.JsonPath;
|
import com.jayway.jsonpath.JsonPath;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.mdstore.MDStoreWithInfo;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
|
||||||
import net.minidev.json.JSONArray;
|
import net.minidev.json.JSONArray;
|
||||||
import scala.collection.JavaConverters;
|
import scala.collection.JavaConverters;
|
||||||
import scala.collection.Seq;
|
import scala.collection.Seq;
|
||||||
|
@ -34,6 +36,9 @@ public class DHPUtils {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(DHPUtils.class);
|
private static final Logger log = LoggerFactory.getLogger(DHPUtils.class);
|
||||||
|
|
||||||
|
private DHPUtils() {
|
||||||
|
}
|
||||||
|
|
||||||
public static Seq<String> toSeq(List<String> list) {
|
public static Seq<String> toSeq(List<String> list) {
|
||||||
return JavaConverters.asScalaIteratorConverter(list.iterator()).asScala().toSeq();
|
return JavaConverters.asScalaIteratorConverter(list.iterator()).asScala().toSeq();
|
||||||
}
|
}
|
||||||
|
@ -44,40 +49,59 @@ public class DHPUtils {
|
||||||
md.update(s.getBytes(StandardCharsets.UTF_8));
|
md.update(s.getBytes(StandardCharsets.UTF_8));
|
||||||
return new String(Hex.encodeHex(md.digest()));
|
return new String(Hex.encodeHex(md.digest()));
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
System.err.println("Error creating id");
|
log.error("Error creating id from {}", s);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves from the metadata store manager application the list of paths associated with mdstores characterized
|
||||||
|
* by he given format, layout, interpretation
|
||||||
|
* @param mdstoreManagerUrl the URL of the mdstore manager service
|
||||||
|
* @param format the mdstore format
|
||||||
|
* @param layout the mdstore layout
|
||||||
|
* @param interpretation the mdstore interpretation
|
||||||
|
* @param includeEmpty include Empty mdstores
|
||||||
|
* @return the set of hdfs paths
|
||||||
|
* @throws IOException in case of HTTP communication issues
|
||||||
|
*/
|
||||||
|
public static Set<String> mdstorePaths(final String mdstoreManagerUrl,
|
||||||
|
final String format,
|
||||||
|
final String layout,
|
||||||
|
final String interpretation,
|
||||||
|
boolean includeEmpty) throws IOException {
|
||||||
|
final String url = mdstoreManagerUrl + "/mdstores/";
|
||||||
|
final ObjectMapper objectMapper = new ObjectMapper();
|
||||||
|
|
||||||
|
final HttpGet req = new HttpGet(url);
|
||||||
|
|
||||||
|
try (final CloseableHttpClient client = HttpClients.createDefault()) {
|
||||||
|
try (final CloseableHttpResponse response = client.execute(req)) {
|
||||||
|
final String json = IOUtils.toString(response.getEntity().getContent());
|
||||||
|
final MDStoreWithInfo[] mdstores = objectMapper.readValue(json, MDStoreWithInfo[].class);
|
||||||
|
return Arrays
|
||||||
|
.stream(mdstores)
|
||||||
|
.filter(md -> md.getFormat().equalsIgnoreCase(format))
|
||||||
|
.filter(md -> md.getLayout().equalsIgnoreCase(layout))
|
||||||
|
.filter(md -> md.getInterpretation().equalsIgnoreCase(interpretation))
|
||||||
|
.filter(md -> StringUtils.isNotBlank(md.getHdfsPath()))
|
||||||
|
.filter(md -> StringUtils.isNotBlank(md.getCurrentVersion()))
|
||||||
|
.filter(md -> includeEmpty || md.getSize() > 0)
|
||||||
|
.map(md -> md.getHdfsPath() + "/" + md.getCurrentVersion() + "/store")
|
||||||
|
.collect(Collectors.toSet());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public static String generateIdentifier(final String originalId, final String nsPrefix) {
|
public static String generateIdentifier(final String originalId, final String nsPrefix) {
|
||||||
return String.format("%s::%s", nsPrefix, DHPUtils.md5(originalId));
|
return String.format("%s::%s", nsPrefix, DHPUtils.md5(originalId));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String compressString(final String input) {
|
public static String generateUnresolvedIdentifier(final String pid, final String pidType) {
|
||||||
try (ByteArrayOutputStream out = new ByteArrayOutputStream();
|
|
||||||
Base64OutputStream b64os = new Base64OutputStream(out)) {
|
|
||||||
GZIPOutputStream gzip = new GZIPOutputStream(b64os);
|
|
||||||
gzip.write(input.getBytes(StandardCharsets.UTF_8));
|
|
||||||
gzip.close();
|
|
||||||
return out.toString();
|
|
||||||
} catch (Throwable e) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String decompressString(final String input) {
|
final String cleanedPid = CleaningFunctions.normalizePidValue(pidType, pid);
|
||||||
byte[] byteArray = Base64.decodeBase64(input.getBytes());
|
|
||||||
int len;
|
return String.format("unresolved::%s::%s", cleanedPid, pidType.toLowerCase().trim());
|
||||||
try (GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream((byteArray)));
|
|
||||||
ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length)) {
|
|
||||||
byte[] buffer = new byte[1024];
|
|
||||||
while ((len = gis.read(buffer)) != -1) {
|
|
||||||
bos.write(buffer, 0, len);
|
|
||||||
}
|
|
||||||
return bos.toString();
|
|
||||||
} catch (Exception e) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String getJPathString(final String jsonPath, final String json) {
|
public static String getJPathString(final String jsonPath, final String json) {
|
||||||
|
|
|
@ -18,13 +18,16 @@ public class ISLookupClientFactory {
|
||||||
private static final int requestTimeout = 60000 * 10;
|
private static final int requestTimeout = 60000 * 10;
|
||||||
private static final int connectTimeout = 60000 * 10;
|
private static final int connectTimeout = 60000 * 10;
|
||||||
|
|
||||||
|
private ISLookupClientFactory() {
|
||||||
|
}
|
||||||
|
|
||||||
public static ISLookUpService getLookUpService(final String isLookupUrl) {
|
public static ISLookUpService getLookUpService(final String isLookupUrl) {
|
||||||
return getServiceStub(ISLookUpService.class, isLookupUrl);
|
return getServiceStub(ISLookUpService.class, isLookupUrl);
|
||||||
}
|
}
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
private static <T> T getServiceStub(final Class<T> clazz, final String endpoint) {
|
private static <T> T getServiceStub(final Class<T> clazz, final String endpoint) {
|
||||||
log.info(String.format("creating %s stub from %s", clazz.getName(), endpoint));
|
log.info("creating {} stub from {}", clazz.getName(), endpoint);
|
||||||
final JaxWsProxyFactoryBean jaxWsProxyFactory = new JaxWsProxyFactoryBean();
|
final JaxWsProxyFactoryBean jaxWsProxyFactory = new JaxWsProxyFactoryBean();
|
||||||
jaxWsProxyFactory.setServiceClass(clazz);
|
jaxWsProxyFactory.setServiceClass(clazz);
|
||||||
jaxWsProxyFactory.setAddress(endpoint);
|
jaxWsProxyFactory.setAddress(endpoint);
|
||||||
|
@ -38,12 +41,10 @@ public class ISLookupClientFactory {
|
||||||
|
|
||||||
log
|
log
|
||||||
.info(
|
.info(
|
||||||
String
|
"setting connectTimeout to {}, requestTimeout to {} for service {}",
|
||||||
.format(
|
|
||||||
"setting connectTimeout to %s, requestTimeout to %s for service %s",
|
|
||||||
connectTimeout,
|
connectTimeout,
|
||||||
requestTimeout,
|
requestTimeout,
|
||||||
clazz.getCanonicalName()));
|
clazz.getCanonicalName());
|
||||||
|
|
||||||
policy.setConnectionTimeout(connectTimeout);
|
policy.setConnectionTimeout(connectTimeout);
|
||||||
policy.setReceiveTimeout(requestTimeout);
|
policy.setReceiveTimeout(requestTimeout);
|
||||||
|
|
|
@ -10,7 +10,7 @@ import net.sf.saxon.trans.XPathException;
|
||||||
|
|
||||||
public abstract class AbstractExtensionFunction extends ExtensionFunctionDefinition {
|
public abstract class AbstractExtensionFunction extends ExtensionFunctionDefinition {
|
||||||
|
|
||||||
public static String DEFAULT_SAXON_EXT_NS_URI = "http://www.d-net.research-infrastructures.eu/saxon-extension";
|
public static final String DEFAULT_SAXON_EXT_NS_URI = "http://www.d-net.research-infrastructures.eu/saxon-extension";
|
||||||
|
|
||||||
public abstract String getName();
|
public abstract String getName();
|
||||||
|
|
||||||
|
|
|
@ -26,7 +26,7 @@ public class ExtractYear extends AbstractExtensionFunction {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException {
|
public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException {
|
||||||
if (arguments == null | arguments.length == 0) {
|
if (arguments == null || arguments.length == 0) {
|
||||||
return new StringValue("");
|
return new StringValue("");
|
||||||
}
|
}
|
||||||
final Item item = arguments[0].head();
|
final Item item = arguments[0].head();
|
||||||
|
@ -63,8 +63,7 @@ public class ExtractYear extends AbstractExtensionFunction {
|
||||||
for (String format : dateFormats) {
|
for (String format : dateFormats) {
|
||||||
try {
|
try {
|
||||||
c.setTime(new SimpleDateFormat(format).parse(s));
|
c.setTime(new SimpleDateFormat(format).parse(s));
|
||||||
String year = String.valueOf(c.get(Calendar.YEAR));
|
return String.valueOf(c.get(Calendar.YEAR));
|
||||||
return year;
|
|
||||||
} catch (ParseException e) {
|
} catch (ParseException e) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,7 +30,7 @@ public class NormalizeDate extends AbstractExtensionFunction {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException {
|
public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException {
|
||||||
if (arguments == null | arguments.length == 0) {
|
if (arguments == null || arguments.length == 0) {
|
||||||
return new StringValue(BLANK);
|
return new StringValue(BLANK);
|
||||||
}
|
}
|
||||||
String s = arguments[0].head().getStringValue();
|
String s = arguments[0].head().getStringValue();
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.utils.saxon;
|
package eu.dnetlib.dhp.utils.saxon;
|
||||||
|
|
||||||
|
import static org.apache.commons.lang3.StringUtils.isNotBlank;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import net.sf.saxon.expr.XPathContext;
|
import net.sf.saxon.expr.XPathContext;
|
||||||
|
@ -26,7 +28,8 @@ public class PickFirst extends AbstractExtensionFunction {
|
||||||
final String s1 = getValue(arguments[0]);
|
final String s1 = getValue(arguments[0]);
|
||||||
final String s2 = getValue(arguments[1]);
|
final String s2 = getValue(arguments[1]);
|
||||||
|
|
||||||
return new StringValue(StringUtils.isNotBlank(s1) ? s1 : StringUtils.isNotBlank(s2) ? s2 : "");
|
final String value = isNotBlank(s1) ? s1 : isNotBlank(s2) ? s2 : "";
|
||||||
|
return new StringValue(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getValue(final Sequence arg) throws XPathException {
|
private String getValue(final Sequence arg) throws XPathException {
|
||||||
|
|
|
@ -12,6 +12,9 @@ import net.sf.saxon.TransformerFactoryImpl;
|
||||||
|
|
||||||
public class SaxonTransformerFactory {
|
public class SaxonTransformerFactory {
|
||||||
|
|
||||||
|
private SaxonTransformerFactory() {
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates the index record transformer from the given XSLT
|
* Creates the index record transformer from the given XSLT
|
||||||
*
|
*
|
||||||
|
|
|
@ -0,0 +1,73 @@
|
||||||
|
package eu.dnetlib.dhp.application
|
||||||
|
|
||||||
|
import scala.io.Source
|
||||||
|
|
||||||
|
/** Common interface of the Spark applications written in Scala:
  * every Spark Scala class should inherit from it.
  */
trait SparkScalaApplication {

  /** Path, in the classpath, of the JSON file
    * describing all the arguments needed to run
    */
  val propertyPath: String

  /** Utility to parse the arguments using the
    * property JSON in the classpath identified by
    * the variable propertyPath
    *
    * @param args the list of arguments
    * @return the parser, after `parseArgument(args)` has been invoked on it
    * @throws IllegalArgumentException when propertyPath cannot be found in the classpath
    */
  def parseArguments(args: Array[String]): ArgumentApplicationParser = {
    val stream = getClass.getResourceAsStream(propertyPath)
    // getResourceAsStream returns null for a missing resource: fail with an explicit message
    require(stream != null, s"Unable to find the argument specification '$propertyPath' in the classpath")

    val source = Source.fromInputStream(stream)
    // the specification is read eagerly, so the stream can be released right after
    val parser =
      try new ArgumentApplicationParser(source.mkString)
      finally source.close()

    parser.parseArgument(args)
    parser
  }

  /** Entry point implemented by every Spark application:
    * the whole logic of the Spark node is defined here
    */
  def run(): Unit
}
|
||||||
|
|
||||||
|
import org.apache.spark.SparkConf
|
||||||
|
import org.apache.spark.sql.SparkSession
|
||||||
|
import org.slf4j.Logger
|
||||||
|
|
||||||
|
/** Base class of the Scala Spark applications: `initialize()` parses the
  * command line arguments and creates the Spark session.
  *
  * @param propertyPath classpath location of the JSON describing the accepted arguments
  * @param args the raw command line arguments
  * @param log the logger used while creating the Spark session
  */
abstract class AbstractScalaApplication(
  val propertyPath: String,
  val args: Array[String],
  log: Logger
) extends SparkScalaApplication {

  /** Parsed arguments, null until `initialize()` is called */
  var parser: ArgumentApplicationParser = null

  /** Spark session, null until `initialize()` is called */
  var spark: SparkSession = null

  /** Parses the arguments and creates the Spark session.
    *
    * @return this application, ready to be run
    */
  def initialize(): SparkScalaApplication = {
    parser = parseArguments(args)
    spark = createSparkSession()
    this
  }

  /** Utility for creating a spark session starting from parser
    *
    * @return a spark Session
    */
  private def createSparkSession(): SparkSession = {
    require(parser != null, "the argument parser must be initialized before creating the Spark session")

    val conf: SparkConf = new SparkConf()
    val master = parser.get("master")
    log.info(s"Creating Spark session: Master: $master")

    val builder = SparkSession
      .builder()
      .config(conf)
      .appName(getClass.getSimpleName)

    // when no master argument is supplied, do not hand a null value to the builder:
    // the master is then left to whatever the Spark configuration provides
    val configured = if (master != null) builder.master(master) else builder
    configured.getOrCreate()
  }

}
|
|
@ -0,0 +1,442 @@
|
||||||
|
package eu.dnetlib.dhp.sx.graph.scholix
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.{Publication, Relation, Result, StructuredProperty}
|
||||||
|
import eu.dnetlib.dhp.schema.sx.scholix._
|
||||||
|
import eu.dnetlib.dhp.schema.sx.summary.{CollectedFromType, SchemeValue, ScholixSummary, Typology}
|
||||||
|
import eu.dnetlib.dhp.utils.DHPUtils
|
||||||
|
import org.apache.spark.sql.expressions.Aggregator
|
||||||
|
import org.apache.spark.sql.{Encoder, Encoders}
|
||||||
|
import org.json4s
|
||||||
|
import org.json4s.DefaultFormats
|
||||||
|
import org.json4s.jackson.JsonMethods.parse
|
||||||
|
import scala.collection.JavaConverters._
|
||||||
|
import scala.io.Source
|
||||||
|
|
||||||
|
object ScholixUtils extends Serializable {
|
||||||
|
|
||||||
|
/** Schema name given to the identifiers built from datasource ids and collectedfrom keys */
val DNET_IDENTIFIER_SCHEMA: String = "DNET Identifier"

/** Key of the relation property that carries the date of the relation */
val DATE_RELATION_KEY: String = "RelationDate"

/** Entry of the relation vocabulary: the name of a relation and the name of its inverse */
case class RelationVocabulary(original: String, inverse: String)

/** Counters of the datasets and publications related to the entity identified by id */
case class RelatedEntities(id: String, relatedDataset: Long, relatedPublication: Long)
|
||||||
|
|
||||||
|
/** Relation vocabulary, loaded from the classpath resource
  * `/eu/dnetlib/scholexplorer/relation/relations.json` and parsed as a JSON
  * object mapping a key to a RelationVocabulary.
  */
val relations: Map[String, RelationVocabulary] = {
  val resourcePath = "/eu/dnetlib/scholexplorer/relation/relations.json"
  val stream = getClass.getResourceAsStream(resourcePath)
  // getResourceAsStream returns null for a missing resource: fail with an explicit message
  require(stream != null, s"Unable to find '$resourcePath' in the classpath")

  val source = Source.fromInputStream(stream)
  // the content is read eagerly, so the stream can be released right after
  val input =
    try source.mkString
    finally source.close()

  implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats

  lazy val json: json4s.JValue = parse(input)

  json.extract[Map[String, RelationVocabulary]]
}
|
||||||
|
|
||||||
|
/** Extracts the date of a relation from its properties.
  *
  * @param relation the relation to inspect
  * @return the value of the first property whose key matches DATE_RELATION_KEY
  *         (case insensitive), null when the relation has no properties or no such property
  */
def extractRelationDate(relation: Relation): String = {
  // the date can only be found when there is at least one property to inspect
  if (relation.getProperties == null || relation.getProperties.isEmpty)
    null
  else
    relation.getProperties.asScala
      .find(p => DATE_RELATION_KEY.equalsIgnoreCase(p.getKey))
      .map(p => p.getValue)
      .orNull
}
|
||||||
|
|
||||||
|
/** Picks the date to be used for a relation from a summary.
  *
  * @param summary the summary to inspect
  * @return the first date of the summary, null when the summary has no dates
  */
def extractRelationDate(summary: ScholixSummary): String = {
  val dates = summary.getDate
  if (dates != null && !dates.isEmpty) dates.get(0) else null
}
|
||||||
|
|
||||||
|
/** Builds the relationship seen from the opposite direction.
  *
  * @param rel the relationship to invert
  * @return a new relationship created from the inverse, the schema and the name of rel, in this order
  */
def inverseRelationShip(rel: ScholixRelationship): ScholixRelationship = {
  val inverseName = rel.getInverse
  val schema = rel.getSchema
  val originalName = rel.getName
  new ScholixRelationship(inverseName, schema, originalName)
}
|
||||||
|
|
||||||
|
/** Converts a Result into a ScholixResource, passing through its summary.
  *
  * @param r the result to convert
  * @return the resource, or null when no summary can be generated from the result
  */
def generateScholixResourceFromResult(r: Result): ScholixResource = {
  val summary = ScholixUtils.resultToSummary(r)
  // resultToSummary returns null for results it cannot summarize (e.g. without pid)
  if (summary == null)
    null
  else
    generateScholixResourceFromSummary(summary)
}
|
||||||
|
|
||||||
|
/** Aggregator summing, for an id, the counts of related datasets and publications.
  * The input is a triple (id, typology, count); counts whose typology is neither
  * "dataset" nor "publication" (case insensitive) are ignored.
  */
val statsAggregator: Aggregator[(String, String, Long), RelatedEntities, RelatedEntities] =
  new Aggregator[(String, String, Long), RelatedEntities, RelatedEntities] with Serializable {

    // the buffer stays null until the first element is reduced
    override def zero: RelatedEntities = null

    override def reduce(b: RelatedEntities, a: (String, String, Long)): RelatedEntities = {
      val typology = a._2
      val datasets: Long = if ("dataset".equalsIgnoreCase(typology)) a._3 else 0
      val publications: Long = if ("publication".equalsIgnoreCase(typology)) a._3 else 0

      // a missing buffer contributes nothing to the totals
      val (previousDatasets, previousPublications) =
        if (b == null) (0L, 0L) else (b.relatedDataset, b.relatedPublication)

      RelatedEntities(a._1, previousDatasets + datasets, previousPublications + publications)
    }

    override def merge(b1: RelatedEntities, b2: RelatedEntities): RelatedEntities =
      (b1, b2) match {
        case (null, _) => b2
        case (_, null) => b1
        case _ =>
          RelatedEntities(
            b1.id,
            b1.relatedDataset + b2.relatedDataset,
            b1.relatedPublication + b2.relatedPublication
          )
      }

    override def finish(reduction: RelatedEntities): RelatedEntities = reduction

    override def bufferEncoder: Encoder[RelatedEntities] = Encoders.bean(classOf[RelatedEntities])

    override def outputEncoder: Encoder[RelatedEntities] = Encoders.bean(classOf[RelatedEntities])
  }
|
||||||
|
|
||||||
|
/** Aggregator selecting one Scholix among those sharing the same key:
  * the buffer is kept as soon as it is complete, otherwise it is replaced
  * by the next candidate.
  */
val scholixAggregator: Aggregator[(String, Scholix), Scholix, Scholix] =
  new Aggregator[(String, Scholix), Scholix, Scholix] with Serializable {

    override def zero: Scholix = null

    /** A Scholix is complete when it has an identifier, a source,
      * a target and at least one link provider
      */
    def scholix_complete(s: Scholix): Boolean =
      s != null &&
        s.getIdentifier != null &&
        s.getSource != null &&
        s.getTarget != null &&
        s.getLinkprovider != null &&
        !s.getLinkprovider.isEmpty

    override def reduce(b: Scholix, a: (String, Scholix)): Scholix = {
      val candidate = a._2
      if (!scholix_complete(b)) candidate else b
    }

    override def merge(b1: Scholix, b2: Scholix): Scholix =
      if (!scholix_complete(b1)) b2 else b1

    override def finish(reduction: Scholix): Scholix = reduction

    override def bufferEncoder: Encoder[Scholix] = Encoders.kryo[Scholix]

    override def outputEncoder: Encoder[Scholix] = Encoders.kryo[Scholix]
  }
|
||||||
|
|
||||||
|
/** Creates the Scholix describing the same link in the opposite direction:
  * source and target are swapped and the relationship is inverted, while
  * publication date, publisher and link providers are copied as they are.
  *
  * @param scholix the link to invert
  * @return a new Scholix whose identifier is the md5 of
  *         "sourceIdentifier::relationshipName::targetIdentifier"
  */
def createInverseScholixRelation(scholix: Scholix): Scholix = {
  val inverted = new Scholix

  // swapped endpoints and inverted semantics
  inverted.setSource(scholix.getTarget)
  inverted.setTarget(scholix.getSource)
  inverted.setRelationship(inverseRelationShip(scholix.getRelationship))

  // information copied unchanged from the original link
  inverted.setLinkprovider(scholix.getLinkprovider)
  inverted.setPublisher(scholix.getPublisher)
  inverted.setPublicationDate(scholix.getPublicationDate)

  val identifierSeed =
    s"${inverted.getSource.getIdentifier}::${inverted.getRelationship.getName}::${inverted.getTarget.getIdentifier}"
  inverted.setIdentifier(DHPUtils.md5(identifierSeed))

  inverted
}
|
||||||
|
|
||||||
|
/** Extracts the providers of a resource.
  *
  * @param summary the resource to inspect
  * @return one ScholixEntityId (provider name and identifiers) for each collectedFrom
  *         of the resource, an empty list when there is none
  */
def extractCollectedFrom(summary: ScholixResource): List[ScholixEntityId] = {
  val collectedFrom = summary.getCollectedFrom
  if (collectedFrom == null || collectedFrom.isEmpty)
    List()
  else
    collectedFrom.asScala
      .map(d => new ScholixEntityId(d.getProvider.getName, d.getProvider.getIdentifiers))
      .toList
}
|
||||||
|
|
||||||
|
/** Extracts the datasources of a summary as entity ids.
  *
  * @param summary the summary to inspect
  * @return one ScholixEntityId for each datasource of the summary, named after the
  *         datasource and identified by its id under the DNET_IDENTIFIER_SCHEMA schema;
  *         an empty list when the summary has no datasources
  */
def extractCollectedFrom(summary: ScholixSummary): List[ScholixEntityId] = {
  if (summary.getDatasources != null && !summary.getDatasources.isEmpty) {
    summary.getDatasources.asScala.map { d =>
      new ScholixEntityId(
        d.getDatasourceName,
        // shared constant instead of a repeated literal, as done for the relations
        List(new ScholixIdentifier(d.getDatasourceId, DNET_IDENTIFIER_SCHEMA, null)).asJava
      )
    }.toList
  } else List()
}
|
||||||
|
|
||||||
|
/** Extracts the collectedfrom of a relation as entity ids.
  *
  * @param relation the relation to inspect
  * @return one ScholixEntityId for each collectedfrom of the relation, named after its
  *         value and identified by its key under the DNET_IDENTIFIER_SCHEMA schema;
  *         an empty list when the relation has no collectedfrom
  */
def extractCollectedFrom(relation: Relation): List[ScholixEntityId] = {
  val collectedFrom = relation.getCollectedfrom
  if (collectedFrom == null || collectedFrom.isEmpty)
    List()
  else
    collectedFrom.asScala.map { kv =>
      val identifiers = List(new ScholixIdentifier(kv.getKey, DNET_IDENTIFIER_SCHEMA, null)).asJava
      new ScholixEntityId(kv.getValue, identifiers)
    }.toList
}
|
||||||
|
|
||||||
|
/** Completes a Scholix having only the source with the target described by a summary.
  *
  * @param scholix the partial link, providing everything but the target
  * @param target the summary of the target
  * @return a new Scholix whose target is the resource generated from the summary and whose
  *         identifier is the md5 of "sourceIdentifier::relationshipName::targetIdentifier"
  */
def generateCompleteScholix(scholix: Scholix, target: ScholixSummary): Scholix = {
  // same steps as the ScholixResource variant, once the summary is converted
  val targetResource: ScholixResource = generateScholixResourceFromSummary(target)
  generateCompleteScholix(scholix, targetResource)
}
|
||||||
|
|
||||||
|
/** Completes a Scholix having only the source with the given target.
  *
  * @param scholix the partial link, providing everything but the target
  * @param target the resource to be used as target
  * @return a new Scholix whose identifier is the md5 of
  *         "sourceIdentifier::relationshipName::targetIdentifier"
  */
def generateCompleteScholix(scholix: Scholix, target: ScholixResource): Scholix = {
  val completed = new Scholix

  // endpoints and semantics of the link
  completed.setSource(scholix.getSource)
  completed.setTarget(target)
  completed.setRelationship(scholix.getRelationship)

  // information copied unchanged from the partial link
  completed.setLinkprovider(scholix.getLinkprovider)
  completed.setPublisher(scholix.getPublisher)
  completed.setPublicationDate(scholix.getPublicationDate)

  val identifierSeed =
    s"${completed.getSource.getIdentifier}::${completed.getRelationship.getName}::${completed.getTarget.getIdentifier}"
  completed.setIdentifier(DHPUtils.md5(identifierSeed))

  completed
}
|
||||||
|
|
||||||
|
/** Converts a summary into a ScholixResource.
  * Identifiers, typology and subtype are always copied; title, creators,
  * publication date, publishers and collectedFrom are set only when the
  * summary provides the corresponding values. Only the first title and the
  * first date of the summary are used.
  *
  * @param summaryObject the summary to convert
  * @return the generated resource
  */
def generateScholixResourceFromSummary(summaryObject: ScholixSummary): ScholixResource = {
  val resource = new ScholixResource
  resource.setIdentifier(summaryObject.getLocalIdentifier)
  resource.setDnetIdentifier(summaryObject.getId)
  resource.setObjectType(summaryObject.getTypology.toString)
  resource.setObjectSubType(summaryObject.getSubType)

  val titles = summaryObject.getTitle
  if (titles != null && !titles.isEmpty)
    resource.setTitle(titles.get(0))

  val authors = summaryObject.getAuthor
  if (authors != null && !authors.isEmpty) {
    val creators: List[ScholixEntityId] =
      authors.asScala.map(name => new ScholixEntityId(name, null)).toList
    resource.setCreator(creators.asJava)
  }

  val dates = summaryObject.getDate
  if (dates != null && !dates.isEmpty)
    resource.setPublicationDate(dates.get(0))

  val publisherNames = summaryObject.getPublisher
  if (publisherNames != null && !publisherNames.isEmpty) {
    val publishers: List[ScholixEntityId] =
      publisherNames.asScala.map(name => new ScholixEntityId(name, null)).toList
    resource.setPublisher(publishers.asJava)
  }

  val datasources = summaryObject.getDatasources
  if (datasources != null && !datasources.isEmpty) {
    val collectedFrom: List[ScholixCollectedFrom] = datasources.asScala.map { ds =>
      val provider = new ScholixEntityId(
        ds.getDatasourceName,
        List(new ScholixIdentifier(ds.getDatasourceId, DNET_IDENTIFIER_SCHEMA, null)).asJava
      )
      new ScholixCollectedFrom(provider, "collected", "complete")
    }.toList
    resource.setCollectedFrom(collectedFrom.asJava)
  }

  resource
}
|
||||||
|
|
||||||
|
/** Creates a partial Scholix (without target and identifier) from a relation and its source.
  * The link providers come from the relation or, when it has none, from the source;
  * the publication date comes from the relation or, when it has none, from the source.
  *
  * @param relation the relation to convert
  * @param source the resource on the source side of the relation
  * @return the partial Scholix, or null when an argument is null, no link provider
  *         can be found or the relation class is not in the relation vocabulary
  */
def scholixFromSource(relation: Relation, source: ScholixResource): Scholix = {
  if (relation == null || source == null)
    return null

  val fromRelation: List[ScholixEntityId] = extractCollectedFrom(relation)
  val linkProviders: List[ScholixEntityId] =
    if (fromRelation.nonEmpty) fromRelation else extractCollectedFrom(source)
  if (linkProviders.isEmpty)
    return null

  val scholix = new Scholix
  scholix.setLinkprovider(linkProviders.asJava)

  val relationDate = extractRelationDate(relation)
  scholix.setPublicationDate(if (relationDate != null) relationDate else source.getPublicationDate)

  val publishers = source.getPublisher
  if (publishers != null && !publishers.isEmpty)
    scholix.setPublisher(publishers)

  // the vocabulary is looked up with the lower-cased relation class
  relations.get(relation.getRelClass.toLowerCase) match {
    case Some(vocabulary) if vocabulary != null =>
      scholix.setRelationship(
        new ScholixRelationship(vocabulary.original, "datacite", vocabulary.inverse)
      )
      scholix.setSource(source)
      scholix
    case _ => null
  }
}
|
||||||
|
|
||||||
|
/** Creates a partial Scholix (without target and identifier) from a relation and the
  * summary of its source.
  * The link providers come from the relation or, when it has none, from the datasources
  * of the summary; the publication date comes from the relation or, when it has none,
  * from the summary.
  *
  * @param relation the relation to convert
  * @param source the summary of the source side of the relation
  * @return the partial Scholix, or null when an argument is null, no link provider
  *         can be found or the relation class is not in the relation vocabulary
  */
def scholixFromSource(relation: Relation, source: ScholixSummary): Scholix = {
  if (relation == null || source == null)
    return null

  val fromRelation: List[ScholixEntityId] = extractCollectedFrom(relation)
  val linkProviders: List[ScholixEntityId] =
    if (fromRelation.nonEmpty) fromRelation else extractCollectedFrom(source)
  if (linkProviders.isEmpty)
    return null

  val scholix = new Scholix
  scholix.setLinkprovider(linkProviders.asJava)

  val relationDate = extractRelationDate(relation)
  scholix.setPublicationDate(if (relationDate != null) relationDate else extractRelationDate(source))

  val publisherNames = source.getPublisher
  if (publisherNames != null && !publisherNames.isEmpty) {
    val publishers: List[ScholixEntityId] =
      publisherNames.asScala.map(name => new ScholixEntityId(name, null)).toList
    scholix.setPublisher(publishers.asJava)
  }

  // the vocabulary is looked up with the lower-cased relation class
  relations.get(relation.getRelClass.toLowerCase) match {
    case Some(vocabulary) if vocabulary != null =>
      scholix.setRelationship(
        new ScholixRelationship(vocabulary.original, "datacite", vocabulary.inverse)
      )
      scholix.setSource(generateScholixResourceFromSummary(source))
      scholix
    case _ => null
  }
}
|
||||||
|
|
||||||
|
/** Associates each pid with the first url that contains its value (case insensitive).
  *
  * @param pidValue the pids to resolve
  * @param urls the candidate urls
  * @return one pair for each pid, in the same order: the pid and the matching url,
  *         null when no url contains the pid value or the pid has no value
  */
def findURLForPID(
  pidValue: List[StructuredProperty],
  urls: List[String]
): List[(StructuredProperty, String)] = {
  pidValue.map { p =>
    // lower-cased once per pid instead of once per url; a pid without value matches nothing
    val needle = Option(p.getValue).map(_.toLowerCase)

    // null urls are skipped instead of failing the whole lookup
    val matching = needle.flatMap(n => urls.find(u => u != null && u.toLowerCase.contains(n)))
    (p, matching.orNull)
  }
}
|
||||||
|
|
||||||
|
/** Extracts the typed identifiers of a result from its instances.
  * Only the instances having both pids and at least one url are considered: each of
  * their pids becomes a ScholixIdentifier built from the pid value, the class id of its
  * qualifier and the url found by findURLForPID (possibly null).
  *
  * @param r the result to inspect
  * @return the distinct identifiers, an empty list when the result has no instances
  */
def extractTypedIdentifierFromInstance(r: Result): List[ScholixIdentifier] = {
  val instances = r.getInstance()
  if (instances == null || instances.isEmpty)
    List()
  else
    instances.asScala
      .filter(i => i.getUrl != null && !i.getUrl.isEmpty && i.getPid != null)
      .flatMap(i => findURLForPID(i.getPid.asScala.toList, i.getUrl.asScala.toList))
      .map { case (pid, url) =>
        new ScholixIdentifier(pid.getValue, pid.getQualifier.getClassid, url)
      }
      .distinct
      .toList
}
|
||||||
|
|
||||||
|
/** Converts a Result into a ScholixSummary.
  * The typology is publication for a Publication and dataset for any other result;
  * the subtype is the instance type class name of the first instance. Titles, authors,
  * dates, description, subjects, publisher and datasources are set only when the result
  * provides them; the counters of related entities are initialized to 0.
  *
  * @param r the result to convert
  * @return the summary, or null when the result has no pid or no typed identifier
  *         can be extracted from its instances
  */
def resultToSummary(r: Result): ScholixSummary = {
  if (r.getPid == null || r.getPid.isEmpty)
    return null

  val typedIdentifiers: List[ScholixIdentifier] = extractTypedIdentifierFromInstance(r)
  if (typedIdentifiers.isEmpty)
    return null

  val summary = new ScholixSummary
  summary.setId(r.getId)
  summary.setLocalIdentifier(typedIdentifiers.asJava)

  r match {
    case _: Publication => summary.setTypology(Typology.publication)
    case _              => summary.setTypology(Typology.dataset)
  }

  summary.setSubType(r.getInstance().get(0).getInstancetype.getClassname)

  if (r.getTitle != null && r.getTitle.asScala.nonEmpty) {
    val titles: List[String] = r.getTitle.asScala.map(t => t.getValue).toList
    summary.setTitle(titles.asJava)
  }

  if (r.getAuthor != null && !r.getAuthor.isEmpty) {
    val authors: List[String] = r.getAuthor.asScala.map(a => a.getFullname).toList
    summary.setAuthor(authors.asJava)
  }

  if (r.getInstance() != null) {
    // dates of acceptance of the instances, without duplicates
    val dates: List[String] = r
      .getInstance()
      .asScala
      .filter(i => i.getDateofacceptance != null)
      .map(i => i.getDateofacceptance.getValue)
      .toList
    if (dates.nonEmpty)
      summary.setDate(dates.distinct.asJava)
  }

  if (r.getDescription != null && !r.getDescription.isEmpty) {
    // only the first description having a value is kept
    r.getDescription.asScala
      .find(d => d != null && d.getValue != null)
      .foreach(d => summary.setDescription(d.getValue))
  }

  if (r.getSubject != null && !r.getSubject.isEmpty) {
    val subjects: List[SchemeValue] = r.getSubject.asScala
      .map(sbj => new SchemeValue(sbj.getQualifier.getClassname, sbj.getValue))
      .toList
    summary.setSubject(subjects.asJava)
  }

  if (r.getPublisher != null)
    summary.setPublisher(List(r.getPublisher.getValue).asJava)

  if (r.getCollectedfrom != null && !r.getCollectedfrom.isEmpty) {
    val datasources: List[CollectedFromType] = r.getCollectedfrom.asScala
      .map(c => new CollectedFromType(c.getValue, c.getKey, "complete"))
      .toList
    summary.setDatasources(datasources.distinct.asJava)
  }

  summary.setRelatedDatasets(0)
  summary.setRelatedPublications(0)
  summary.setRelatedUnknown(0)

  summary
}
|
||||||
|
|
||||||
|
}
|
|
@ -7,10 +7,10 @@ import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
public class ArgumentApplicationParserTest {
|
class ArgumentApplicationParserTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testParseParameter() throws Exception {
|
void testParseParameter() throws Exception {
|
||||||
final String jsonConfiguration = IOUtils
|
final String jsonConfiguration = IOUtils
|
||||||
.toString(
|
.toString(
|
||||||
this.getClass().getResourceAsStream("/eu/dnetlib/application/parameters.json"));
|
this.getClass().getResourceAsStream("/eu/dnetlib/application/parameters.json"));
|
||||||
|
|
|
@ -21,13 +21,13 @@ public class HdfsSupportTest {
|
||||||
class Remove {
|
class Remove {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowARuntimeExceptionOnError() {
|
void shouldThrowARuntimeExceptionOnError() {
|
||||||
// when
|
// when
|
||||||
assertThrows(RuntimeException.class, () -> HdfsSupport.remove(null, new Configuration()));
|
assertThrows(RuntimeException.class, () -> HdfsSupport.remove(null, new Configuration()));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldRemoveADirFromHDFS(@TempDir Path tempDir) {
|
void shouldRemoveADirFromHDFS(@TempDir Path tempDir) {
|
||||||
// when
|
// when
|
||||||
HdfsSupport.remove(tempDir.toString(), new Configuration());
|
HdfsSupport.remove(tempDir.toString(), new Configuration());
|
||||||
|
|
||||||
|
@ -36,7 +36,7 @@ public class HdfsSupportTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldRemoveAFileFromHDFS(@TempDir Path tempDir) throws IOException {
|
void shouldRemoveAFileFromHDFS(@TempDir Path tempDir) throws IOException {
|
||||||
// given
|
// given
|
||||||
Path file = Files.createTempFile(tempDir, "p", "s");
|
Path file = Files.createTempFile(tempDir, "p", "s");
|
||||||
|
|
||||||
|
@ -52,13 +52,13 @@ public class HdfsSupportTest {
|
||||||
class ListFiles {
|
class ListFiles {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowARuntimeExceptionOnError() {
|
void shouldThrowARuntimeExceptionOnError() {
|
||||||
// when
|
// when
|
||||||
assertThrows(RuntimeException.class, () -> HdfsSupport.listFiles(null, new Configuration()));
|
assertThrows(RuntimeException.class, () -> HdfsSupport.listFiles(null, new Configuration()));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldListFilesLocatedInPath(@TempDir Path tempDir) throws IOException {
|
void shouldListFilesLocatedInPath(@TempDir Path tempDir) throws IOException {
|
||||||
Path subDir1 = Files.createTempDirectory(tempDir, "list_me");
|
Path subDir1 = Files.createTempDirectory(tempDir, "list_me");
|
||||||
Path subDir2 = Files.createTempDirectory(tempDir, "list_me");
|
Path subDir2 = Files.createTempDirectory(tempDir, "list_me");
|
||||||
|
|
||||||
|
|
|
@ -5,10 +5,10 @@ import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
public class PacePersonTest {
|
class PacePersonTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void pacePersonTest1() {
|
void pacePersonTest1() {
|
||||||
|
|
||||||
PacePerson p = new PacePerson("Artini, Michele", false);
|
PacePerson p = new PacePerson("Artini, Michele", false);
|
||||||
assertEquals("Artini", p.getSurnameString());
|
assertEquals("Artini", p.getSurnameString());
|
||||||
|
@ -17,7 +17,7 @@ public class PacePersonTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void pacePersonTest2() {
|
void pacePersonTest2() {
|
||||||
PacePerson p = new PacePerson("Michele G. Artini", false);
|
PacePerson p = new PacePerson("Michele G. Artini", false);
|
||||||
assertEquals("Artini, Michele G.", p.getNormalisedFullname());
|
assertEquals("Artini, Michele G.", p.getNormalisedFullname());
|
||||||
assertEquals("Michele G", p.getNameString());
|
assertEquals("Michele G", p.getNameString());
|
||||||
|
|
|
@ -18,7 +18,8 @@ public class SparkSessionSupportTest {
|
||||||
class RunWithSparkSession {
|
class RunWithSparkSession {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged()
|
@SuppressWarnings("unchecked")
|
||||||
|
void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged()
|
||||||
throws Exception {
|
throws Exception {
|
||||||
// given
|
// given
|
||||||
SparkSession spark = mock(SparkSession.class);
|
SparkSession spark = mock(SparkSession.class);
|
||||||
|
@ -37,7 +38,8 @@ public class SparkSessionSupportTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged()
|
@SuppressWarnings("unchecked")
|
||||||
|
void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged()
|
||||||
throws Exception {
|
throws Exception {
|
||||||
// given
|
// given
|
||||||
SparkSession spark = mock(SparkSession.class);
|
SparkSession spark = mock(SparkSession.class);
|
||||||
|
|
|
@ -12,7 +12,7 @@ import org.junit.jupiter.api.Disabled;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
@Disabled
|
@Disabled
|
||||||
public class ZenodoAPIClientTest {
|
class ZenodoAPIClientTest {
|
||||||
|
|
||||||
private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions";
|
private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions";
|
||||||
private final String ACCESS_TOKEN = "";
|
private final String ACCESS_TOKEN = "";
|
||||||
|
@ -22,7 +22,7 @@ public class ZenodoAPIClientTest {
|
||||||
private final String depositionId = "674915";
|
private final String depositionId = "674915";
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testUploadOldDeposition() throws IOException, MissingConceptDoiException {
|
void testUploadOldDeposition() throws IOException, MissingConceptDoiException {
|
||||||
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
||||||
ACCESS_TOKEN);
|
ACCESS_TOKEN);
|
||||||
Assertions.assertEquals(200, client.uploadOpenDeposition(depositionId));
|
Assertions.assertEquals(200, client.uploadOpenDeposition(depositionId));
|
||||||
|
@ -44,7 +44,7 @@ public class ZenodoAPIClientTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testNewDeposition() throws IOException {
|
void testNewDeposition() throws IOException {
|
||||||
|
|
||||||
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
||||||
ACCESS_TOKEN);
|
ACCESS_TOKEN);
|
||||||
|
@ -67,7 +67,7 @@ public class ZenodoAPIClientTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testNewVersionNewName() throws IOException, MissingConceptDoiException {
|
void testNewVersionNewName() throws IOException, MissingConceptDoiException {
|
||||||
|
|
||||||
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
||||||
ACCESS_TOKEN);
|
ACCESS_TOKEN);
|
||||||
|
@ -87,7 +87,7 @@ public class ZenodoAPIClientTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testNewVersionOldName() throws IOException, MissingConceptDoiException {
|
void testNewVersionOldName() throws IOException, MissingConceptDoiException {
|
||||||
|
|
||||||
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
||||||
ACCESS_TOKEN);
|
ACCESS_TOKEN);
|
||||||
|
|
|
@ -21,7 +21,7 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
import eu.dnetlib.pace.util.MapDocumentUtil;
|
import eu.dnetlib.pace.util.MapDocumentUtil;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
||||||
public class AuthorMergerTest {
|
class AuthorMergerTest {
|
||||||
|
|
||||||
private String publicationsBasePath;
|
private String publicationsBasePath;
|
||||||
|
|
||||||
|
@ -43,7 +43,7 @@ public class AuthorMergerTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void mergeTest() { // used in the dedup: threshold set to 0.95
|
void mergeTest() { // used in the dedup: threshold set to 0.95
|
||||||
|
|
||||||
for (List<Author> authors1 : authors) {
|
for (List<Author> authors1 : authors) {
|
||||||
System.out.println("List " + (authors.indexOf(authors1) + 1));
|
System.out.println("List " + (authors.indexOf(authors1) + 1));
|
||||||
|
|
|
@ -4,12 +4,8 @@ package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
import static org.junit.jupiter.api.Assertions.*;
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.time.LocalDate;
|
|
||||||
import java.time.format.DateTimeFormatter;
|
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Locale;
|
|
||||||
import java.util.Optional;
|
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
|
@ -19,15 +15,34 @@ import com.fasterxml.jackson.databind.DeserializationFeature;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
import me.xuender.unidecode.Unidecode;
|
||||||
|
|
||||||
public class OafMapperUtilsTest {
|
class OafMapperUtilsTest {
|
||||||
|
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
|
||||||
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testDateValidation() {
|
public void testUnidecode() {
|
||||||
|
|
||||||
|
assertEquals("Liu Ben Mu hiruzuSen tawa", Unidecode.decode("六本木ヒルズ森タワ"));
|
||||||
|
assertEquals("Nan Wu A Mi Tuo Fo", Unidecode.decode("南无阿弥陀佛"));
|
||||||
|
assertEquals("Yi Tiao Hui Zou Lu De Yu", Unidecode.decode("一条会走路的鱼"));
|
||||||
|
assertEquals("amidaniyorai", Unidecode.decode("あみだにょらい"));
|
||||||
|
assertEquals("T`owrk`iayi", Unidecode.decode("Թուրքիայի"));
|
||||||
|
assertEquals("Obzor tematiki", Unidecode.decode("Обзор тематики"));
|
||||||
|
assertEquals("GERMANSKIE IaZYKI", Unidecode.decode("ГЕРМАНСКИЕ ЯЗЫКИ"));
|
||||||
|
assertEquals("Diereunese tes ikanopoieses", Unidecode.decode("Διερεύνηση της ικανοποίησης"));
|
||||||
|
assertEquals("lqDy l'wly@", Unidecode.decode("القضايا الأولية"));
|
||||||
|
assertEquals("abc def ghi", Unidecode.decode("abc def ghi"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testDateValidation() {
|
||||||
|
|
||||||
assertTrue(GraphCleaningFunctions.doCleanDate("2016-05-07T12:41:19.202Z ").isPresent());
|
assertTrue(GraphCleaningFunctions.doCleanDate("2016-05-07T12:41:19.202Z ").isPresent());
|
||||||
assertTrue(GraphCleaningFunctions.doCleanDate("2020-09-10 11:08:52 ").isPresent());
|
assertTrue(GraphCleaningFunctions.doCleanDate("2020-09-10 11:08:52 ").isPresent());
|
||||||
|
@ -92,7 +107,7 @@ public class OafMapperUtilsTest {
|
||||||
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("2006-01-02T15:04:05+0000").get());
|
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("2006-01-02T15:04:05+0000").get());
|
||||||
assertEquals("2009-08-13", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09-07:00").get());
|
assertEquals("2009-08-13", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09-07:00").get());
|
||||||
assertEquals("2009-08-12", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09").get());
|
assertEquals("2009-08-12", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09").get());
|
||||||
assertEquals("2009-08-12", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09Z").get());
|
assertEquals("2009-08-13", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09Z").get());
|
||||||
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 17:24:37.3186369").get());
|
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 17:24:37.3186369").get());
|
||||||
assertEquals("2012-08-03", GraphCleaningFunctions.doCleanDate("2012-08-03 18:31:59.257000000").get());
|
assertEquals("2012-08-03", GraphCleaningFunctions.doCleanDate("2012-08-03 18:31:59.257000000").get());
|
||||||
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 17:24:37.123").get());
|
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 17:24:37.123").get());
|
||||||
|
@ -132,44 +147,46 @@ public class OafMapperUtilsTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testDate() {
|
void testDate() {
|
||||||
System.out.println(GraphCleaningFunctions.cleanDate("23-FEB-1998"));
|
final String date = GraphCleaningFunctions.cleanDate("23-FEB-1998");
|
||||||
|
assertNotNull(date);
|
||||||
|
System.out.println(date);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testMergePubs() throws IOException {
|
void testMergePubs() throws IOException {
|
||||||
Publication p1 = read("publication_1.json", Publication.class);
|
Publication p1 = read("publication_1.json", Publication.class);
|
||||||
Publication p2 = read("publication_2.json", Publication.class);
|
Publication p2 = read("publication_2.json", Publication.class);
|
||||||
Dataset d1 = read("dataset_1.json", Dataset.class);
|
Dataset d1 = read("dataset_1.json", Dataset.class);
|
||||||
Dataset d2 = read("dataset_2.json", Dataset.class);
|
Dataset d2 = read("dataset_2.json", Dataset.class);
|
||||||
|
|
||||||
assertEquals(p1.getCollectedfrom().size(), 1);
|
assertEquals(1, p1.getCollectedfrom().size());
|
||||||
assertEquals(p1.getCollectedfrom().get(0).getKey(), ModelConstants.CROSSREF_ID);
|
assertEquals(ModelConstants.CROSSREF_ID, p1.getCollectedfrom().get(0).getKey());
|
||||||
assertEquals(d2.getCollectedfrom().size(), 1);
|
assertEquals(1, d2.getCollectedfrom().size());
|
||||||
assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
|
assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
|
||||||
|
|
||||||
assertTrue(
|
assertEquals(
|
||||||
|
ModelConstants.PUBLICATION_RESULTTYPE_CLASSID,
|
||||||
OafMapperUtils
|
OafMapperUtils
|
||||||
.mergeResults(p1, d2)
|
.mergeResults(p1, d2)
|
||||||
.getResulttype()
|
.getResulttype()
|
||||||
.getClassid()
|
.getClassid());
|
||||||
.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID));
|
|
||||||
|
|
||||||
assertEquals(p2.getCollectedfrom().size(), 1);
|
assertEquals(1, p2.getCollectedfrom().size());
|
||||||
assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
|
assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
|
||||||
assertEquals(d1.getCollectedfrom().size(), 1);
|
assertEquals(1, d1.getCollectedfrom().size());
|
||||||
assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
|
assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
|
||||||
|
|
||||||
assertTrue(
|
assertEquals(
|
||||||
|
ModelConstants.DATASET_RESULTTYPE_CLASSID,
|
||||||
OafMapperUtils
|
OafMapperUtils
|
||||||
.mergeResults(p2, d1)
|
.mergeResults(p2, d1)
|
||||||
.getResulttype()
|
.getResulttype()
|
||||||
.getClassid()
|
.getClassid());
|
||||||
.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected HashSet<String> cfId(List<KeyValue> collectedfrom) {
|
protected HashSet<String> cfId(List<KeyValue> collectedfrom) {
|
||||||
return collectedfrom.stream().map(c -> c.getKey()).collect(Collectors.toCollection(HashSet::new));
|
return collectedfrom.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new));
|
||||||
}
|
}
|
||||||
|
|
||||||
protected <T extends Result> T read(String filename, Class<T> clazz) throws IOException {
|
protected <T extends Result> T read(String filename, Class<T> clazz) throws IOException {
|
||||||
|
|
|
@ -3,10 +3,10 @@ package eu.dnetlib.scholexplorer.relation;
|
||||||
|
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
public class RelationMapperTest {
|
class RelationMapperTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testLoadRels() throws Exception {
|
void testLoadRels() throws Exception {
|
||||||
|
|
||||||
RelationMapper relationMapper = RelationMapper.load();
|
RelationMapper relationMapper = RelationMapper.load();
|
||||||
relationMapper.keySet().forEach(System.out::println);
|
relationMapper.keySet().forEach(System.out::println);
|
||||||
|
|
|
@ -3,40 +3,37 @@ package eu.dnetlib.dhp.actionmanager;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
import java.util.*;
|
import java.util.List;
|
||||||
|
import java.util.NoSuchElementException;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.Set;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.lang3.tuple.Triple;
|
import org.apache.commons.lang3.tuple.Triple;
|
||||||
import org.dom4j.Document;
|
import org.dom4j.Document;
|
||||||
import org.dom4j.DocumentException;
|
import org.dom4j.DocumentException;
|
||||||
import org.dom4j.Element;
|
|
||||||
import org.dom4j.io.SAXReader;
|
import org.dom4j.io.SAXReader;
|
||||||
import org.jetbrains.annotations.NotNull;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
|
||||||
import com.google.common.base.Joiner;
|
import com.google.common.base.Joiner;
|
||||||
import com.google.common.base.Splitter;
|
import com.google.common.base.Splitter;
|
||||||
import com.google.common.collect.Iterables;
|
import com.google.common.collect.Iterables;
|
||||||
import com.google.common.collect.Lists;
|
|
||||||
import com.google.common.collect.Sets;
|
import com.google.common.collect.Sets;
|
||||||
|
|
||||||
import eu.dnetlib.actionmanager.rmi.ActionManagerException;
|
import eu.dnetlib.actionmanager.rmi.ActionManagerException;
|
||||||
import eu.dnetlib.actionmanager.set.ActionManagerSet;
|
|
||||||
import eu.dnetlib.actionmanager.set.ActionManagerSet.ImpactTypes;
|
|
||||||
import eu.dnetlib.dhp.actionmanager.partition.PartitionActionSetsByPayloadTypeJob;
|
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
import scala.Tuple2;
|
|
||||||
|
|
||||||
public class ISClient implements Serializable {
|
public class ISClient implements Serializable {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(PartitionActionSetsByPayloadTypeJob.class);
|
private static final Logger log = LoggerFactory.getLogger(ISClient.class);
|
||||||
|
|
||||||
private static final String INPUT_ACTION_SET_ID_SEPARATOR = ",";
|
private static final String INPUT_ACTION_SET_ID_SEPARATOR = ",";
|
||||||
|
|
||||||
private final ISLookUpService isLookup;
|
private final transient ISLookUpService isLookup;
|
||||||
|
|
||||||
public ISClient(String isLookupUrl) {
|
public ISClient(String isLookupUrl) {
|
||||||
isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
||||||
|
@ -63,7 +60,7 @@ public class ISClient implements Serializable {
|
||||||
.map(
|
.map(
|
||||||
sets -> sets
|
sets -> sets
|
||||||
.stream()
|
.stream()
|
||||||
.map(set -> parseSetInfo(set))
|
.map(ISClient::parseSetInfo)
|
||||||
.filter(t -> ids.contains(t.getLeft()))
|
.filter(t -> ids.contains(t.getLeft()))
|
||||||
.map(t -> buildDirectory(basePath, t))
|
.map(t -> buildDirectory(basePath, t))
|
||||||
.collect(Collectors.toList()))
|
.collect(Collectors.toList()))
|
||||||
|
@ -73,15 +70,17 @@ public class ISClient implements Serializable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private Triple<String, String, String> parseSetInfo(String set) {
|
private static Triple<String, String, String> parseSetInfo(String set) {
|
||||||
try {
|
try {
|
||||||
Document doc = new SAXReader().read(new StringReader(set));
|
final SAXReader reader = new SAXReader();
|
||||||
|
reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
|
||||||
|
Document doc = reader.read(new StringReader(set));
|
||||||
return Triple
|
return Triple
|
||||||
.of(
|
.of(
|
||||||
doc.valueOf("//SET/@id"),
|
doc.valueOf("//SET/@id"),
|
||||||
doc.valueOf("//SET/@directory"),
|
doc.valueOf("//SET/@directory"),
|
||||||
doc.valueOf("//SET/@latest"));
|
doc.valueOf("//SET/@latest"));
|
||||||
} catch (DocumentException e) {
|
} catch (DocumentException | SAXException e) {
|
||||||
throw new IllegalStateException(e);
|
throw new IllegalStateException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -99,7 +98,7 @@ public class ISClient implements Serializable {
|
||||||
final String q = "for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='"
|
final String q = "for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='"
|
||||||
+ propertyName
|
+ propertyName
|
||||||
+ "']/@value/string()";
|
+ "']/@value/string()";
|
||||||
log.debug("quering for service property: " + q);
|
log.debug("quering for service property: {}", q);
|
||||||
try {
|
try {
|
||||||
final List<String> value = isLookup.quickSearchProfile(q);
|
final List<String> value = isLookup.quickSearchProfile(q);
|
||||||
return Iterables.getOnlyElement(value);
|
return Iterables.getOnlyElement(value);
|
||||||
|
|
|
@ -62,6 +62,7 @@ public class MergeAndGet {
|
||||||
x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
|
x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
private static <G extends Oaf, A extends Oaf> G selectNewerAndGet(G x, A y) {
|
private static <G extends Oaf, A extends Oaf> G selectNewerAndGet(G x, A y) {
|
||||||
if (x.getClass().equals(y.getClass())
|
if (x.getClass().equals(y.getClass())
|
||||||
&& x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) {
|
&& x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) {
|
||||||
|
|
|
@ -74,7 +74,9 @@ public class PromoteActionPayloadForGraphTableJob {
|
||||||
.orElse(true);
|
.orElse(true);
|
||||||
logger.info("shouldGroupById: {}", shouldGroupById);
|
logger.info("shouldGroupById: {}", shouldGroupById);
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
Class<? extends Oaf> rowClazz = (Class<? extends Oaf>) Class.forName(graphTableClassName);
|
Class<? extends Oaf> rowClazz = (Class<? extends Oaf>) Class.forName(graphTableClassName);
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
Class<? extends Oaf> actionPayloadClazz = (Class<? extends Oaf>) Class.forName(actionPayloadClassName);
|
Class<? extends Oaf> actionPayloadClazz = (Class<? extends Oaf>) Class.forName(actionPayloadClassName);
|
||||||
|
|
||||||
throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(rowClazz, actionPayloadClazz);
|
throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(rowClazz, actionPayloadClazz);
|
||||||
|
@ -152,7 +154,7 @@ public class PromoteActionPayloadForGraphTableJob {
|
||||||
return spark
|
return spark
|
||||||
.read()
|
.read()
|
||||||
.parquet(path)
|
.parquet(path)
|
||||||
.map((MapFunction<Row, String>) value -> extractPayload(value), Encoders.STRING())
|
.map((MapFunction<Row, String>) PromoteActionPayloadForGraphTableJob::extractPayload, Encoders.STRING())
|
||||||
.map(
|
.map(
|
||||||
(MapFunction<String, A>) value -> decodePayload(actionPayloadClazz, value),
|
(MapFunction<String, A>) value -> decodePayload(actionPayloadClazz, value),
|
||||||
Encoders.bean(actionPayloadClazz));
|
Encoders.bean(actionPayloadClazz));
|
||||||
|
|
|
@ -107,7 +107,7 @@
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
--conf spark.sql.shuffle.partitions=2560
|
--conf spark.sql.shuffle.partitions=5000
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--inputGraphTablePath</arg><arg>${inputGraphRootPath}/publication</arg>
|
<arg>--inputGraphTablePath</arg><arg>${inputGraphRootPath}/publication</arg>
|
||||||
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||||
|
@ -159,7 +159,7 @@
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
--conf spark.sql.shuffle.partitions=2560
|
--conf spark.sql.shuffle.partitions=5000
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--inputGraphTablePath</arg><arg>${workingDir}/publication</arg>
|
<arg>--inputGraphTablePath</arg><arg>${workingDir}/publication</arg>
|
||||||
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||||
|
|
|
@ -99,7 +99,7 @@
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
--conf spark.sql.shuffle.partitions=2560
|
--conf spark.sql.shuffle.partitions=5000
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--inputGraphTablePath</arg><arg>${inputGraphRootPath}/relation</arg>
|
<arg>--inputGraphTablePath</arg><arg>${inputGraphRootPath}/relation</arg>
|
||||||
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg>
|
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg>
|
||||||
|
|
|
@ -80,7 +80,7 @@ public class PartitionActionSetsByPayloadTypeJobTest {
|
||||||
private ISClient isClient;
|
private ISClient isClient;
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldPartitionActionSetsByPayloadType(@TempDir Path workingDir) throws Exception {
|
void shouldPartitionActionSetsByPayloadType(@TempDir Path workingDir) throws Exception {
|
||||||
// given
|
// given
|
||||||
Path inputActionSetsBaseDir = workingDir.resolve("input").resolve("action_sets");
|
Path inputActionSetsBaseDir = workingDir.resolve("input").resolve("action_sets");
|
||||||
Path outputDir = workingDir.resolve("output");
|
Path outputDir = workingDir.resolve("output");
|
||||||
|
|
|
@ -20,7 +20,7 @@ public class MergeAndGetTest {
|
||||||
class MergeFromAndGetStrategy {
|
class MergeFromAndGetStrategy {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForOafAndOaf() {
|
void shouldThrowForOafAndOaf() {
|
||||||
// given
|
// given
|
||||||
Oaf a = mock(Oaf.class);
|
Oaf a = mock(Oaf.class);
|
||||||
Oaf b = mock(Oaf.class);
|
Oaf b = mock(Oaf.class);
|
||||||
|
@ -33,7 +33,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForOafAndRelation() {
|
void shouldThrowForOafAndRelation() {
|
||||||
// given
|
// given
|
||||||
Oaf a = mock(Oaf.class);
|
Oaf a = mock(Oaf.class);
|
||||||
Relation b = mock(Relation.class);
|
Relation b = mock(Relation.class);
|
||||||
|
@ -46,7 +46,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForOafAndOafEntity() {
|
void shouldThrowForOafAndOafEntity() {
|
||||||
// given
|
// given
|
||||||
Oaf a = mock(Oaf.class);
|
Oaf a = mock(Oaf.class);
|
||||||
OafEntity b = mock(OafEntity.class);
|
OafEntity b = mock(OafEntity.class);
|
||||||
|
@ -59,7 +59,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForRelationAndOaf() {
|
void shouldThrowForRelationAndOaf() {
|
||||||
// given
|
// given
|
||||||
Relation a = mock(Relation.class);
|
Relation a = mock(Relation.class);
|
||||||
Oaf b = mock(Oaf.class);
|
Oaf b = mock(Oaf.class);
|
||||||
|
@ -72,7 +72,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForRelationAndOafEntity() {
|
void shouldThrowForRelationAndOafEntity() {
|
||||||
// given
|
// given
|
||||||
Relation a = mock(Relation.class);
|
Relation a = mock(Relation.class);
|
||||||
OafEntity b = mock(OafEntity.class);
|
OafEntity b = mock(OafEntity.class);
|
||||||
|
@ -85,7 +85,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldBehaveProperlyForRelationAndRelation() {
|
void shouldBehaveProperlyForRelationAndRelation() {
|
||||||
// given
|
// given
|
||||||
Relation a = mock(Relation.class);
|
Relation a = mock(Relation.class);
|
||||||
Relation b = mock(Relation.class);
|
Relation b = mock(Relation.class);
|
||||||
|
@ -101,7 +101,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForOafEntityAndOaf() {
|
void shouldThrowForOafEntityAndOaf() {
|
||||||
// given
|
// given
|
||||||
OafEntity a = mock(OafEntity.class);
|
OafEntity a = mock(OafEntity.class);
|
||||||
Oaf b = mock(Oaf.class);
|
Oaf b = mock(Oaf.class);
|
||||||
|
@ -114,7 +114,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForOafEntityAndRelation() {
|
void shouldThrowForOafEntityAndRelation() {
|
||||||
// given
|
// given
|
||||||
OafEntity a = mock(OafEntity.class);
|
OafEntity a = mock(OafEntity.class);
|
||||||
Relation b = mock(Relation.class);
|
Relation b = mock(Relation.class);
|
||||||
|
@ -127,7 +127,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForOafEntityAndOafEntityButNotSubclasses() {
|
void shouldThrowForOafEntityAndOafEntityButNotSubclasses() {
|
||||||
// given
|
// given
|
||||||
class OafEntitySub1 extends OafEntity {
|
class OafEntitySub1 extends OafEntity {
|
||||||
}
|
}
|
||||||
|
@ -145,7 +145,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldBehaveProperlyForOafEntityAndOafEntity() {
|
void shouldBehaveProperlyForOafEntityAndOafEntity() {
|
||||||
// given
|
// given
|
||||||
OafEntity a = mock(OafEntity.class);
|
OafEntity a = mock(OafEntity.class);
|
||||||
OafEntity b = mock(OafEntity.class);
|
OafEntity b = mock(OafEntity.class);
|
||||||
|
@ -165,7 +165,7 @@ public class MergeAndGetTest {
|
||||||
class SelectNewerAndGetStrategy {
|
class SelectNewerAndGetStrategy {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForOafEntityAndRelation() {
|
void shouldThrowForOafEntityAndRelation() {
|
||||||
// given
|
// given
|
||||||
OafEntity a = mock(OafEntity.class);
|
OafEntity a = mock(OafEntity.class);
|
||||||
Relation b = mock(Relation.class);
|
Relation b = mock(Relation.class);
|
||||||
|
@ -178,7 +178,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForRelationAndOafEntity() {
|
void shouldThrowForRelationAndOafEntity() {
|
||||||
// given
|
// given
|
||||||
Relation a = mock(Relation.class);
|
Relation a = mock(Relation.class);
|
||||||
OafEntity b = mock(OafEntity.class);
|
OafEntity b = mock(OafEntity.class);
|
||||||
|
@ -191,7 +191,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForOafEntityAndResult() {
|
void shouldThrowForOafEntityAndResult() {
|
||||||
// given
|
// given
|
||||||
OafEntity a = mock(OafEntity.class);
|
OafEntity a = mock(OafEntity.class);
|
||||||
Result b = mock(Result.class);
|
Result b = mock(Result.class);
|
||||||
|
@ -204,7 +204,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowWhenSuperTypeIsNewerForResultAndOafEntity() {
|
void shouldThrowWhenSuperTypeIsNewerForResultAndOafEntity() {
|
||||||
// given
|
// given
|
||||||
// real types must be used because subclass-superclass resolution does not work for
|
// real types must be used because subclass-superclass resolution does not work for
|
||||||
// mocks
|
// mocks
|
||||||
|
@ -221,7 +221,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldShouldReturnLeftForOafEntityAndOafEntity() {
|
void shouldShouldReturnLeftForOafEntityAndOafEntity() {
|
||||||
// given
|
// given
|
||||||
OafEntity a = mock(OafEntity.class);
|
OafEntity a = mock(OafEntity.class);
|
||||||
when(a.getLastupdatetimestamp()).thenReturn(1L);
|
when(a.getLastupdatetimestamp()).thenReturn(1L);
|
||||||
|
@ -238,7 +238,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldShouldReturnRightForOafEntityAndOafEntity() {
|
void shouldShouldReturnRightForOafEntityAndOafEntity() {
|
||||||
// given
|
// given
|
||||||
OafEntity a = mock(OafEntity.class);
|
OafEntity a = mock(OafEntity.class);
|
||||||
when(a.getLastupdatetimestamp()).thenReturn(2L);
|
when(a.getLastupdatetimestamp()).thenReturn(2L);
|
||||||
|
|
|
@ -77,7 +77,7 @@ public class PromoteActionPayloadForGraphTableJobTest {
|
||||||
class Main {
|
class Main {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowWhenGraphTableClassIsNotASubClassOfActionPayloadClass() {
|
void shouldThrowWhenGraphTableClassIsNotASubClassOfActionPayloadClass() {
|
||||||
// given
|
// given
|
||||||
Class<Relation> rowClazz = Relation.class;
|
Class<Relation> rowClazz = Relation.class;
|
||||||
Class<OafEntity> actionPayloadClazz = OafEntity.class;
|
Class<OafEntity> actionPayloadClazz = OafEntity.class;
|
||||||
|
@ -116,7 +116,7 @@ public class PromoteActionPayloadForGraphTableJobTest {
|
||||||
|
|
||||||
@ParameterizedTest(name = "strategy: {0}, graph table: {1}, action payload: {2}")
|
@ParameterizedTest(name = "strategy: {0}, graph table: {1}, action payload: {2}")
|
||||||
@MethodSource("eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest#promoteJobTestParams")
|
@MethodSource("eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest#promoteJobTestParams")
|
||||||
public void shouldPromoteActionPayloadForGraphTable(
|
void shouldPromoteActionPayloadForGraphTable(
|
||||||
MergeAndGet.Strategy strategy,
|
MergeAndGet.Strategy strategy,
|
||||||
Class<? extends Oaf> rowClazz,
|
Class<? extends Oaf> rowClazz,
|
||||||
Class<? extends Oaf> actionPayloadClazz)
|
Class<? extends Oaf> actionPayloadClazz)
|
||||||
|
|
|
@ -44,7 +44,7 @@ public class PromoteActionPayloadFunctionsTest {
|
||||||
class JoinTableWithActionPayloadAndMerge {
|
class JoinTableWithActionPayloadAndMerge {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowWhenTableTypeIsNotSubtypeOfActionPayloadType() {
|
void shouldThrowWhenTableTypeIsNotSubtypeOfActionPayloadType() {
|
||||||
// given
|
// given
|
||||||
class OafImpl extends Oaf {
|
class OafImpl extends Oaf {
|
||||||
}
|
}
|
||||||
|
@ -58,7 +58,7 @@ public class PromoteActionPayloadFunctionsTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldRunProperlyWhenActionPayloadTypeAndTableTypeAreTheSame() {
|
void shouldRunProperlyWhenActionPayloadTypeAndTableTypeAreTheSame() {
|
||||||
// given
|
// given
|
||||||
String id0 = "id0";
|
String id0 = "id0";
|
||||||
String id1 = "id1";
|
String id1 = "id1";
|
||||||
|
@ -138,7 +138,7 @@ public class PromoteActionPayloadFunctionsTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldRunProperlyWhenActionPayloadTypeIsSuperTypeOfTableType() {
|
void shouldRunProperlyWhenActionPayloadTypeIsSuperTypeOfTableType() {
|
||||||
// given
|
// given
|
||||||
String id0 = "id0";
|
String id0 = "id0";
|
||||||
String id1 = "id1";
|
String id1 = "id1";
|
||||||
|
@ -218,7 +218,7 @@ public class PromoteActionPayloadFunctionsTest {
|
||||||
class GroupTableByIdAndMerge {
|
class GroupTableByIdAndMerge {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldRunProperly() {
|
void shouldRunProperly() {
|
||||||
// given
|
// given
|
||||||
String id1 = "id1";
|
String id1 = "id1";
|
||||||
String id2 = "id2";
|
String id2 = "id2";
|
||||||
|
|
|
@ -29,6 +29,13 @@
|
||||||
<goal>testCompile</goal>
|
<goal>testCompile</goal>
|
||||||
</goals>
|
</goals>
|
||||||
</execution>
|
</execution>
|
||||||
|
<execution>
|
||||||
|
<id>scala-doc</id>
|
||||||
|
<phase>process-resources</phase> <!-- or wherever -->
|
||||||
|
<goals>
|
||||||
|
<goal>doc</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
</executions>
|
</executions>
|
||||||
<configuration>
|
<configuration>
|
||||||
<scalaVersion>${scala.version}</scalaVersion>
|
<scalaVersion>${scala.version}</scalaVersion>
|
||||||
|
@ -84,14 +91,6 @@
|
||||||
<artifactId>json</artifactId>
|
<artifactId>json</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-csv -->
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.commons</groupId>
|
|
||||||
<artifactId>commons-csv</artifactId>
|
|
||||||
<version>1.8</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
|
|
||||||
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
|
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.poi</groupId>
|
<groupId>org.apache.poi</groupId>
|
||||||
|
|
|
@ -0,0 +1,92 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.actionmanager;
|
||||||
|
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.sql.Dataset;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||||
|
|
||||||
|
public class Constants {
|
||||||
|
|
||||||
|
public static final String DOI = "doi";
|
||||||
|
public static final String DOI_CLASSNAME = "Digital Object Identifier";
|
||||||
|
|
||||||
|
public static final String DEFAULT_DELIMITER = ",";
|
||||||
|
|
||||||
|
public static final String UPDATE_DATA_INFO_TYPE = "update";
|
||||||
|
public static final String UPDATE_SUBJECT_FOS_CLASS_ID = "subject:fos";
|
||||||
|
public static final String UPDATE_CLASS_NAME = "Inferred by OpenAIRE";
|
||||||
|
public static final String UPDATE_MEASURE_BIP_CLASS_ID = "measure:bip";
|
||||||
|
public static final String UPDATE_SUBJECT_SDG_CLASS_ID = "subject:sdg";
|
||||||
|
|
||||||
|
public static final String FOS_CLASS_ID = "FOS";
|
||||||
|
public static final String FOS_CLASS_NAME = "Fields of Science and Technology classification";
|
||||||
|
|
||||||
|
public static final String SDG_CLASS_ID = "SDG";
|
||||||
|
public static final String SDG_CLASS_NAME = "Sustainable Development Goals";
|
||||||
|
|
||||||
|
public static final String NULL = "NULL";
|
||||||
|
|
||||||
|
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
|
private Constants() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Boolean isSparkSessionManaged(ArgumentApplicationParser parser) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static <R> Dataset<R> readPath(
|
||||||
|
SparkSession spark, String inputPath, Class<R> clazz) {
|
||||||
|
return spark
|
||||||
|
.read()
|
||||||
|
.textFile(inputPath)
|
||||||
|
.map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static StructuredProperty getSubject(String sbj, String classid, String classname,
|
||||||
|
String diqualifierclassid) {
|
||||||
|
if (sbj.equals(NULL))
|
||||||
|
return null;
|
||||||
|
StructuredProperty sp = new StructuredProperty();
|
||||||
|
sp.setValue(sbj);
|
||||||
|
sp
|
||||||
|
.setQualifier(
|
||||||
|
OafMapperUtils
|
||||||
|
.qualifier(
|
||||||
|
classid,
|
||||||
|
classname,
|
||||||
|
ModelConstants.DNET_SUBJECT_TYPOLOGIES,
|
||||||
|
ModelConstants.DNET_SUBJECT_TYPOLOGIES));
|
||||||
|
sp
|
||||||
|
.setDataInfo(
|
||||||
|
OafMapperUtils
|
||||||
|
.dataInfo(
|
||||||
|
false,
|
||||||
|
UPDATE_DATA_INFO_TYPE,
|
||||||
|
true,
|
||||||
|
false,
|
||||||
|
OafMapperUtils
|
||||||
|
.qualifier(
|
||||||
|
diqualifierclassid,
|
||||||
|
UPDATE_CLASS_NAME,
|
||||||
|
ModelConstants.DNET_PROVENANCE_ACTIONS,
|
||||||
|
ModelConstants.DNET_PROVENANCE_ACTIONS),
|
||||||
|
""));
|
||||||
|
|
||||||
|
return sp;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,84 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.bipfinder;
|
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.util.Optional;
|
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
import org.apache.hadoop.io.Text;
|
|
||||||
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
|
|
||||||
import org.apache.spark.SparkConf;
|
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
|
||||||
import org.apache.spark.sql.SparkSession;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
|
||||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Just collects all the atomic actions produced for the different results and saves them in
|
|
||||||
* outputpath for the ActionSet
|
|
||||||
*/
|
|
||||||
public class CollectAndSave implements Serializable {
|
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(CollectAndSave.class);
|
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
|
||||||
|
|
||||||
public static <I extends Result> void main(String[] args) throws Exception {
|
|
||||||
|
|
||||||
String jsonConfiguration = IOUtils
|
|
||||||
.toString(
|
|
||||||
CollectAndSave.class
|
|
||||||
.getResourceAsStream(
|
|
||||||
"/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json"));
|
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
|
||||||
|
|
||||||
parser.parseArgument(args);
|
|
||||||
|
|
||||||
Boolean isSparkSessionManaged = Optional
|
|
||||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
|
||||||
.map(Boolean::valueOf)
|
|
||||||
.orElse(Boolean.TRUE);
|
|
||||||
|
|
||||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
|
||||||
|
|
||||||
final String inputPath = parser.get("inputPath");
|
|
||||||
log.info("inputPath {}: ", inputPath);
|
|
||||||
|
|
||||||
final String outputPath = parser.get("outputPath");
|
|
||||||
log.info("outputPath {}: ", outputPath);
|
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
|
||||||
|
|
||||||
runWithSparkSession(
|
|
||||||
conf,
|
|
||||||
isSparkSessionManaged,
|
|
||||||
spark -> {
|
|
||||||
removeOutputDir(spark, outputPath);
|
|
||||||
collectAndSave(spark, inputPath, outputPath);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void collectAndSave(SparkSession spark, String inputPath, String outputPath) {
|
|
||||||
JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
|
||||||
|
|
||||||
sc
|
|
||||||
.sequenceFile(inputPath + "/publication", Text.class, Text.class)
|
|
||||||
.union(sc.sequenceFile(inputPath + "/dataset", Text.class, Text.class))
|
|
||||||
.union(sc.sequenceFile(inputPath + "/otherresearchproduct", Text.class, Text.class))
|
|
||||||
.union(sc.sequenceFile(inputPath + "/software", Text.class, Text.class))
|
|
||||||
.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void removeOutputDir(SparkSession spark, String path) {
|
|
||||||
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,28 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.bipfinder;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Subset of the information of the generic results that are needed to create the atomic action
|
|
||||||
*/
|
|
||||||
public class PreparedResult implements Serializable {
|
|
||||||
private String id; // openaire id
|
|
||||||
private String value; // doi
|
|
||||||
|
|
||||||
public String getId() {
|
|
||||||
return id;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setId(String id) {
|
|
||||||
this.id = id;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getValue() {
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setValue(String value) {
|
|
||||||
this.value = value;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,6 +1,7 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.bipfinder;
|
package eu.dnetlib.dhp.actionmanager.bipfinder;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.actionmanager.Constants.*;
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
@ -15,7 +16,6 @@ import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.JavaRDD;
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
import org.apache.spark.api.java.function.MapGroupsFunction;
|
|
||||||
import org.apache.spark.sql.Dataset;
|
import org.apache.spark.sql.Dataset;
|
||||||
import org.apache.spark.sql.Encoders;
|
import org.apache.spark.sql.Encoders;
|
||||||
import org.apache.spark.sql.SparkSession;
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
@ -24,11 +24,15 @@ import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.actionmanager.bipmodel.BipDeserialize;
|
||||||
|
import eu.dnetlib.dhp.actionmanager.bipmodel.BipScore;
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||||
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -46,7 +50,7 @@ public class SparkAtomicActionScoreJob implements Serializable {
|
||||||
.toString(
|
.toString(
|
||||||
SparkAtomicActionScoreJob.class
|
SparkAtomicActionScoreJob.class
|
||||||
.getResourceAsStream(
|
.getResourceAsStream(
|
||||||
"/eu/dnetlib/dhp/actionmanager/bipfinder/input_parameters.json"));
|
"/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json"));
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
|
||||||
|
@ -65,14 +69,6 @@ public class SparkAtomicActionScoreJob implements Serializable {
|
||||||
final String outputPath = parser.get("outputPath");
|
final String outputPath = parser.get("outputPath");
|
||||||
log.info("outputPath {}: ", outputPath);
|
log.info("outputPath {}: ", outputPath);
|
||||||
|
|
||||||
final String bipScorePath = parser.get("bipScorePath");
|
|
||||||
log.info("bipScorePath: {}", bipScorePath);
|
|
||||||
|
|
||||||
final String resultClassName = parser.get("resultTableName");
|
|
||||||
log.info("resultTableName: {}", resultClassName);
|
|
||||||
|
|
||||||
Class<I> inputClazz = (Class<I>) Class.forName(resultClassName);
|
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
runWithSparkSession(
|
runWithSparkSession(
|
||||||
|
@ -80,14 +76,13 @@ public class SparkAtomicActionScoreJob implements Serializable {
|
||||||
isSparkSessionManaged,
|
isSparkSessionManaged,
|
||||||
spark -> {
|
spark -> {
|
||||||
removeOutputDir(spark, outputPath);
|
removeOutputDir(spark, outputPath);
|
||||||
prepareResults(spark, inputPath, outputPath, bipScorePath, inputClazz);
|
prepareResults(spark, inputPath, outputPath);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <I extends Result> void prepareResults(SparkSession spark, String inputPath, String outputPath,
|
private static <I extends Result> void prepareResults(SparkSession spark, String bipScorePath, String outputPath) {
|
||||||
String bipScorePath, Class<I> inputClazz) {
|
|
||||||
|
|
||||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
JavaRDD<BipDeserialize> bipDeserializeJavaRDD = sc
|
JavaRDD<BipDeserialize> bipDeserializeJavaRDD = sc
|
||||||
.textFile(bipScorePath)
|
.textFile(bipScorePath)
|
||||||
|
@ -101,43 +96,19 @@ public class SparkAtomicActionScoreJob implements Serializable {
|
||||||
return bs;
|
return bs;
|
||||||
}).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class));
|
}).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class));
|
||||||
|
|
||||||
System.out.println(bipScores.count());
|
|
||||||
|
|
||||||
Dataset<I> results = readPath(spark, inputPath, inputClazz);
|
|
||||||
|
|
||||||
results.createOrReplaceTempView("result");
|
|
||||||
|
|
||||||
Dataset<PreparedResult> preparedResult = spark
|
|
||||||
.sql(
|
|
||||||
"select pIde.value value, id " +
|
|
||||||
"from result " +
|
|
||||||
"lateral view explode (pid) p as pIde " +
|
|
||||||
"where dataInfo.deletedbyinference = false and pIde.qualifier.classid = '" + DOI + "'")
|
|
||||||
.as(Encoders.bean(PreparedResult.class));
|
|
||||||
|
|
||||||
bipScores
|
bipScores
|
||||||
.joinWith(
|
|
||||||
preparedResult, bipScores.col("id").equalTo(preparedResult.col("value")),
|
|
||||||
"inner")
|
|
||||||
.map((MapFunction<Tuple2<BipScore, PreparedResult>, BipScore>) value -> {
|
|
||||||
BipScore ret = value._1();
|
|
||||||
ret.setId(value._2().getId());
|
|
||||||
return ret;
|
|
||||||
}, Encoders.bean(BipScore.class))
|
|
||||||
.groupByKey((MapFunction<BipScore, String>) value -> value.getId(), Encoders.STRING())
|
|
||||||
.mapGroups((MapGroupsFunction<String, BipScore, Result>) (k, it) -> {
|
|
||||||
Result ret = new Result();
|
|
||||||
ret.setDataInfo(getDataInfo());
|
|
||||||
BipScore first = it.next();
|
|
||||||
ret.setId(first.getId());
|
|
||||||
|
|
||||||
ret.setMeasures(getMeasure(first));
|
.map((MapFunction<BipScore, Result>) bs -> {
|
||||||
it.forEachRemaining(value -> ret.getMeasures().addAll(getMeasure(value)));
|
Result ret = new Result();
|
||||||
|
|
||||||
|
ret.setId(bs.getId());
|
||||||
|
|
||||||
|
ret.setMeasures(getMeasure(bs));
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}, Encoders.bean(Result.class))
|
}, Encoders.bean(Result.class))
|
||||||
.toJavaRDD()
|
.toJavaRDD()
|
||||||
.map(p -> new AtomicAction(inputClazz, p))
|
.map(p -> new AtomicAction(Result.class, p))
|
||||||
.mapToPair(
|
.mapToPair(
|
||||||
aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
|
aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
|
||||||
new Text(OBJECT_MAPPER.writeValueAsString(aa))))
|
new Text(OBJECT_MAPPER.writeValueAsString(aa))))
|
||||||
|
@ -161,7 +132,21 @@ public class SparkAtomicActionScoreJob implements Serializable {
|
||||||
KeyValue kv = new KeyValue();
|
KeyValue kv = new KeyValue();
|
||||||
kv.setValue(unit.getValue());
|
kv.setValue(unit.getValue());
|
||||||
kv.setKey(unit.getKey());
|
kv.setKey(unit.getKey());
|
||||||
kv.setDataInfo(getDataInfo());
|
kv
|
||||||
|
.setDataInfo(
|
||||||
|
OafMapperUtils
|
||||||
|
.dataInfo(
|
||||||
|
false,
|
||||||
|
UPDATE_DATA_INFO_TYPE,
|
||||||
|
true,
|
||||||
|
false,
|
||||||
|
OafMapperUtils
|
||||||
|
.qualifier(
|
||||||
|
UPDATE_MEASURE_BIP_CLASS_ID,
|
||||||
|
UPDATE_CLASS_NAME,
|
||||||
|
ModelConstants.DNET_PROVENANCE_ACTIONS,
|
||||||
|
ModelConstants.DNET_PROVENANCE_ACTIONS),
|
||||||
|
""));
|
||||||
return kv;
|
return kv;
|
||||||
})
|
})
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
|
@ -170,21 +155,6 @@ public class SparkAtomicActionScoreJob implements Serializable {
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
private static DataInfo getDataInfo() {
|
|
||||||
DataInfo di = new DataInfo();
|
|
||||||
di.setInferred(false);
|
|
||||||
di.setInvisible(false);
|
|
||||||
di.setDeletedbyinference(false);
|
|
||||||
di.setTrust("");
|
|
||||||
Qualifier qualifier = new Qualifier();
|
|
||||||
qualifier.setClassid("sysimport:actionset");
|
|
||||||
qualifier.setClassname("Harvested");
|
|
||||||
qualifier.setSchemename("dnet:provenanceActions");
|
|
||||||
qualifier.setSchemeid("dnet:provenanceActions");
|
|
||||||
di.setProvenanceaction(qualifier);
|
|
||||||
return di;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void removeOutputDir(SparkSession spark, String path) {
|
private static void removeOutputDir(SparkSession spark, String path) {
|
||||||
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
|
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.bipfinder;
|
package eu.dnetlib.dhp.actionmanager.bipmodel;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.bipfinder;
|
package eu.dnetlib.dhp.actionmanager.bipmodel;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.List;
|
import java.util.List;
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.bipfinder;
|
package eu.dnetlib.dhp.actionmanager.bipmodel;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.bipfinder;
|
package eu.dnetlib.dhp.actionmanager.bipmodel;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.List;
|
import java.util.List;
|
|
@ -0,0 +1,91 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.actionmanager.createunresolvedentities;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.actionmanager.Constants.DEFAULT_DELIMITER;
|
||||||
|
import static eu.dnetlib.dhp.actionmanager.Constants.isSparkSessionManaged;
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.sql.*;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel;
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
|
||||||
|
public class GetFOSSparkJob implements Serializable {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(GetFOSSparkJob.class);
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
GetFOSSparkJob.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/actionmanager/createunresolvedentities/get_input_parameters.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
// the path where the original fos csv file is stored
|
||||||
|
final String sourcePath = parser.get("sourcePath");
|
||||||
|
log.info("sourcePath {}", sourcePath);
|
||||||
|
|
||||||
|
// the path where to put the file as json
|
||||||
|
final String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath {}", outputPath);
|
||||||
|
|
||||||
|
final String delimiter = Optional
|
||||||
|
.ofNullable(parser.get("delimiter"))
|
||||||
|
.orElse(DEFAULT_DELIMITER);
|
||||||
|
|
||||||
|
SparkConf sconf = new SparkConf();
|
||||||
|
runWithSparkSession(
|
||||||
|
sconf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
getFOS(
|
||||||
|
spark,
|
||||||
|
sourcePath,
|
||||||
|
outputPath,
|
||||||
|
delimiter);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void getFOS(SparkSession spark, String sourcePath, String outputPath, String delimiter) {
|
||||||
|
Dataset<Row> fosData = spark
|
||||||
|
.read()
|
||||||
|
.format("csv")
|
||||||
|
.option("sep", delimiter)
|
||||||
|
.option("inferSchema", "true")
|
||||||
|
.option("header", "true")
|
||||||
|
.option("quotes", "\"")
|
||||||
|
.load(sourcePath);
|
||||||
|
|
||||||
|
fosData.map((MapFunction<Row, FOSDataModel>) r -> {
|
||||||
|
FOSDataModel fosDataModel = new FOSDataModel();
|
||||||
|
fosDataModel.setDoi(r.getString(0).toLowerCase());
|
||||||
|
fosDataModel.setLevel1(r.getString(1));
|
||||||
|
fosDataModel.setLevel2(r.getString(2));
|
||||||
|
fosDataModel.setLevel3(r.getString(3));
|
||||||
|
return fosDataModel;
|
||||||
|
}, Encoders.bean(FOSDataModel.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.json(outputPath);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,91 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.actionmanager.createunresolvedentities;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.actionmanager.Constants.DEFAULT_DELIMITER;
|
||||||
|
import static eu.dnetlib.dhp.actionmanager.Constants.isSparkSessionManaged;
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.FilterFunction;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.sql.*;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel;
|
||||||
|
import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.SDGDataModel;
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
|
||||||
|
public class GetSDGSparkJob implements Serializable {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(GetSDGSparkJob.class);
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
GetSDGSparkJob.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/actionmanager/createunresolvedentities/get_input_parameters.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
// the path where the original fos csv file is stored
|
||||||
|
final String sourcePath = parser.get("sourcePath");
|
||||||
|
log.info("sourcePath {}", sourcePath);
|
||||||
|
|
||||||
|
// the path where to put the file as json
|
||||||
|
final String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath {}", outputPath);
|
||||||
|
|
||||||
|
final String delimiter = Optional
|
||||||
|
.ofNullable(parser.get("delimiter"))
|
||||||
|
.orElse(DEFAULT_DELIMITER);
|
||||||
|
|
||||||
|
SparkConf sconf = new SparkConf();
|
||||||
|
runWithSparkSession(
|
||||||
|
sconf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
getSDG(
|
||||||
|
spark,
|
||||||
|
sourcePath,
|
||||||
|
outputPath,
|
||||||
|
delimiter);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void getSDG(SparkSession spark, String sourcePath, String outputPath, String delimiter) {
|
||||||
|
Dataset<Row> sdgData = spark
|
||||||
|
.read()
|
||||||
|
.format("csv")
|
||||||
|
.option("sep", delimiter)
|
||||||
|
.option("inferSchema", "true")
|
||||||
|
.option("header", "true")
|
||||||
|
.option("quotes", "\"")
|
||||||
|
.load(sourcePath);
|
||||||
|
|
||||||
|
sdgData.map((MapFunction<Row, SDGDataModel>) r -> {
|
||||||
|
SDGDataModel sdgDataModel = new SDGDataModel();
|
||||||
|
sdgDataModel.setDoi(r.getString(0).toLowerCase());
|
||||||
|
sdgDataModel.setSbj(r.getString(1));
|
||||||
|
|
||||||
|
return sdgDataModel;
|
||||||
|
}, Encoders.bean(SDGDataModel.class))
|
||||||
|
.filter((FilterFunction<SDGDataModel>) sdg -> sdg.getSbj() != null)
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.json(outputPath);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,178 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.actionmanager.createunresolvedentities;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.actionmanager.Constants.*;
|
||||||
|
import static eu.dnetlib.dhp.actionmanager.Constants.UPDATE_CLASS_NAME;
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.actionmanager.bipmodel.BipDeserialize;
|
||||||
|
import eu.dnetlib.dhp.actionmanager.bipmodel.BipScore;
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Instance;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Measure;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||||
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
|
|
||||||
|
public class PrepareBipFinder implements Serializable {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(PrepareBipFinder.class);
|
||||||
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
PrepareBipFinder.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/actionmanager/createunresolvedentities/prepare_parameters.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
final String sourcePath = parser.get("sourcePath");
|
||||||
|
log.info("sourcePath {}: ", sourcePath);
|
||||||
|
|
||||||
|
final String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath {}: ", outputPath);
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
HdfsSupport.remove(outputPath, spark.sparkContext().hadoopConfiguration());
|
||||||
|
prepareResults(spark, sourcePath, outputPath);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void prepareResults(SparkSession spark, String inputPath, String outputPath) {
|
||||||
|
|
||||||
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
|
JavaRDD<BipDeserialize> bipDeserializeJavaRDD = sc
|
||||||
|
.textFile(inputPath)
|
||||||
|
.map(item -> OBJECT_MAPPER.readValue(item, BipDeserialize.class));
|
||||||
|
|
||||||
|
spark
|
||||||
|
.createDataset(bipDeserializeJavaRDD.flatMap(entry -> entry.keySet().stream().map(key -> {
|
||||||
|
BipScore bs = new BipScore();
|
||||||
|
bs.setId(key);
|
||||||
|
bs.setScoreList(entry.get(key));
|
||||||
|
|
||||||
|
return bs;
|
||||||
|
}).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class))
|
||||||
|
.map((MapFunction<BipScore, Result>) v -> {
|
||||||
|
Result r = new Result();
|
||||||
|
final String cleanedPid = CleaningFunctions.normalizePidValue(DOI, v.getId());
|
||||||
|
|
||||||
|
r.setId(DHPUtils.generateUnresolvedIdentifier(v.getId(), DOI));
|
||||||
|
Instance inst = new Instance();
|
||||||
|
inst.setMeasures(getMeasure(v));
|
||||||
|
|
||||||
|
inst
|
||||||
|
.setPid(
|
||||||
|
Arrays
|
||||||
|
.asList(
|
||||||
|
OafMapperUtils
|
||||||
|
.structuredProperty(
|
||||||
|
cleanedPid,
|
||||||
|
OafMapperUtils
|
||||||
|
.qualifier(
|
||||||
|
DOI, DOI_CLASSNAME,
|
||||||
|
ModelConstants.DNET_PID_TYPES,
|
||||||
|
ModelConstants.DNET_PID_TYPES),
|
||||||
|
null)));
|
||||||
|
r.setInstance(Arrays.asList(inst));
|
||||||
|
r
|
||||||
|
.setDataInfo(
|
||||||
|
OafMapperUtils
|
||||||
|
.dataInfo(
|
||||||
|
false, null, true,
|
||||||
|
false,
|
||||||
|
OafMapperUtils
|
||||||
|
.qualifier(
|
||||||
|
ModelConstants.PROVENANCE_ENRICH,
|
||||||
|
null,
|
||||||
|
ModelConstants.DNET_PROVENANCE_ACTIONS,
|
||||||
|
ModelConstants.DNET_PROVENANCE_ACTIONS),
|
||||||
|
null));
|
||||||
|
return r;
|
||||||
|
}, Encoders.bean(Result.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath + "/bip");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Measure> getMeasure(BipScore value) {
|
||||||
|
return value
|
||||||
|
.getScoreList()
|
||||||
|
.stream()
|
||||||
|
.map(score -> {
|
||||||
|
Measure m = new Measure();
|
||||||
|
m.setId(score.getId());
|
||||||
|
m
|
||||||
|
.setUnit(
|
||||||
|
score
|
||||||
|
.getUnit()
|
||||||
|
.stream()
|
||||||
|
.map(unit -> {
|
||||||
|
KeyValue kv = new KeyValue();
|
||||||
|
kv.setValue(unit.getValue());
|
||||||
|
kv.setKey(unit.getKey());
|
||||||
|
kv
|
||||||
|
.setDataInfo(
|
||||||
|
OafMapperUtils
|
||||||
|
.dataInfo(
|
||||||
|
false,
|
||||||
|
UPDATE_DATA_INFO_TYPE,
|
||||||
|
true,
|
||||||
|
false,
|
||||||
|
OafMapperUtils
|
||||||
|
.qualifier(
|
||||||
|
UPDATE_MEASURE_BIP_CLASS_ID,
|
||||||
|
UPDATE_CLASS_NAME,
|
||||||
|
ModelConstants.DNET_PROVENANCE_ACTIONS,
|
||||||
|
ModelConstants.DNET_PROVENANCE_ACTIONS),
|
||||||
|
""));
|
||||||
|
return kv;
|
||||||
|
})
|
||||||
|
.collect(Collectors.toList()));
|
||||||
|
return m;
|
||||||
|
})
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,99 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.actionmanager.createunresolvedentities;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.actionmanager.Constants.*;
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.api.java.function.MapGroupsFunction;
|
||||||
|
import org.apache.spark.sql.Dataset;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel;
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
|
|
||||||
|
public class PrepareFOSSparkJob implements Serializable {
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(PrepareFOSSparkJob.class);
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
PrepareFOSSparkJob.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/actionmanager/createunresolvedentities/prepare_parameters.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
String sourcePath = parser.get("sourcePath");
|
||||||
|
log.info("sourcePath: {}", sourcePath);
|
||||||
|
|
||||||
|
final String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
distributeFOSdois(
|
||||||
|
spark,
|
||||||
|
sourcePath,
|
||||||
|
|
||||||
|
outputPath);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void distributeFOSdois(SparkSession spark, String sourcePath, String outputPath) {
|
||||||
|
Dataset<FOSDataModel> fosDataset = readPath(spark, sourcePath, FOSDataModel.class);
|
||||||
|
|
||||||
|
fosDataset
|
||||||
|
.groupByKey((MapFunction<FOSDataModel, String>) v -> v.getDoi().toLowerCase(), Encoders.STRING())
|
||||||
|
.mapGroups((MapGroupsFunction<String, FOSDataModel, Result>) (k, it) -> {
|
||||||
|
Result r = new Result();
|
||||||
|
FOSDataModel first = it.next();
|
||||||
|
r.setId(DHPUtils.generateUnresolvedIdentifier(k, DOI));
|
||||||
|
HashSet<String> level1 = new HashSet<>();
|
||||||
|
HashSet<String> level2 = new HashSet<>();
|
||||||
|
HashSet<String> level3 = new HashSet<>();
|
||||||
|
addLevels(level1, level2, level3, first);
|
||||||
|
it.forEachRemaining(v -> addLevels(level1, level2, level3, v));
|
||||||
|
List<StructuredProperty> sbjs = new ArrayList<>();
|
||||||
|
level1.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));
|
||||||
|
level2.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));
|
||||||
|
level3.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));
|
||||||
|
r.setSubject(sbjs);
|
||||||
|
return r;
|
||||||
|
}, Encoders.bean(Result.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath + "/fos");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void addLevels(HashSet<String> level1, HashSet<String> level2, HashSet<String> level3,
|
||||||
|
FOSDataModel first) {
|
||||||
|
level1.add(first.getLevel1());
|
||||||
|
level2.add(first.getLevel2());
|
||||||
|
level3.add(first.getLevel3());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,89 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.actionmanager.createunresolvedentities;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.actionmanager.Constants.*;
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.api.java.function.MapGroupsFunction;
|
||||||
|
import org.apache.spark.sql.Dataset;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.SDGDataModel;
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
|
|
||||||
|
public class PrepareSDGSparkJob implements Serializable {
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(PrepareSDGSparkJob.class);
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
PrepareSDGSparkJob.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/actionmanager/createunresolvedentities/prepare_parameters.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
String sourcePath = parser.get("sourcePath");
|
||||||
|
log.info("sourcePath: {}", sourcePath);
|
||||||
|
|
||||||
|
final String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
doPrepare(
|
||||||
|
spark,
|
||||||
|
sourcePath,
|
||||||
|
|
||||||
|
outputPath);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void doPrepare(SparkSession spark, String sourcePath, String outputPath) {
|
||||||
|
Dataset<SDGDataModel> sdgDataset = readPath(spark, sourcePath, SDGDataModel.class);
|
||||||
|
|
||||||
|
sdgDataset
|
||||||
|
.groupByKey((MapFunction<SDGDataModel, String>) r -> r.getDoi().toLowerCase(), Encoders.STRING())
|
||||||
|
.mapGroups((MapGroupsFunction<String, SDGDataModel, Result>) (k, it) -> {
|
||||||
|
Result r = new Result();
|
||||||
|
r.setId(DHPUtils.generateUnresolvedIdentifier(k, DOI));
|
||||||
|
SDGDataModel first = it.next();
|
||||||
|
List<StructuredProperty> sbjs = new ArrayList<>();
|
||||||
|
sbjs.add(getSubject(first.getSbj(), SDG_CLASS_ID, SDG_CLASS_NAME, UPDATE_SUBJECT_SDG_CLASS_ID));
|
||||||
|
it
|
||||||
|
.forEachRemaining(
|
||||||
|
s -> sbjs
|
||||||
|
.add(getSubject(s.getSbj(), SDG_CLASS_ID, SDG_CLASS_NAME, UPDATE_SUBJECT_SDG_CLASS_ID)));
|
||||||
|
r.setSubject(sbjs);
|
||||||
|
return r;
|
||||||
|
}, Encoders.bean(Result.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath + "/sdg");
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,79 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.actionmanager.createunresolvedentities;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.actionmanager.Constants.*;
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.api.java.function.MapGroupsFunction;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
|
||||||
|
public class SparkSaveUnresolved implements Serializable {
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(SparkSaveUnresolved.class);
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
PrepareFOSSparkJob.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/actionmanager/createunresolvedentities/produce_unresolved_parameters.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
String sourcePath = parser.get("sourcePath");
|
||||||
|
log.info("sourcePath: {}", sourcePath);
|
||||||
|
|
||||||
|
final String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
saveUnresolved(
|
||||||
|
spark,
|
||||||
|
sourcePath,
|
||||||
|
|
||||||
|
outputPath);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void saveUnresolved(SparkSession spark, String sourcePath, String outputPath) {
|
||||||
|
|
||||||
|
spark
|
||||||
|
.read()
|
||||||
|
.textFile(sourcePath + "/*")
|
||||||
|
.map(
|
||||||
|
(MapFunction<String, Result>) l -> OBJECT_MAPPER.readValue(l, Result.class),
|
||||||
|
Encoders.bean(Result.class))
|
||||||
|
.groupByKey((MapFunction<Result, String>) Result::getId, Encoders.STRING())
|
||||||
|
.mapGroups((MapGroupsFunction<String, Result, Result>) (k, it) -> {
|
||||||
|
Result ret = it.next();
|
||||||
|
it.forEachRemaining(r -> ret.mergeFrom(r));
|
||||||
|
return ret;
|
||||||
|
}, Encoders.bean(Result.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,71 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.actionmanager.createunresolvedentities.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
import com.opencsv.bean.CsvBindByPosition;
|
||||||
|
|
||||||
|
public class FOSDataModel implements Serializable {
|
||||||
|
@CsvBindByPosition(position = 0)
|
||||||
|
// @CsvBindByName(column = "doi")
|
||||||
|
private String doi;
|
||||||
|
|
||||||
|
@CsvBindByPosition(position = 1)
|
||||||
|
// @CsvBindByName(column = "level1")
|
||||||
|
private String level1;
|
||||||
|
|
||||||
|
@CsvBindByPosition(position = 2)
|
||||||
|
// @CsvBindByName(column = "level2")
|
||||||
|
private String level2;
|
||||||
|
|
||||||
|
@CsvBindByPosition(position = 3)
|
||||||
|
// @CsvBindByName(column = "level3")
|
||||||
|
private String level3;
|
||||||
|
|
||||||
|
public FOSDataModel() {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public FOSDataModel(String doi, String level1, String level2, String level3) {
|
||||||
|
this.doi = doi;
|
||||||
|
this.level1 = level1;
|
||||||
|
this.level2 = level2;
|
||||||
|
this.level3 = level3;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static FOSDataModel newInstance(String d, String level1, String level2, String level3) {
|
||||||
|
return new FOSDataModel(d, level1, level2, level3);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getDoi() {
|
||||||
|
return doi;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDoi(String doi) {
|
||||||
|
this.doi = doi;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getLevel1() {
|
||||||
|
return level1;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLevel1(String level1) {
|
||||||
|
this.level1 = level1;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getLevel2() {
|
||||||
|
return level2;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLevel2(String level2) {
|
||||||
|
this.level2 = level2;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getLevel3() {
|
||||||
|
return level3;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLevel3(String level3) {
|
||||||
|
this.level3 = level3;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,47 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.actionmanager.createunresolvedentities.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
import com.opencsv.bean.CsvBindByPosition;
|
||||||
|
|
||||||
|
public class SDGDataModel implements Serializable {
|
||||||
|
|
||||||
|
@CsvBindByPosition(position = 0)
|
||||||
|
// @CsvBindByName(column = "doi")
|
||||||
|
private String doi;
|
||||||
|
|
||||||
|
@CsvBindByPosition(position = 1)
|
||||||
|
// @CsvBindByName(column = "sdg")
|
||||||
|
private String sbj;
|
||||||
|
|
||||||
|
public SDGDataModel() {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public SDGDataModel(String doi, String sbj) {
|
||||||
|
this.doi = doi;
|
||||||
|
this.sbj = sbj;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static SDGDataModel newInstance(String d, String sbj) {
|
||||||
|
return new SDGDataModel(d, sbj);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getDoi() {
|
||||||
|
return doi;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDoi(String doi) {
|
||||||
|
this.doi = doi;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getSbj() {
|
||||||
|
return sbj;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSbj(String sbj) {
|
||||||
|
this.sbj = sbj;
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,89 +0,0 @@
|
||||||
package eu.dnetlib.dhp.actionmanager.datacite
|
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils
|
|
||||||
import org.apache.http.client.config.RequestConfig
|
|
||||||
import org.apache.http.client.methods.{HttpGet, HttpPost, HttpRequestBase, HttpUriRequest}
|
|
||||||
import org.apache.http.entity.StringEntity
|
|
||||||
import org.apache.http.impl.client.{HttpClientBuilder, HttpClients}
|
|
||||||
|
|
||||||
import java.io.IOException
|
|
||||||
|
|
||||||
/** Base class for REST clients exposed as an iterator of strings backed by an in-memory buffer.
  *
  * `next()` serves the items of `buffer` and calls `getBufferData()` once the last buffered item has
  * been handed out; `getBufferData()` is also called once at construction time. Implementations are
  * presumably expected to refill `buffer` and reset `current_index` there — confirm against subclasses.
  */
abstract class AbstractRestClient extends Iterator[String] {

  /** Items currently available to the iterator. */
  var buffer: List[String] = List()

  /** Position in `buffer` of the next item to return. */
  var current_index: Int = 0

  /** State left to subclasses; not read by this class. */
  var scroll_value: Option[String] = None

  /** State left to subclasses; not read by this class. */
  var complete: Boolean = false

  def extractInfo(input: String): Unit

  protected def getBufferData(): Unit

  /** Executes a GET on `url` and returns the response body. */
  def doHTTPGETRequest(url: String): String = {
    val httpGet = new HttpGet(url)
    doHTTPRequest(httpGet)
  }

  /** Executes a POST on `url` and returns the response body.
    *
    * @param json optional JSON payload; when not null it is sent as the request entity together with
    *             JSON `Accept` and `Content-type` headers
    */
  def doHTTPPOSTRequest(url: String, json: String): String = {
    val httpPost = new HttpPost(url)
    if (json != null) {
      val entity = new StringEntity(json)
      httpPost.setEntity(entity)
      httpPost.setHeader("Accept", "application/json")
      httpPost.setHeader("Content-type", "application/json")
    }
    doHTTPRequest(httpPost)
  }

  def hasNext: Boolean = {
    buffer.nonEmpty && current_index < buffer.size
  }

  override def next(): String = {
    val next_item: String = buffer(current_index)
    current_index = current_index + 1
    // last buffered item consumed: ask the implementation for more data
    if (current_index == buffer.size)
      getBufferData()
    next_item
  }

  /** Executes the request with 60 second timeouts, trying up to 4 times while the server answers
    * with an error status (>= 400).
    *
    * @return the response body decoded as UTF-8, or the empty string when every attempt failed
    * @throws RuntimeException wrapping any error raised while executing the request
    */
  private def doHTTPRequest[A <: HttpUriRequest](r: A): String = {
    val timeout = 60 // seconds
    val config = RequestConfig
      .custom()
      .setConnectTimeout(timeout * 1000)
      .setConnectionRequestTimeout(timeout * 1000)
      .setSocketTimeout(timeout * 1000)
      .build()
    val client = HttpClientBuilder.create().setDefaultRequestConfig(config).build()
    var tries = 4
    try {
      while (tries > 0) {
        println(s"requesting ${r.getURI}")
        val response = client.execute(r)
        try {
          val status = response.getStatusLine.getStatusCode
          println(s"get response with status${status}")
          if (status >= 400) {
            tries -= 1
          } else
            return IOUtils.toString(response.getEntity.getContent, "UTF-8")
        } finally {
          // always release the connection: a response left open keeps its pooled connection
          // leased, and the following retries would block waiting for a free one
          response.close()
        }
      }
      ""
    } catch {
      case e: Throwable =>
        throw new RuntimeException("Error on executing request ", e)
    } finally try client.close()
    catch {
      case e: IOException =>
        throw new RuntimeException("Unable to close client ", e)
    }
  }

  // must stay the last statement of the constructor: the fields above are initialised by now
  getBufferData()
}
|
|
|
@ -1,607 +0,0 @@
|
||||||
package eu.dnetlib.dhp.actionmanager.datacite
|
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper
|
|
||||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
|
||||||
import eu.dnetlib.dhp.schema.action.AtomicAction
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils}
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.{AccessRight, Author, DataInfo, Instance, KeyValue, Oaf, OtherResearchProduct, Publication, Qualifier, Relation, Result, Software, StructuredProperty, Dataset => OafDataset}
|
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils
|
|
||||||
import org.apache.commons.lang3.StringUtils
|
|
||||||
import org.json4s.DefaultFormats
|
|
||||||
import org.json4s.JsonAST.{JField, JObject, JString}
|
|
||||||
import org.json4s.jackson.JsonMethods.parse
|
|
||||||
|
|
||||||
import java.nio.charset.CodingErrorAction
|
|
||||||
import java.text.SimpleDateFormat
|
|
||||||
import java.time.LocalDate
|
|
||||||
import java.time.chrono.ThaiBuddhistDate
|
|
||||||
import java.time.format.DateTimeFormatter
|
|
||||||
import java.util.regex.Pattern
|
|
||||||
import java.util.{Date, Locale}
|
|
||||||
import scala.collection.JavaConverters._
|
|
||||||
import scala.io.{Codec, Source}
|
|
||||||
import scala.language.postfixOps
|
|
||||||
|
|
||||||
/** Datacite record: DOI, timestamp, active flag and JSON payload. */
case class DataciteType(
  doi: String,
  timestamp: Long,
  isActive: Boolean,
  json: String
)

/** Related identifier entry of a Datacite record. */
case class RelatedIdentifierType(
  relationType: String,
  relatedIdentifier: String,
  relatedIdentifierType: String
)

/** Name identifier of a creator; every field is optional. */
case class NameIdentifiersType(
  nameIdentifierScheme: Option[String],
  schemeUri: Option[String],
  nameIdentifier: Option[String]
)

/** Creator entry of a Datacite record; every field is optional. */
case class CreatorType(
  nameType: Option[String],
  nameIdentifiers: Option[List[NameIdentifiersType]],
  name: Option[String],
  familyName: Option[String],
  givenName: Option[String],
  affiliation: Option[List[String]]
)

/** Title entry of a Datacite record; every field is optional. */
case class TitleType(
  title: Option[String],
  titleType: Option[String],
  lang: Option[String]
)

/** Subject entry of a Datacite record; every field is optional. */
case class SubjectType(
  subject: Option[String],
  subjectScheme: Option[String]
)

/** Description entry of a Datacite record; every field is optional. */
case class DescriptionType(
  descriptionType: Option[String],
  description: Option[String]
)

/** Funding reference entry of a Datacite record; every field is optional. */
case class FundingReferenceType(
  funderIdentifierType: Option[String],
  awardTitle: Option[String],
  awardUri: Option[String],
  funderName: Option[String],
  funderIdentifier: Option[String],
  awardNumber: Option[String]
)

/** Date entry of a Datacite record; every field is optional. */
case class DateType(
  date: Option[String],
  dateType: Option[String]
)

/** Entry of the hosted-by map: OpenAIRE id, Datacite name, official name and optional similarity. */
case class HostedByMapType(
  openaire_id: String,
  datacite_name: String,
  official_name: String,
  similarity: Option[Float]
)
|
|
||||||
|
|
||||||
object DataciteToOAFTransformation {
|
|
||||||
|
|
||||||
val REL_TYPE_VALUE:String = "resultResult"
|
|
||||||
val DATE_RELATION_KEY = "RelationDate"
|
|
||||||
|
|
||||||
val subRelTypeMapping: Map[String,(String,String)] = Map(
|
|
||||||
"References" ->("IsReferencedBy","relationship"),
|
|
||||||
"IsSupplementTo" ->("IsSupplementedBy","supplement"),
|
|
||||||
"IsPartOf" ->("HasPart","part"),
|
|
||||||
"HasPart" ->("IsPartOf","part"),
|
|
||||||
"IsVersionOf" ->("HasVersion","version"),
|
|
||||||
"HasVersion" ->("IsVersionOf","version"),
|
|
||||||
"IsIdenticalTo" ->("IsIdenticalTo","relationship"),
|
|
||||||
"IsPreviousVersionOf" ->("IsNewVersionOf","version"),
|
|
||||||
"IsContinuedBy" ->("Continues","relationship"),
|
|
||||||
"Continues" ->("IsContinuedBy","relationship"),
|
|
||||||
"IsNewVersionOf" ->("IsPreviousVersionOf","version"),
|
|
||||||
"IsSupplementedBy" ->("IsSupplementTo","supplement"),
|
|
||||||
"IsDocumentedBy" ->("Documents","relationship"),
|
|
||||||
"IsSourceOf" ->("IsDerivedFrom","relationship"),
|
|
||||||
"Cites" ->("IsCitedBy","citation"),
|
|
||||||
"IsCitedBy" ->("Cites","citation"),
|
|
||||||
"IsDerivedFrom" ->("IsSourceOf","relationship"),
|
|
||||||
"IsVariantFormOf" ->("IsDerivedFrom","version"),
|
|
||||||
"IsReferencedBy" ->("References","relationship"),
|
|
||||||
"IsObsoletedBy" ->("IsNewVersionOf","version"),
|
|
||||||
"Reviews" ->("IsReviewedBy","review"),
|
|
||||||
"Documents" ->("IsDocumentedBy","relationship"),
|
|
||||||
"IsCompiledBy" ->("Compiles","relationship"),
|
|
||||||
"Compiles" ->("IsCompiledBy","relationship"),
|
|
||||||
"IsReviewedBy" ->("Reviews","review")
|
|
||||||
)
|
|
||||||
|
|
||||||
implicit val codec: Codec = Codec("UTF-8")
|
|
||||||
codec.onMalformedInput(CodingErrorAction.REPLACE)
|
|
||||||
codec.onUnmappableCharacter(CodingErrorAction.REPLACE)
|
|
||||||
|
|
||||||
val DOI_CLASS = "doi"
|
|
||||||
val SUBJ_CLASS = "keywords"
|
|
||||||
|
|
||||||
|
|
||||||
val j_filter: List[String] = {
|
|
||||||
val s = Source.fromInputStream(getClass.getResourceAsStream("datacite_filter")).mkString
|
|
||||||
s.lines.toList
|
|
||||||
}
|
|
||||||
|
|
||||||
val mapper = new ObjectMapper()
|
|
||||||
val unknown_repository: HostedByMapType = HostedByMapType(ModelConstants.UNKNOWN_REPOSITORY_ORIGINALID, ModelConstants.UNKNOWN_REPOSITORY.getValue, ModelConstants.UNKNOWN_REPOSITORY.getValue, Some(1.0F))
|
|
||||||
|
|
||||||
val dataInfo: DataInfo = generateDataInfo("0.9")
|
|
||||||
val DATACITE_COLLECTED_FROM: KeyValue = OafMapperUtils.keyValue(ModelConstants.DATACITE_ID, "Datacite")
|
|
||||||
|
|
||||||
val hostedByMap: Map[String, HostedByMapType] = {
|
|
||||||
val s = Source.fromInputStream(getClass.getResourceAsStream("hostedBy_map.json")).mkString
|
|
||||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
|
||||||
lazy val json: org.json4s.JValue = parse(s)
|
|
||||||
json.extract[Map[String, HostedByMapType]]
|
|
||||||
}
|
|
||||||
|
|
||||||
val df_en: DateTimeFormatter = DateTimeFormatter.ofPattern("[MM-dd-yyyy][MM/dd/yyyy][dd-MM-yy][dd-MMM-yyyy][dd/MMM/yyyy][dd-MMM-yy][dd/MMM/yy][dd-MM-yy][dd/MM/yy][dd-MM-yyyy][dd/MM/yyyy][yyyy-MM-dd][yyyy/MM/dd]", Locale.ENGLISH)
|
|
||||||
val df_it: DateTimeFormatter = DateTimeFormatter.ofPattern("[dd-MM-yyyy][dd/MM/yyyy]", Locale.ITALIAN)
|
|
||||||
|
|
||||||
val funder_regex: List[(Pattern, String)] = List(
|
|
||||||
(Pattern.compile("(info:eu-repo/grantagreement/ec/h2020/)(\\d\\d\\d\\d\\d\\d)(.*)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE), "40|corda__h2020::"),
|
|
||||||
(Pattern.compile("(info:eu-repo/grantagreement/ec/fp7/)(\\d\\d\\d\\d\\d\\d)(.*)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE), "40|corda_______::")
|
|
||||||
|
|
||||||
)
|
|
||||||
|
|
||||||
val Date_regex: List[Pattern] = List(
|
|
||||||
//Y-M-D
|
|
||||||
Pattern.compile("(18|19|20)\\d\\d([- /.])(0[1-9]|1[012])\\2(0[1-9]|[12][0-9]|3[01])", Pattern.MULTILINE),
|
|
||||||
//M-D-Y
|
|
||||||
Pattern.compile("((0[1-9]|1[012])|([1-9]))([- /.])(0[1-9]|[12][0-9]|3[01])([- /.])(18|19|20)?\\d\\d", Pattern.MULTILINE),
|
|
||||||
//D-M-Y
|
|
||||||
Pattern.compile("(?:(?:31(/|-|\\.)(?:0?[13578]|1[02]|(?:Jan|Mar|May|Jul|Aug|Oct|Dec)))\\1|(?:(?:29|30)(/|-|\\.)(?:0?[1,3-9]|1[0-2]|(?:Jan|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec))\\2))(?:(?:1[6-9]|[2-9]\\d)?\\d{2})|(?:29(/|-|\\.)(?:0?2|(?:Feb))\\3(?:(?:(?:1[6-9]|[2-9]\\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))|(?:0?[1-9]|1\\d|2[0-8])(/|-|\\.)(?:(?:0?[1-9]|(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep))|(?:1[0-2]|(?:Oct|Nov|Dec)))\\4(?:(?:1[6-9]|[2-9]\\d)?\\d{2})", Pattern.MULTILINE),
|
|
||||||
//Y
|
|
||||||
Pattern.compile("(19|20)\\d\\d", Pattern.MULTILINE)
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def filter_json(json: String): Boolean = {
|
|
||||||
j_filter.exists(f => json.contains(f))
|
|
||||||
}
|
|
||||||
|
|
||||||
def toActionSet(item: Oaf): (String, String) = {
|
|
||||||
val mapper = new ObjectMapper()
|
|
||||||
|
|
||||||
item match {
|
|
||||||
case dataset: OafDataset =>
|
|
||||||
val a: AtomicAction[OafDataset] = new AtomicAction[OafDataset]
|
|
||||||
a.setClazz(classOf[OafDataset])
|
|
||||||
a.setPayload(dataset)
|
|
||||||
(dataset.getClass.getCanonicalName, mapper.writeValueAsString(a))
|
|
||||||
case publication: Publication =>
|
|
||||||
val a: AtomicAction[Publication] = new AtomicAction[Publication]
|
|
||||||
a.setClazz(classOf[Publication])
|
|
||||||
a.setPayload(publication)
|
|
||||||
(publication.getClass.getCanonicalName, mapper.writeValueAsString(a))
|
|
||||||
case software: Software =>
|
|
||||||
val a: AtomicAction[Software] = new AtomicAction[Software]
|
|
||||||
a.setClazz(classOf[Software])
|
|
||||||
a.setPayload(software)
|
|
||||||
(software.getClass.getCanonicalName, mapper.writeValueAsString(a))
|
|
||||||
case orp: OtherResearchProduct =>
|
|
||||||
val a: AtomicAction[OtherResearchProduct] = new AtomicAction[OtherResearchProduct]
|
|
||||||
a.setClazz(classOf[OtherResearchProduct])
|
|
||||||
a.setPayload(orp)
|
|
||||||
(orp.getClass.getCanonicalName, mapper.writeValueAsString(a))
|
|
||||||
|
|
||||||
case relation: Relation =>
|
|
||||||
val a: AtomicAction[Relation] = new AtomicAction[Relation]
|
|
||||||
a.setClazz(classOf[Relation])
|
|
||||||
a.setPayload(relation)
|
|
||||||
(relation.getClass.getCanonicalName, mapper.writeValueAsString(a))
|
|
||||||
case _ =>
|
|
||||||
null
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def embargo_end(embargo_end_date: String): Boolean = {
|
|
||||||
val dt = LocalDate.parse(embargo_end_date, DateTimeFormatter.ofPattern("[yyyy-MM-dd]"))
|
|
||||||
val td = LocalDate.now()
|
|
||||||
td.isAfter(dt)
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def extract_date(input: String): Option[String] = {
|
|
||||||
val d = Date_regex.map(pattern => {
|
|
||||||
val matcher = pattern.matcher(input)
|
|
||||||
if (matcher.find())
|
|
||||||
matcher.group(0)
|
|
||||||
else
|
|
||||||
null
|
|
||||||
}
|
|
||||||
).find(s => s != null)
|
|
||||||
|
|
||||||
if (d.isDefined) {
|
|
||||||
val a_date = if (d.get.length == 4) s"01-01-${d.get}" else d.get
|
|
||||||
try {
|
|
||||||
return Some(LocalDate.parse(a_date, df_en).toString)
|
|
||||||
} catch {
|
|
||||||
case _: Throwable => try {
|
|
||||||
return Some(LocalDate.parse(a_date, df_it).toString)
|
|
||||||
} catch {
|
|
||||||
case _: Throwable =>
|
|
||||||
return None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
d
|
|
||||||
}
|
|
||||||
|
|
||||||
def fix_thai_date(input:String, format:String) :String = {
|
|
||||||
try {
|
|
||||||
val a_date = LocalDate.parse(input,DateTimeFormatter.ofPattern(format))
|
|
||||||
val d = ThaiBuddhistDate.of(a_date.getYear, a_date.getMonth.getValue, a_date.getDayOfMonth)
|
|
||||||
LocalDate.from(d).toString
|
|
||||||
} catch {
|
|
||||||
case _: Throwable => ""
|
|
||||||
}
|
|
||||||
}
|
|
||||||
def getTypeQualifier(resourceType: String, resourceTypeGeneral: String, schemaOrg: String, vocabularies: VocabularyGroup): (Qualifier, Qualifier) = {
|
|
||||||
if (resourceType != null && resourceType.nonEmpty) {
|
|
||||||
val typeQualifier = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType)
|
|
||||||
if (typeQualifier != null)
|
|
||||||
return (typeQualifier, vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, typeQualifier.getClassid))
|
|
||||||
}
|
|
||||||
if (schemaOrg != null && schemaOrg.nonEmpty) {
|
|
||||||
val typeQualifier = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, schemaOrg)
|
|
||||||
if (typeQualifier != null)
|
|
||||||
return (typeQualifier, vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, typeQualifier.getClassid))
|
|
||||||
|
|
||||||
}
|
|
||||||
if (resourceTypeGeneral != null && resourceTypeGeneral.nonEmpty) {
|
|
||||||
val typeQualifier = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceTypeGeneral)
|
|
||||||
if (typeQualifier != null)
|
|
||||||
return (typeQualifier, vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, typeQualifier.getClassid))
|
|
||||||
|
|
||||||
}
|
|
||||||
null
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def getResult(resourceType: String, resourceTypeGeneral: String, schemaOrg: String, vocabularies: VocabularyGroup): Result = {
|
|
||||||
val typeQualifiers: (Qualifier, Qualifier) = getTypeQualifier(resourceType, resourceTypeGeneral, schemaOrg, vocabularies)
|
|
||||||
if (typeQualifiers == null)
|
|
||||||
return null
|
|
||||||
val i = new Instance
|
|
||||||
i.setInstancetype(typeQualifiers._1)
|
|
||||||
typeQualifiers._2.getClassname match {
|
|
||||||
case "dataset" =>
|
|
||||||
val r = new OafDataset
|
|
||||||
r.setInstance(List(i).asJava)
|
|
||||||
return r
|
|
||||||
case "publication" =>
|
|
||||||
val r = new Publication
|
|
||||||
r.setInstance(List(i).asJava)
|
|
||||||
return r
|
|
||||||
case "software" =>
|
|
||||||
val r = new Software
|
|
||||||
r.setInstance(List(i).asJava)
|
|
||||||
return r
|
|
||||||
case "other" =>
|
|
||||||
val r = new OtherResearchProduct
|
|
||||||
r.setInstance(List(i).asJava)
|
|
||||||
return r
|
|
||||||
}
|
|
||||||
null
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def available_date(input: String): Boolean = {
|
|
||||||
|
|
||||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
|
||||||
lazy val json: org.json4s.JValue = parse(input)
|
|
||||||
val l: List[String] = for {
|
|
||||||
JObject(dates) <- json \\ "dates"
|
|
||||||
JField("dateType", JString(dateTypes)) <- dates
|
|
||||||
} yield dateTypes
|
|
||||||
|
|
||||||
l.exists(p => p.equalsIgnoreCase("available"))
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* As describe in ticket #6377
|
|
||||||
* when the result come from figshare we need to remove subject
|
|
||||||
* and set Access rights OPEN.
|
|
||||||
*
|
|
||||||
* @param r
|
|
||||||
*/
|
|
||||||
def fix_figshare(r: Result): Unit = {
|
|
||||||
|
|
||||||
if (r.getInstance() != null) {
|
|
||||||
val hosted_by_figshare = r.getInstance().asScala.exists(i => i.getHostedby != null && "figshare".equalsIgnoreCase(i.getHostedby.getValue))
|
|
||||||
if (hosted_by_figshare) {
|
|
||||||
r.getInstance().asScala.foreach(i => i.setAccessright(ModelConstants.OPEN_ACCESS_RIGHT()))
|
|
||||||
val l: List[StructuredProperty] = List()
|
|
||||||
r.setSubject(l.asJava)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
 * Builds an identifier of the form `<prefix>|<pidType>___::<md5(pid)>`.
 *
 * The first part (`idPrefix`, a pipe and the lower-cased `pidType`) is
 * right-padded with underscores up to 15 characters; the second part is
 * the MD5 of the lower-cased `pid`.
 *
 * @param pid      the persistent identifier value
 * @param pidType  the persistent identifier type
 * @param idPrefix the prefix placed before the pipe
 * @return the composed identifier
 */
def createDNetTargetIdentifier(pid: String, pidType: String, idPrefix: String): String = {
  val namespacePart = s"$idPrefix|${pidType.toLowerCase}".padTo(15, '_')
  val pidHash = IdentifierFactory.md5(pid.toLowerCase)
  s"$namespacePart::$pidHash"
}
|
|
||||||
|
|
||||||
/**
 * Wraps a date string and its qualifier into a StructuredProperty
 * carrying no DataInfo (the third argument is null).
 *
 * @param dt the date value
 * @param q  the qualifier describing the kind of date
 * @return the structured property built by OafMapperUtils
 */
def generateOAFDate(dt: String, q: Qualifier): StructuredProperty =
  OafMapperUtils.structuredProperty(dt, q, null)
|
|
||||||
|
|
||||||
/**
 * Creates a result-project relation of sub-type OUTCOME.
 *
 * @param sourceId identifier set as source of the relation
 * @param targetId identifier set as target of the relation
 * @param relClass relation class
 * @param cf       the single "collected from" entry attached to the relation
 * @param di       the DataInfo attached to the relation
 * @return the populated relation
 */
def generateRelation(sourceId: String, targetId: String, relClass: String, cf: KeyValue, di: DataInfo): Relation = {
  val relation = new Relation

  // endpoints
  relation.setSource(sourceId)
  relation.setTarget(targetId)

  // semantics
  relation.setRelType(ModelConstants.RESULT_PROJECT)
  relation.setSubRelType(ModelConstants.OUTCOME)
  relation.setRelClass(relClass)

  // provenance
  relation.setCollectedfrom(List(cf).asJava)
  relation.setDataInfo(di)

  relation
}
|
|
||||||
|
|
||||||
/**
 * Derives the pair of result-project relations implied by an award URI.
 *
 * The first entry of `funder_regex` whose pattern is found in `awardUri`
 * is used: the grant id is obtained by replacing every match with its
 * second capture group, and the project id is the entry's prefix followed
 * by the MD5 of that grant id.
 *
 * @param awardUri the award URI found in the funding references
 * @param sourceId the identifier of the result
 * @return the "isProducedBy" and "produces" relations, or an empty list
 *         when no funder pattern matches
 */
def get_projectRelation(awardUri: String, sourceId: String): List[Relation] = {
  funder_regex.find(s => s._1.matcher(awardUri).find()) match {
    case Some(funder) =>
      val funderPattern = funder._1
      val projectPrefix = funder._2
      val grantId = funderPattern.matcher(awardUri).replaceAll("$2")
      val targetId = s"$projectPrefix${DHPUtils.md5(grantId)}"
      List(
        generateRelation(sourceId, targetId, "isProducedBy", DATACITE_COLLECTED_FROM, dataInfo),
        generateRelation(targetId, sourceId, "produces", DATACITE_COLLECTED_FROM, dataInfo)
      )
    case None =>
      List()
  }
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
 * Maps one Datacite JSON record to its OAF representation.
 *
 * The returned list is empty when the record is rejected by `filter_json`,
 * has an empty DOI, cannot be typed by `getResult`, has no authors, or ends
 * up without an identifier. Otherwise it contains the result followed by the
 * project relations derived from the funding references and, when
 * `exportLinks` is set, the relations derived from the related identifiers.
 *
 * @param input            the Datacite record, as a JSON string
 * @param ts               not used by this method
 * @param dateOfCollection collection time; it is multiplied by 1000 before
 *                         building the Date, i.e. it is treated as seconds
 * @param vocabularies     vocabularies used to resolve qualifiers
 * @param exportLinks      whether relations to related identifiers are produced
 * @return the generated OAF entities, possibly empty
 */
def generateOAF(input: String, ts: Long, dateOfCollection: Long, vocabularies: VocabularyGroup, exportLinks: Boolean): List[Oaf] = {
  if (filter_json(input))
    return List()

  implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
  lazy val json = parse(input)

  val resourceType = (json \ "attributes" \ "types" \ "resourceType").extractOrElse[String](null)
  val resourceTypeGeneral = (json \ "attributes" \ "types" \ "resourceTypeGeneral").extractOrElse[String](null)
  val schemaOrg = (json \ "attributes" \ "types" \ "schemaOrg").extractOrElse[String](null)

  val doi = (json \ "attributes" \ "doi").extract[String]
  if (doi.isEmpty)
    return List()

  //Mapping type based on vocabularies dnet:publication_resource and dnet:result_typologies
  val result = getResult(resourceType, resourceTypeGeneral, schemaOrg, vocabularies)
  if (result == null)
    return List()

  // ---- identifiers ----
  val doi_q = OafMapperUtils.qualifier("doi", "doi", ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES)
  val pid = OafMapperUtils.structuredProperty(doi, doi_q, dataInfo)
  result.setPid(List(pid).asJava)
  // provisional id, replaced further down by IdentifierFactory.createIdentifier
  result.setId(OafMapperUtils.createOpenaireId(50, s"datacite____::$doi", true))
  result.setOriginalId(List(doi).asJava)

  // ---- collection / transformation dates (both taken from dateOfCollection) ----
  val d = new Date(dateOfCollection * 1000)
  val ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US)

  result.setDateofcollection(ISO8601FORMAT.format(d))
  result.setDateoftransformation(ISO8601FORMAT.format(d))
  result.setDataInfo(dataInfo)

  // ---- authors ----
  val creators = (json \\ "creators").extractOrElse[List[CreatorType]](List())

  val authors = creators.zipWithIndex.map { case (c, idx) =>
    val a = new Author
    a.setFullname(c.name.orNull)
    a.setName(c.givenName.orNull)
    a.setSurname(c.familyName.orNull)
    if (c.nameIdentifiers != null && c.nameIdentifiers.isDefined && c.nameIdentifiers.get != null) {
      a.setPid(c.nameIdentifiers.get.map(ni => {
        val q = if (ni.nameIdentifierScheme.isDefined) vocabularies.getTermAsQualifier(ModelConstants.DNET_PID_TYPES, ni.nameIdentifierScheme.get.toLowerCase()) else null
        if (ni.nameIdentifier != null && ni.nameIdentifier.isDefined) {
          OafMapperUtils.structuredProperty(ni.nameIdentifier.get, q, dataInfo)
        }
        else
          null
      })
        // name identifiers without a value map to null: keep them out of the PID list
        .filter(p => p != null)
        .asJava)
    }
    if (c.affiliation.isDefined)
      a.setAffiliation(c.affiliation.get.filter(af => af.nonEmpty).map(af => OafMapperUtils.field(af, dataInfo)).asJava)
    // rank follows the position of the creator in the record, starting from 1
    a.setRank(idx + 1)
    a
  }

  // ---- titles ----
  val titles: List[TitleType] = (json \\ "titles").extractOrElse[List[TitleType]](List())

  result.setTitle(titles.filter(t => t.title.nonEmpty).map(t => {
    if (t.titleType.isEmpty) {
      OafMapperUtils.structuredProperty(t.title.get, ModelConstants.MAIN_TITLE_QUALIFIER, null)
    } else {
      OafMapperUtils.structuredProperty(t.title.get, t.titleType.get, t.titleType.get, ModelConstants.DNET_DATACITE_TITLE, ModelConstants.DNET_DATACITE_TITLE, null)
    }
  }).asJava)

  // records without authors are discarded
  if (authors == null || authors.isEmpty || !authors.exists(a => a != null))
    return List()
  result.setAuthor(authors.asJava)

  // ---- dates ----
  val dates = (json \\ "dates").extract[List[DateType]]
  val publication_year = (json \\ "publicationYear").extractOrElse[String](null)

  // first "issued" date, as parsed by extract_date
  val i_date = dates
    .filter(d => d.date.isDefined && d.dateType.isDefined)
    .find(d => d.dateType.get.equalsIgnoreCase("issued"))
    .map(d => extract_date(d.date.get))
  // first "available" date that extract_date could parse
  val a_date: Option[String] = dates
    .filter(d => d.date.isDefined && d.dateType.isDefined && d.dateType.get.equalsIgnoreCase("available"))
    .map(d => extract_date(d.date.get))
    .find(d => d != null && d.isDefined)
    .map(d => d.get)

  // DOIs under this prefix have their dates passed through fix_thai_date
  val thaiDoi = doi.startsWith("10.14457")

  if (a_date.isDefined) {
    val embargoEnd = if (thaiDoi) fix_thai_date(a_date.get, "[yyyy-MM-dd]") else a_date.get
    result.setEmbargoenddate(OafMapperUtils.field(embargoEnd, null))
  }

  // date of acceptance: the issued date when available, otherwise 1st January of the publication year
  val acceptanceDate: Option[String] =
    if (i_date.isDefined && i_date.get.isDefined)
      Some(if (thaiDoi) fix_thai_date(i_date.get.get, "[yyyy-MM-dd]") else i_date.get.get)
    else if (publication_year != null)
      Some(if (thaiDoi) fix_thai_date(s"01-01-$publication_year", "[dd-MM-yyyy]") else s"01-01-$publication_year")
    else
      None

  acceptanceDate.foreach { date =>
    // two distinct Field objects, one for the result and one for its first instance
    result.setDateofacceptance(OafMapperUtils.field(date, null))
    result.getInstance().get(0).setDateofacceptance(OafMapperUtils.field(date, null))
  }

  result.setRelevantdate(dates.filter(d => d.date.isDefined && d.dateType.isDefined)
    .map(d => (extract_date(d.date.get), d.dateType.get))
    .filter(d => d._1.isDefined)
    .map(d => (d._1.get, vocabularies.getTermAsQualifier(ModelConstants.DNET_DATACITE_DATE, d._2.toLowerCase())))
    .filter(d => d._2 != null)
    .map(d => generateOAFDate(d._1, d._2)).asJava)

  // ---- subjects, provenance, descriptions, publisher, language ----
  val subjects = (json \\ "subjects").extract[List[SubjectType]]

  result.setSubject(subjects.filter(s => s.subject.nonEmpty)
    .map(s =>
      OafMapperUtils.structuredProperty(s.subject.get, SUBJ_CLASS, SUBJ_CLASS, ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES, null)
    ).asJava)

  result.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava)

  val descriptions = (json \\ "descriptions").extract[List[DescriptionType]]

  result.setDescription(
    descriptions
      .filter(d => d.description.isDefined).
      map(d =>
        OafMapperUtils.field(d.description.get, null)
      ).filter(s => s != null).asJava)

  val publisher = (json \\ "publisher").extractOrElse[String](null)
  if (publisher != null)
    result.setPublisher(OafMapperUtils.field(publisher, null))

  val language: String = (json \\ "language").extractOrElse[String](null)

  if (language != null)
    result.setLanguage(vocabularies.getSynonymAsQualifier(ModelConstants.DNET_LANGUAGES, language))

  // ---- instance ----
  val instance = result.getInstance().get(0)

  val client = (json \ "relationships" \ "client" \\ "id").extractOpt[String]

  val accessRights: List[String] = for {
    JObject(rightsList) <- json \\ "rightsList"
    JField("rightsUri", JString(rightsUri)) <- rightsList
  } yield rightsUri

  // first rights URI known to the access-modes vocabulary, copied into an AccessRight
  val aRights: Option[AccessRight] = accessRights.map(r => {
    vocabularies.getSynonymAsQualifier(ModelConstants.DNET_ACCESS_MODES, r)
  }).find(q => q != null).map(q => {
    val a = new AccessRight
    a.setClassid(q.getClassid)
    a.setClassname(q.getClassname)
    a.setSchemeid(q.getSchemeid)
    a.setSchemename(q.getSchemename)
    a
  })

  val access_rights_qualifier = if (aRights.isDefined) aRights.get else OafMapperUtils.accessRight(ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE, ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)

  // the instance is populated only when the record names its Datacite client
  if (client.isDefined) {
    val hb = hostedByMap.getOrElse(client.get.toUpperCase(), unknown_repository)
    instance.setHostedby(OafMapperUtils.keyValue(generateDSId(hb.openaire_id), hb.official_name))
    instance.setCollectedfrom(DATACITE_COLLECTED_FROM)
    instance.setUrl(List(s"https://dx.doi.org/$doi").asJava)
    instance.setAccessright(access_rights_qualifier)
    instance.setPid(result.getPid)
    // the license is the first http rights URI that looks like a license URL
    val license = accessRights
      .find(r => r.startsWith("http") && r.matches(".*(/licenses|/publicdomain|unlicense\\.org/|/legal-and-data-protection-notices|/download/license|/open-government-licence).*"))
    if (license.isDefined)
      instance.setLicense(OafMapperUtils.field(license.get, null))
  }

  // ---- relations ----
  val awardUris: List[String] = for {
    JObject(fundingReferences) <- json \\ "fundingReferences"
    JField("awardUri", JString(awardUri)) <- fundingReferences
  } yield awardUri

  result.setId(IdentifierFactory.createIdentifier(result))
  var relations: List[Relation] = awardUris.flatMap(a => get_projectRelation(a, result.getId)).filter(r => r != null)

  fix_figshare(result)

  if (result.getId == null)
    return List()

  if (exportLinks) {
    val rels: List[RelatedIdentifierType] = for {
      JObject(relIdentifier) <- json \\ "relatedIdentifiers"
      JField("relationType", JString(relationType)) <- relIdentifier
      JField("relatedIdentifierType", JString(relatedIdentifierType)) <- relIdentifier
      JField("relatedIdentifier", JString(relatedIdentifier)) <- relIdentifier
    } yield RelatedIdentifierType(relationType, relatedIdentifier, relatedIdentifierType)

    relations = relations ::: generateRelations(rels, result.getId, if (i_date.isDefined && i_date.get.isDefined) i_date.get.get else null)
  }
  if (relations != null && relations.nonEmpty) {
    List(result) ::: relations
  }
  else
    List(result)
}
|
|
||||||
|
|
||||||
/**
 * Turns the related identifiers of a record into relations whose target is
 * an unresolved identifier of the form `unresolved::<pid>::<pidType>`.
 *
 * Only relation types present in `subRelTypeMapping` and identifiers of type
 * doi, pmid or arxiv (compared ignoring case) are kept.
 *
 * @param rels the related identifiers extracted from the record
 * @param id   the identifier of the source result
 * @param date value stored under DATE_RELATION_KEY in the relation properties;
 *             the caller passes null when no issued date is known
 * @return one relation per accepted related identifier
 */
private def generateRelations(rels: List[RelatedIdentifierType], id: String, date: String): List[Relation] = {
  val supportedPidTypes = List("doi", "pmid", "arxiv")

  rels
    .filter(r =>
      subRelTypeMapping.contains(r.relationType) &&
        supportedPidTypes.exists(pidType => r.relatedIdentifierType.equalsIgnoreCase(pidType))
    )
    .map(r => {
      val rel = new Relation
      rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava)
      rel.setDataInfo(dataInfo)

      val subRelType = subRelTypeMapping(r.relationType)._2
      rel.setRelType(REL_TYPE_VALUE)
      rel.setSubRelType(subRelType)
      rel.setRelClass(r.relationType)

      val dateProps: KeyValue = OafMapperUtils.keyValue(DATE_RELATION_KEY, date)
      rel.setProperties(List(dateProps).asJava)

      rel.setSource(id)
      rel.setTarget(s"unresolved::${r.relatedIdentifier}::${r.relatedIdentifierType}")
      rel
    })
}
|
|
||||||
|
|
||||||
/**
 * Builds a DataInfo flagged as not inferred, not deleted by inference and
 * not invisible, with the given trust and the PROVENANCE_ACTION_SET_QUALIFIER
 * provenance action.
 *
 * @param trust the trust value to store
 * @return the populated DataInfo
 */
def generateDataInfo(trust: String): DataInfo = {
  val info = new DataInfo

  // flags
  info.setInferred(false)
  info.setDeletedbyinference(false)
  info.setInvisible(false)

  // provenance
  info.setProvenanceaction(ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER)
  info.setTrust(trust)

  info
}
|
|
||||||
|
|
||||||
/**
 * Builds a datasource identifier from a `<namespace>::<local id>` string:
 * the text before the first "::" is kept as is, the text after it is
 * replaced by its MD5, and the whole is prefixed with "10|".
 *
 * @param input the datasource id in `<namespace>::<local id>` form
 * @return the identifier `10|<namespace>::<md5(local id)>`
 */
def generateDSId(input: String): String = {
  val namespace = StringUtils.substringBefore(input, "::")
  val localId = StringUtils.substringAfter(input, "::")
  val hashedLocalId = DHPUtils.md5(localId)
  s"10|$namespace::$hashedLocalId"
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,41 +0,0 @@
|
||||||
package eu.dnetlib.dhp.actionmanager.datacite
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Oaf
|
|
||||||
import org.apache.hadoop.io.Text
|
|
||||||
import org.apache.hadoop.io.compress.GzipCodec
|
|
||||||
import org.apache.hadoop.mapred.SequenceFileOutputFormat
|
|
||||||
import org.apache.spark.SparkConf
|
|
||||||
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
|
|
||||||
import org.slf4j.{Logger, LoggerFactory}
|
|
||||||
|
|
||||||
import scala.io.Source
|
|
||||||
|
|
||||||
/**
 * Spark job that loads a dataset of Oaf entities, converts each of them with
 * `DataciteToOAFTransformation.toActionSet` and stores the resulting pairs
 * as a gzip-compressed Hadoop sequence file of Text keys and values.
 */
object ExportActionSetJobNode {

  val log: Logger = LoggerFactory.getLogger(ExportActionSetJobNode.getClass)

  /**
   * Entry point.
   *
   * @param args command line arguments, parsed against exportDataset_parameters.json
   *             (the job reads `master`, `sourcePath` and `targetPath`)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf

    val parametersJson = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/exportDataset_parameters.json")).mkString
    val parser = new ArgumentApplicationParser(parametersJson)
    parser.parseArgument(args)

    val master = parser.get("master")
    val sourcePath = parser.get("sourcePath")
    val targetPath = parser.get("targetPath")

    val spark: SparkSession = SparkSession
      .builder()
      .config(conf)
      .appName(ExportActionSetJobNode.getClass.getSimpleName)
      .master(master)
      .getOrCreate()

    implicit val resEncoder: Encoder[Oaf] = Encoders.kryo[Oaf]
    implicit val tEncoder: Encoder[(String, String)] = Encoders.tuple(Encoders.STRING, Encoders.STRING)

    // entities that toActionSet maps to null are dropped
    val actionSetPairs = spark.read.load(sourcePath).as[Oaf]
      .map(o => DataciteToOAFTransformation.toActionSet(o))
      .filter(o => o != null)

    actionSetPairs.rdd
      .map(s => (new Text(s._1), new Text(s._2)))
      .saveAsHadoopFile(s"$targetPath", classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text, Text]], classOf[GzipCodec])
  }

}
|
|
|
@ -1,46 +0,0 @@
|
||||||
package eu.dnetlib.dhp.actionmanager.datacite
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
|
||||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
|
||||||
import eu.dnetlib.dhp.schema.mdstore.MetadataRecord
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.{Oaf, Result}
|
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory
|
|
||||||
import org.apache.spark.SparkConf
|
|
||||||
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
|
|
||||||
import org.slf4j.{Logger, LoggerFactory}
|
|
||||||
|
|
||||||
import scala.io.Source
|
|
||||||
|
|
||||||
/**
 * Spark job that loads a dataset of Oaf entities, keeps only those that are
 * Result instances and overwrites the target path with them.
 */
object FilterCrossrefEntitiesSpark {

  // the logger is bound to this job (getClass.getClass would name it "java.lang.Class")
  val log: Logger = LoggerFactory.getLogger(FilterCrossrefEntitiesSpark.getClass)

  /**
   * Entry point.
   *
   * @param args command line arguments, parsed against filter_crossref_param.json
   *             (the job reads `master`, `sourcePath` and `targetPath`)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf
    val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/filter_crossref_param.json")).mkString)
    parser.parseArgument(args)
    val master = parser.get("master")
    val sourcePath = parser.get("sourcePath")
    log.info("sourcePath: {}", sourcePath)
    val targetPath = parser.get("targetPath")
    log.info("targetPath: {}", targetPath)

    val spark: SparkSession = SparkSession.builder().config(conf)
      .appName(getClass.getSimpleName)
      .master(master)
      .getOrCreate()

    implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf]
    implicit val resEncoder: Encoder[Result] = Encoders.kryo[Result]

    val d: Dataset[Oaf] = spark.read.load(sourcePath).as[Oaf]

    // keep the Result entities only, typed as Result
    d.filter(r => r.isInstanceOf[Result]).map(r => r.asInstanceOf[Result]).write.mode(SaveMode.Overwrite).save(targetPath)
  }

}
|
|
|
@ -1,48 +0,0 @@
|
||||||
package eu.dnetlib.dhp.actionmanager.datacite
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
|
||||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
|
||||||
import eu.dnetlib.dhp.schema.mdstore.MetadataRecord
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Oaf
|
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory
|
|
||||||
import org.apache.spark.SparkConf
|
|
||||||
import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
|
|
||||||
import org.slf4j.{Logger, LoggerFactory}
|
|
||||||
|
|
||||||
import scala.io.Source
|
|
||||||
|
|
||||||
/**
 * Spark job that loads the Datacite dump, keeps the active records, maps each
 * of them to OAF entities through `DataciteToOAFTransformation.generateOAF`
 * and overwrites the target path with the result.
 */
object GenerateDataciteDatasetSpark {

  val log: Logger = LoggerFactory.getLogger(GenerateDataciteDatasetSpark.getClass)

  /**
   * Entry point.
   *
   * @param args command line arguments, parsed against generate_dataset_params.json
   *             (the job reads `master`, `sourcePath`, `targetPath`, `exportLinks`
   *             and `isLookupUrl`)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf

    val parametersJson = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/generate_dataset_params.json")).mkString
    val parser = new ArgumentApplicationParser(parametersJson)
    parser.parseArgument(args)

    val master = parser.get("master")
    val sourcePath = parser.get("sourcePath")
    val targetPath = parser.get("targetPath")
    // anything other than "true" (ignoring case) disables the export of links
    val exportLinks = "true".equalsIgnoreCase(parser.get("exportLinks"))
    val isLookupUrl: String = parser.get("isLookupUrl")
    log.info("isLookupUrl: {}", isLookupUrl)

    // vocabularies are loaded once, before the Spark session is created
    val isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl)
    val vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService)

    val spark: SparkSession = SparkSession
      .builder()
      .config(conf)
      .appName(GenerateDataciteDatasetSpark.getClass.getSimpleName)
      .master(master)
      .getOrCreate()

    implicit val mrEncoder: Encoder[MetadataRecord] = Encoders.kryo[MetadataRecord]

    implicit val resEncoder: Encoder[Oaf] = Encoders.kryo[Oaf]

    import spark.implicits._

    val activeRecords = spark.read.load(sourcePath).as[DataciteType]
      .filter(d => d.isActive)

    // the record timestamp is passed both as ts and as dateOfCollection
    val oafEntities = activeRecords
      .flatMap(d => DataciteToOAFTransformation.generateOAF(d.json, d.timestamp, d.timestamp, vocabularies, exportLinks))
      .filter(d => d != null)

    oafEntities.write.mode(SaveMode.Overwrite).save(targetPath)
  }
}
|
|
|
@ -0,0 +1,181 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.actionmanager.opencitations;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
import org.apache.commons.cli.ParseException;
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.hadoop.io.Text;
|
||||||
|
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.FilterFunction;
|
||||||
|
import org.apache.spark.api.java.function.FlatMapFunction;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
||||||
|
import scala.Tuple2;
|
||||||
|
|
||||||
|
public class CreateActionSetSparkJob implements Serializable {
|
||||||
|
public static final String OPENCITATIONS_CLASSID = "sysimport:crosswalk:opencitations";
|
||||||
|
public static final String OPENCITATIONS_CLASSNAME = "Imported from OpenCitations";
|
||||||
|
private static final String ID_PREFIX = "50|doi_________::";
|
||||||
|
private static final String TRUST = "0.91";
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(CreateActionSetSparkJob.class);
|
||||||
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
|
public static void main(final String[] args) throws IOException, ParseException {
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
|
IOUtils
|
||||||
|
.toString(
|
||||||
|
Objects
|
||||||
|
.requireNonNull(
|
||||||
|
CreateActionSetSparkJob.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/actionmanager/opencitations/as_parameters.json"))));
|
||||||
|
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
final String inputPath = parser.get("inputPath");
|
||||||
|
log.info("inputPath {}", inputPath.toString());
|
||||||
|
|
||||||
|
final String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath {}", outputPath);
|
||||||
|
|
||||||
|
final boolean shouldDuplicateRels = Optional
|
||||||
|
.ofNullable(parser.get("shouldDuplicateRels"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.FALSE);
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
extractContent(spark, inputPath, outputPath, shouldDuplicateRels);
|
||||||
|
});
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void extractContent(SparkSession spark, String inputPath, String outputPath,
|
||||||
|
boolean shouldDuplicateRels) {
|
||||||
|
spark
|
||||||
|
.sqlContext()
|
||||||
|
.createDataset(spark.sparkContext().textFile(inputPath + "/*", 6000), Encoders.STRING())
|
||||||
|
.flatMap(
|
||||||
|
(FlatMapFunction<String, Relation>) value -> createRelation(value, shouldDuplicateRels).iterator(),
|
||||||
|
Encoders.bean(Relation.class))
|
||||||
|
.filter((FilterFunction<Relation>) value -> value != null)
|
||||||
|
.toJavaRDD()
|
||||||
|
.map(p -> new AtomicAction(p.getClass(), p))
|
||||||
|
.mapToPair(
|
||||||
|
aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
|
||||||
|
new Text(OBJECT_MAPPER.writeValueAsString(aa))))
|
||||||
|
.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Relation> createRelation(String value, boolean duplicate) {
|
||||||
|
String[] line = value.split(",");
|
||||||
|
if (!line[1].startsWith("10.")) {
|
||||||
|
return new ArrayList<>();
|
||||||
|
}
|
||||||
|
List<Relation> relationList = new ArrayList<>();
|
||||||
|
|
||||||
|
String citing = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", line[1]));
|
||||||
|
final String cited = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", line[2]));
|
||||||
|
|
||||||
|
relationList
|
||||||
|
.addAll(
|
||||||
|
getRelations(
|
||||||
|
citing,
|
||||||
|
cited));
|
||||||
|
|
||||||
|
if (duplicate && line[1].endsWith(".refs")) {
|
||||||
|
citing = ID_PREFIX + IdentifierFactory
|
||||||
|
.md5(CleaningFunctions.normalizePidValue("doi", line[1].substring(0, line[1].indexOf(".refs"))));
|
||||||
|
relationList.addAll(getRelations(citing, cited));
|
||||||
|
}
|
||||||
|
|
||||||
|
return relationList;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Collection<Relation> getRelations(String citing, String cited) {
|
||||||
|
|
||||||
|
return Arrays
|
||||||
|
.asList(
|
||||||
|
getRelation(citing, cited, ModelConstants.CITES),
|
||||||
|
getRelation(cited, citing, ModelConstants.IS_CITED_BY));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Relation getRelation(
|
||||||
|
String source,
|
||||||
|
String target,
|
||||||
|
String relclass) {
|
||||||
|
Relation r = new Relation();
|
||||||
|
r.setCollectedfrom(getCollectedFrom());
|
||||||
|
r.setSource(source);
|
||||||
|
r.setTarget(target);
|
||||||
|
r.setRelClass(relclass);
|
||||||
|
r.setRelType(ModelConstants.RESULT_RESULT);
|
||||||
|
r.setSubRelType(ModelConstants.CITATION);
|
||||||
|
r
|
||||||
|
.setDataInfo(
|
||||||
|
getDataInfo());
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static List<KeyValue> getCollectedFrom() {
|
||||||
|
KeyValue kv = new KeyValue();
|
||||||
|
kv.setKey(ModelConstants.OPENOCITATIONS_ID);
|
||||||
|
kv.setValue(ModelConstants.OPENOCITATIONS_NAME);
|
||||||
|
|
||||||
|
return Arrays.asList(kv);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static DataInfo getDataInfo() {
|
||||||
|
DataInfo di = new DataInfo();
|
||||||
|
di.setInferred(false);
|
||||||
|
di.setDeletedbyinference(false);
|
||||||
|
di.setTrust(TRUST);
|
||||||
|
|
||||||
|
di
|
||||||
|
.setProvenanceaction(
|
||||||
|
getQualifier(OPENCITATIONS_CLASSID, OPENCITATIONS_CLASSNAME, ModelConstants.DNET_PROVENANCE_ACTIONS));
|
||||||
|
return di;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Qualifier getQualifier(String class_id, String class_name,
|
||||||
|
String qualifierSchema) {
|
||||||
|
Qualifier pa = new Qualifier();
|
||||||
|
pa.setClassid(class_id);
|
||||||
|
pa.setClassname(class_name);
|
||||||
|
pa.setSchemeid(qualifierSchema);
|
||||||
|
pa.setSchemename(qualifierSchema);
|
||||||
|
return pa;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,93 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.actionmanager.opencitations;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.zip.GZIPOutputStream;
|
||||||
|
import java.util.zip.ZipEntry;
|
||||||
|
import java.util.zip.ZipInputStream;
|
||||||
|
|
||||||
|
import org.apache.commons.cli.ParseException;
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FSDataInputStream;
|
||||||
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
|
||||||
|
public class GetOpenCitationsRefs implements Serializable {
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(GetOpenCitationsRefs.class);
|
||||||
|
|
||||||
|
public static void main(final String[] args) throws IOException, ParseException {
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
|
IOUtils
|
||||||
|
.toString(
|
||||||
|
Objects
|
||||||
|
.requireNonNull(
|
||||||
|
GetOpenCitationsRefs.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/actionmanager/opencitations/input_parameters.json"))));
|
||||||
|
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
final String[] inputFile = parser.get("inputFile").split(";");
|
||||||
|
log.info("inputFile {}", inputFile.toString());
|
||||||
|
|
||||||
|
final String workingPath = parser.get("workingPath");
|
||||||
|
log.info("workingPath {}", workingPath);
|
||||||
|
|
||||||
|
final String hdfsNameNode = parser.get("hdfsNameNode");
|
||||||
|
log.info("hdfsNameNode {}", hdfsNameNode);
|
||||||
|
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
conf.set("fs.defaultFS", hdfsNameNode);
|
||||||
|
|
||||||
|
FileSystem fileSystem = FileSystem.get(conf);
|
||||||
|
|
||||||
|
GetOpenCitationsRefs ocr = new GetOpenCitationsRefs();
|
||||||
|
|
||||||
|
for (String file : inputFile) {
|
||||||
|
ocr.doExtract(workingPath + "/Original/" + file, workingPath, fileSystem);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private void doExtract(String inputFile, String workingPath, FileSystem fileSystem)
|
||||||
|
throws IOException {
|
||||||
|
|
||||||
|
final Path path = new Path(inputFile);
|
||||||
|
|
||||||
|
FSDataInputStream oc_zip = fileSystem.open(path);
|
||||||
|
|
||||||
|
int count = 1;
|
||||||
|
try (ZipInputStream zis = new ZipInputStream(oc_zip)) {
|
||||||
|
ZipEntry entry = null;
|
||||||
|
while ((entry = zis.getNextEntry()) != null) {
|
||||||
|
|
||||||
|
if (!entry.isDirectory()) {
|
||||||
|
String fileName = entry.getName();
|
||||||
|
fileName = fileName.substring(0, fileName.indexOf("T")) + "_" + count;
|
||||||
|
count++;
|
||||||
|
try (
|
||||||
|
FSDataOutputStream out = fileSystem
|
||||||
|
.create(new Path(workingPath + "/COCI/" + fileName + ".gz"));
|
||||||
|
GZIPOutputStream gzipOs = new GZIPOutputStream(new BufferedOutputStream(out))) {
|
||||||
|
|
||||||
|
IOUtils.copy(zis, gzipOs);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -20,7 +20,7 @@ import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme;
|
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme;
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
@ -171,26 +171,23 @@ public class PrepareProgramme {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static CSVProgramme groupProgrammeByCode(CSVProgramme a, CSVProgramme b) {
|
private static CSVProgramme groupProgrammeByCode(CSVProgramme a, CSVProgramme b) {
|
||||||
if (!a.getLanguage().equals("en")) {
|
if (!a.getLanguage().equals("en") && b.getLanguage().equalsIgnoreCase("en")) {
|
||||||
if (b.getLanguage().equalsIgnoreCase("en")) {
|
|
||||||
a.setTitle(b.getTitle());
|
a.setTitle(b.getTitle());
|
||||||
a.setLanguage(b.getLanguage());
|
a.setLanguage(b.getLanguage());
|
||||||
}
|
}
|
||||||
}
|
if (StringUtils.isEmpty(a.getShortTitle()) && !StringUtils.isEmpty(b.getShortTitle())) {
|
||||||
if (StringUtils.isEmpty(a.getShortTitle())) {
|
|
||||||
if (!StringUtils.isEmpty(b.getShortTitle())) {
|
|
||||||
a.setShortTitle(b.getShortTitle());
|
a.setShortTitle(b.getShortTitle());
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
private static List<CSVProgramme> prepareClassification(JavaRDD<CSVProgramme> h2020Programmes) {
|
private static List<CSVProgramme> prepareClassification(JavaRDD<CSVProgramme> h2020Programmes) {
|
||||||
Object[] codedescription = h2020Programmes
|
Object[] codedescription = h2020Programmes
|
||||||
.map(
|
.map(
|
||||||
value -> new Tuple2<>(value.getCode(),
|
value -> new Tuple2<>(value.getCode(),
|
||||||
new Tuple2<String, String>(value.getTitle(), value.getShortTitle())))
|
new Tuple2<>(value.getTitle(), value.getShortTitle())))
|
||||||
.collect()
|
.collect()
|
||||||
.toArray();
|
.toArray();
|
||||||
|
|
||||||
|
@ -216,7 +213,7 @@ public class PrepareProgramme {
|
||||||
String[] tmp = ent.split("\\.");
|
String[] tmp = ent.split("\\.");
|
||||||
if (tmp.length <= 2) {
|
if (tmp.length <= 2) {
|
||||||
if (StringUtils.isEmpty(entry._2()._2())) {
|
if (StringUtils.isEmpty(entry._2()._2())) {
|
||||||
map.put(entry._1(), new Tuple2<String, String>(entry._2()._1(), entry._2()._1()));
|
map.put(entry._1(), new Tuple2<>(entry._2()._1(), entry._2()._1()));
|
||||||
} else {
|
} else {
|
||||||
map.put(entry._1(), entry._2());
|
map.put(entry._1(), entry._2());
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,7 +18,7 @@ import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.actionmanager.project.utils.CSVProject;
|
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject;
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
@ -29,7 +29,7 @@ import scala.Tuple2;
|
||||||
*/
|
*/
|
||||||
public class PrepareProjects {
|
public class PrepareProjects {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(PrepareProgramme.class);
|
private static final Logger log = LoggerFactory.getLogger(PrepareProjects.class);
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
|
@ -31,15 +31,16 @@ import eu.dnetlib.dhp.common.DbClient;
|
||||||
*/
|
*/
|
||||||
public class ReadProjectsFromDB implements Closeable {
|
public class ReadProjectsFromDB implements Closeable {
|
||||||
|
|
||||||
private final DbClient dbClient;
|
|
||||||
private static final Log log = LogFactory.getLog(ReadProjectsFromDB.class);
|
private static final Log log = LogFactory.getLog(ReadProjectsFromDB.class);
|
||||||
|
|
||||||
|
private static final String query = "SELECT code " +
|
||||||
|
"from projects where id like 'corda__h2020%' ";
|
||||||
|
|
||||||
|
private final DbClient dbClient;
|
||||||
private final Configuration conf;
|
private final Configuration conf;
|
||||||
private final BufferedWriter writer;
|
private final BufferedWriter writer;
|
||||||
private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
private final static String query = "SELECT code " +
|
|
||||||
"from projects where id like 'corda__h2020%' ";
|
|
||||||
|
|
||||||
public static void main(final String[] args) throws Exception {
|
public static void main(final String[] args) throws Exception {
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
IOUtils
|
IOUtils
|
||||||
|
@ -65,9 +66,9 @@ public class ReadProjectsFromDB implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void execute(final String sql, final Function<ResultSet, List<ProjectSubset>> producer) throws Exception {
|
public void execute(final String sql, final Function<ResultSet, List<ProjectSubset>> producer) {
|
||||||
|
|
||||||
final Consumer<ResultSet> consumer = rs -> producer.apply(rs).forEach(r -> writeProject(r));
|
final Consumer<ResultSet> consumer = rs -> producer.apply(rs).forEach(this::writeProject);
|
||||||
|
|
||||||
dbClient.processResults(sql, consumer);
|
dbClient.processResults(sql, consumer);
|
||||||
}
|
}
|
||||||
|
@ -94,20 +95,20 @@ public class ReadProjectsFromDB implements Closeable {
|
||||||
|
|
||||||
public ReadProjectsFromDB(
|
public ReadProjectsFromDB(
|
||||||
final String hdfsPath, String hdfsNameNode, final String dbUrl, final String dbUser, final String dbPassword)
|
final String hdfsPath, String hdfsNameNode, final String dbUrl, final String dbUser, final String dbPassword)
|
||||||
throws Exception {
|
throws IOException {
|
||||||
|
|
||||||
this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
|
this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
|
||||||
this.conf = new Configuration();
|
this.conf = new Configuration();
|
||||||
this.conf.set("fs.defaultFS", hdfsNameNode);
|
this.conf.set("fs.defaultFS", hdfsNameNode);
|
||||||
FileSystem fileSystem = FileSystem.get(this.conf);
|
FileSystem fileSystem = FileSystem.get(this.conf);
|
||||||
Path hdfsWritePath = new Path(hdfsPath);
|
Path hdfsWritePath = new Path(hdfsPath);
|
||||||
FSDataOutputStream fsDataOutputStream = null;
|
|
||||||
if (fileSystem.exists(hdfsWritePath)) {
|
if (fileSystem.exists(hdfsWritePath)) {
|
||||||
fileSystem.delete(hdfsWritePath, false);
|
fileSystem.delete(hdfsWritePath, false);
|
||||||
}
|
}
|
||||||
fsDataOutputStream = fileSystem.create(hdfsWritePath);
|
FSDataOutputStream fos = fileSystem.create(hdfsWritePath);
|
||||||
|
|
||||||
this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
|
this.writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -4,7 +4,6 @@ package eu.dnetlib.dhp.actionmanager.project;
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
|
@ -22,15 +21,16 @@ import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme;
|
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme;
|
||||||
import eu.dnetlib.dhp.actionmanager.project.utils.CSVProject;
|
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject;
|
||||||
import eu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic;
|
import eu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic;
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||||
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
import eu.dnetlib.dhp.schema.oaf.H2020Classification;
|
import eu.dnetlib.dhp.schema.oaf.H2020Classification;
|
||||||
import eu.dnetlib.dhp.schema.oaf.H2020Programme;
|
import eu.dnetlib.dhp.schema.oaf.H2020Programme;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
@ -47,13 +47,10 @@ import scala.Tuple2;
|
||||||
*
|
*
|
||||||
* To produce one single entry for each project code a step of groupoing is needed: each project can be associated to more
|
* To produce one single entry for each project code a step of groupoing is needed: each project can be associated to more
|
||||||
* than one programme.
|
* than one programme.
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
public class SparkAtomicActionJob {
|
public class SparkAtomicActionJob {
|
||||||
private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionJob.class);
|
private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionJob.class);
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
private static final HashMap<String, String> programmeMap = new HashMap<>();
|
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
||||||
|
@ -137,7 +134,6 @@ public class SparkAtomicActionJob {
|
||||||
h2020classification.setClassification(csvProgramme.getClassification());
|
h2020classification.setClassification(csvProgramme.getClassification());
|
||||||
h2020classification.setH2020Programme(pm);
|
h2020classification.setH2020Programme(pm);
|
||||||
setLevelsandProgramme(h2020classification, csvProgramme.getClassification_short());
|
setLevelsandProgramme(h2020classification, csvProgramme.getClassification_short());
|
||||||
// setProgramme(h2020classification, ocsvProgramme.get().getClassification());
|
|
||||||
pp.setH2020classification(Arrays.asList(h2020classification));
|
pp.setH2020classification(Arrays.asList(h2020classification));
|
||||||
|
|
||||||
return pp;
|
return pp;
|
||||||
|
@ -152,20 +148,16 @@ public class SparkAtomicActionJob {
|
||||||
.map((MapFunction<Tuple2<Project, EXCELTopic>, Project>) p -> {
|
.map((MapFunction<Tuple2<Project, EXCELTopic>, Project>) p -> {
|
||||||
Optional<EXCELTopic> op = Optional.ofNullable(p._2());
|
Optional<EXCELTopic> op = Optional.ofNullable(p._2());
|
||||||
Project rp = p._1();
|
Project rp = p._1();
|
||||||
if (op.isPresent()) {
|
op.ifPresent(excelTopic -> rp.setH2020topicdescription(excelTopic.getTitle()));
|
||||||
rp.setH2020topicdescription(op.get().getTitle());
|
|
||||||
}
|
|
||||||
return rp;
|
return rp;
|
||||||
}, Encoders.bean(Project.class))
|
}, Encoders.bean(Project.class))
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.groupByKey(
|
.groupByKey(
|
||||||
(MapFunction<Project, String>) p -> p.getId(),
|
(MapFunction<Project, String>) OafEntity::getId,
|
||||||
Encoders.STRING())
|
Encoders.STRING())
|
||||||
.mapGroups((MapGroupsFunction<String, Project, Project>) (s, it) -> {
|
.mapGroups((MapGroupsFunction<String, Project, Project>) (s, it) -> {
|
||||||
Project first = it.next();
|
Project first = it.next();
|
||||||
it.forEachRemaining(p -> {
|
it.forEachRemaining(first::mergeFrom);
|
||||||
first.mergeFrom(p);
|
|
||||||
});
|
|
||||||
return first;
|
return first;
|
||||||
}, Encoders.bean(Project.class))
|
}, Encoders.bean(Project.class))
|
||||||
.toJavaRDD()
|
.toJavaRDD()
|
||||||
|
@ -189,12 +181,6 @@ public class SparkAtomicActionJob {
|
||||||
h2020Classification.getH2020Programme().setDescription(tmp[tmp.length - 1]);
|
h2020Classification.getH2020Programme().setDescription(tmp[tmp.length - 1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// private static void setProgramme(H2020Classification h2020Classification, String classification) {
|
|
||||||
// String[] tmp = classification.split(" \\| ");
|
|
||||||
//
|
|
||||||
// h2020Classification.getH2020Programme().setDescription(tmp[tmp.length - 1]);
|
|
||||||
// }
|
|
||||||
|
|
||||||
public static <R> Dataset<R> readPath(
|
public static <R> Dataset<R> readPath(
|
||||||
SparkSession spark, String inputPath, Class<R> clazz) {
|
SparkSession spark, String inputPath, Class<R> clazz) {
|
||||||
return spark
|
return spark
|
||||||
|
|
|
@ -1,40 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.project.utils;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
import org.apache.commons.csv.CSVFormat;
|
|
||||||
import org.apache.commons.csv.CSVRecord;
|
|
||||||
import org.apache.commons.lang.reflect.FieldUtils;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reads a generic csv and maps it into classes that mirror its schema
|
|
||||||
*/
|
|
||||||
public class CSVParser {
|
|
||||||
|
|
||||||
public <R> List<R> parse(String csvFile, String classForName)
|
|
||||||
throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException {
|
|
||||||
final CSVFormat format = CSVFormat.EXCEL
|
|
||||||
.withHeader()
|
|
||||||
.withDelimiter(';')
|
|
||||||
.withQuote('"')
|
|
||||||
.withTrim();
|
|
||||||
List<R> ret = new ArrayList<>();
|
|
||||||
final org.apache.commons.csv.CSVParser parser = org.apache.commons.csv.CSVParser.parse(csvFile, format);
|
|
||||||
final Set<String> headers = parser.getHeaderMap().keySet();
|
|
||||||
Class<?> clazz = Class.forName(classForName);
|
|
||||||
for (CSVRecord csvRecord : parser.getRecords()) {
|
|
||||||
final Object cc = clazz.newInstance();
|
|
||||||
for (String header : headers) {
|
|
||||||
FieldUtils.writeField(cc, header, csvRecord.get(header), true);
|
|
||||||
|
|
||||||
}
|
|
||||||
ret.add((R) cc);
|
|
||||||
}
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,200 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.project.utils;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* the mmodel for the projects csv file
|
|
||||||
*/
|
|
||||||
public class CSVProject implements Serializable {
|
|
||||||
private String rcn;
|
|
||||||
private String id;
|
|
||||||
private String acronym;
|
|
||||||
private String status;
|
|
||||||
private String programme;
|
|
||||||
private String topics;
|
|
||||||
private String frameworkProgramme;
|
|
||||||
private String title;
|
|
||||||
private String startDate;
|
|
||||||
private String endDate;
|
|
||||||
private String projectUrl;
|
|
||||||
private String objective;
|
|
||||||
private String totalCost;
|
|
||||||
private String ecMaxContribution;
|
|
||||||
private String call;
|
|
||||||
private String fundingScheme;
|
|
||||||
private String coordinator;
|
|
||||||
private String coordinatorCountry;
|
|
||||||
private String participants;
|
|
||||||
private String participantCountries;
|
|
||||||
private String subjects;
|
|
||||||
|
|
||||||
public String getRcn() {
|
|
||||||
return rcn;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setRcn(String rcn) {
|
|
||||||
this.rcn = rcn;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getId() {
|
|
||||||
return id;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setId(String id) {
|
|
||||||
this.id = id;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getAcronym() {
|
|
||||||
return acronym;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setAcronym(String acronym) {
|
|
||||||
this.acronym = acronym;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStatus() {
|
|
||||||
return status;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setStatus(String status) {
|
|
||||||
this.status = status;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getProgramme() {
|
|
||||||
return programme;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setProgramme(String programme) {
|
|
||||||
this.programme = programme;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getTopics() {
|
|
||||||
return topics;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setTopics(String topics) {
|
|
||||||
this.topics = topics;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getFrameworkProgramme() {
|
|
||||||
return frameworkProgramme;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setFrameworkProgramme(String frameworkProgramme) {
|
|
||||||
this.frameworkProgramme = frameworkProgramme;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getTitle() {
|
|
||||||
return title;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setTitle(String title) {
|
|
||||||
this.title = title;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStartDate() {
|
|
||||||
return startDate;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setStartDate(String startDate) {
|
|
||||||
this.startDate = startDate;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getEndDate() {
|
|
||||||
return endDate;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setEndDate(String endDate) {
|
|
||||||
this.endDate = endDate;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getProjectUrl() {
|
|
||||||
return projectUrl;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setProjectUrl(String projectUrl) {
|
|
||||||
this.projectUrl = projectUrl;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getObjective() {
|
|
||||||
return objective;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setObjective(String objective) {
|
|
||||||
this.objective = objective;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getTotalCost() {
|
|
||||||
return totalCost;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setTotalCost(String totalCost) {
|
|
||||||
this.totalCost = totalCost;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getEcMaxContribution() {
|
|
||||||
return ecMaxContribution;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setEcMaxContribution(String ecMaxContribution) {
|
|
||||||
this.ecMaxContribution = ecMaxContribution;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getCall() {
|
|
||||||
return call;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setCall(String call) {
|
|
||||||
this.call = call;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getFundingScheme() {
|
|
||||||
return fundingScheme;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setFundingScheme(String fundingScheme) {
|
|
||||||
this.fundingScheme = fundingScheme;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getCoordinator() {
|
|
||||||
return coordinator;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setCoordinator(String coordinator) {
|
|
||||||
this.coordinator = coordinator;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getCoordinatorCountry() {
|
|
||||||
return coordinatorCountry;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setCoordinatorCountry(String coordinatorCountry) {
|
|
||||||
this.coordinatorCountry = coordinatorCountry;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getParticipants() {
|
|
||||||
return participants;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setParticipants(String participants) {
|
|
||||||
this.participants = participants;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getParticipantCountries() {
|
|
||||||
return participantCountries;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setParticipantCountries(String participantCountries) {
|
|
||||||
this.participantCountries = participantCountries;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getSubjects() {
|
|
||||||
return subjects;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setSubjects(String subjects) {
|
|
||||||
this.subjects = subjects;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -17,6 +17,8 @@ import org.apache.poi.ss.usermodel.Row;
|
||||||
import org.apache.poi.xssf.usermodel.XSSFSheet;
|
import org.apache.poi.xssf.usermodel.XSSFSheet;
|
||||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads a generic excel file and maps it into classes that mirror its schema
|
* Reads a generic excel file and maps it into classes that mirror its schema
|
||||||
*/
|
*/
|
||||||
|
@ -26,13 +28,12 @@ public class EXCELParser {
|
||||||
throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException,
|
throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException,
|
||||||
InvalidFormatException {
|
InvalidFormatException {
|
||||||
|
|
||||||
OPCPackage pkg = OPCPackage.open(file);
|
try (OPCPackage pkg = OPCPackage.open(file); XSSFWorkbook wb = new XSSFWorkbook(pkg)) {
|
||||||
XSSFWorkbook wb = new XSSFWorkbook(pkg);
|
|
||||||
|
|
||||||
XSSFSheet sheet = wb.getSheet(sheetName);
|
XSSFSheet sheet = wb.getSheet(sheetName);
|
||||||
|
|
||||||
if (sheetName == null) {
|
if (sheet == null) {
|
||||||
throw new RuntimeException("Sheet name " + sheetName + " not present in current file");
|
throw new IllegalArgumentException("Sheet name " + sheetName + " not present in current file");
|
||||||
}
|
}
|
||||||
|
|
||||||
List<R> ret = new ArrayList<>();
|
List<R> ret = new ArrayList<>();
|
||||||
|
@ -73,5 +74,6 @@ public class EXCELParser {
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,34 +1,21 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.project.utils;
|
package eu.dnetlib.dhp.actionmanager.project.utils;
|
||||||
|
|
||||||
import java.io.BufferedWriter;
|
import java.io.*;
|
||||||
import java.io.Closeable;
|
import java.util.Optional;
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.OutputStreamWriter;
|
|
||||||
import java.nio.charset.StandardCharsets;
|
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.collection.HttpConnector2;
|
import eu.dnetlib.dhp.common.collection.GetCSV;
|
||||||
|
import eu.dnetlib.dhp.common.collection.HttpConnector2;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Applies the parsing of a csv file and writes the Serialization of it in hdfs
|
* Applies the parsing of a csv file and writes the Serialization of it in hdfs
|
||||||
*/
|
*/
|
||||||
public class ReadCSV implements Closeable {
|
public class ReadCSV {
|
||||||
private static final Log log = LogFactory.getLog(ReadCSV.class);
|
|
||||||
private final Configuration conf;
|
|
||||||
private final BufferedWriter writer;
|
|
||||||
private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
|
||||||
private final String csvFile;
|
|
||||||
|
|
||||||
public static void main(final String[] args) throws Exception {
|
public static void main(final String[] args) throws Exception {
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
|
@ -44,56 +31,22 @@ public class ReadCSV implements Closeable {
|
||||||
final String hdfsPath = parser.get("hdfsPath");
|
final String hdfsPath = parser.get("hdfsPath");
|
||||||
final String hdfsNameNode = parser.get("hdfsNameNode");
|
final String hdfsNameNode = parser.get("hdfsNameNode");
|
||||||
final String classForName = parser.get("classForName");
|
final String classForName = parser.get("classForName");
|
||||||
|
Optional<String> delimiter = Optional.ofNullable(parser.get("delimiter"));
|
||||||
|
char del = ';';
|
||||||
|
if (delimiter.isPresent())
|
||||||
|
del = delimiter.get().charAt(0);
|
||||||
|
|
||||||
try (final ReadCSV readCSV = new ReadCSV(hdfsPath, hdfsNameNode, fileURL)) {
|
Configuration conf = new Configuration();
|
||||||
|
conf.set("fs.defaultFS", hdfsNameNode);
|
||||||
|
|
||||||
log.info("Getting CSV file...");
|
FileSystem fileSystem = FileSystem.get(conf);
|
||||||
readCSV.execute(classForName);
|
BufferedReader reader = new BufferedReader(
|
||||||
|
new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)));
|
||||||
|
|
||||||
}
|
GetCSV.getCsv(fileSystem, reader, hdfsPath, classForName, del);
|
||||||
}
|
|
||||||
|
|
||||||
public void execute(final String classForName) throws Exception {
|
reader.close();
|
||||||
CSVParser csvParser = new CSVParser();
|
|
||||||
csvParser
|
|
||||||
.parse(csvFile, classForName)
|
|
||||||
.stream()
|
|
||||||
.forEach(p -> write(p));
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void close() throws IOException {
|
|
||||||
writer.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
public ReadCSV(
|
|
||||||
final String hdfsPath,
|
|
||||||
final String hdfsNameNode,
|
|
||||||
final String fileURL)
|
|
||||||
throws Exception {
|
|
||||||
this.conf = new Configuration();
|
|
||||||
this.conf.set("fs.defaultFS", hdfsNameNode);
|
|
||||||
HttpConnector2 httpConnector = new HttpConnector2();
|
|
||||||
FileSystem fileSystem = FileSystem.get(this.conf);
|
|
||||||
Path hdfsWritePath = new Path(hdfsPath);
|
|
||||||
FSDataOutputStream fsDataOutputStream = null;
|
|
||||||
if (fileSystem.exists(hdfsWritePath)) {
|
|
||||||
fileSystem.delete(hdfsWritePath, false);
|
|
||||||
}
|
|
||||||
fsDataOutputStream = fileSystem.create(hdfsWritePath);
|
|
||||||
|
|
||||||
this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
|
|
||||||
this.csvFile = httpConnector.getInputSource(fileURL);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void write(final Object p) {
|
|
||||||
try {
|
|
||||||
writer.write(OBJECT_MAPPER.writeValueAsString(p));
|
|
||||||
writer.newLine();
|
|
||||||
} catch (final Exception e) {
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,18 +11,20 @@ import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.collection.HttpConnector2;
|
import eu.dnetlib.dhp.common.collection.CollectorException;
|
||||||
|
import eu.dnetlib.dhp.common.collection.HttpConnector2;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Applies the parsing of an excel file and writes the Serialization of it in hdfs
|
* Applies the parsing of an excel file and writes the Serialization of it in hdfs
|
||||||
*/
|
*/
|
||||||
public class ReadExcel implements Closeable {
|
public class ReadExcel implements Closeable {
|
||||||
private static final Log log = LogFactory.getLog(ReadCSV.class);
|
private static final Log log = LogFactory.getLog(ReadExcel.class);
|
||||||
private final Configuration conf;
|
|
||||||
private final BufferedWriter writer;
|
private final BufferedWriter writer;
|
||||||
private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
private final InputStream excelFile;
|
private final InputStream excelFile;
|
||||||
|
@ -31,7 +33,7 @@ public class ReadExcel implements Closeable {
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
IOUtils
|
IOUtils
|
||||||
.toString(
|
.toString(
|
||||||
ReadCSV.class
|
ReadExcel.class
|
||||||
.getResourceAsStream(
|
.getResourceAsStream(
|
||||||
"/eu/dnetlib/dhp/actionmanager/project/parameters.json")));
|
"/eu/dnetlib/dhp/actionmanager/project/parameters.json")));
|
||||||
|
|
||||||
|
@ -51,13 +53,15 @@ public class ReadExcel implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void execute(final String classForName, final String sheetName) throws Exception {
|
public void execute(final String classForName, final String sheetName)
|
||||||
|
throws IOException, ClassNotFoundException, InvalidFormatException, IllegalAccessException,
|
||||||
|
InstantiationException {
|
||||||
|
|
||||||
EXCELParser excelParser = new EXCELParser();
|
EXCELParser excelParser = new EXCELParser();
|
||||||
excelParser
|
excelParser
|
||||||
.parse(excelFile, classForName, sheetName)
|
.parse(excelFile, classForName, sheetName)
|
||||||
.stream()
|
.stream()
|
||||||
.forEach(p -> write(p));
|
.forEach(this::write);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -68,20 +72,20 @@ public class ReadExcel implements Closeable {
|
||||||
public ReadExcel(
|
public ReadExcel(
|
||||||
final String hdfsPath,
|
final String hdfsPath,
|
||||||
final String hdfsNameNode,
|
final String hdfsNameNode,
|
||||||
final String fileURL)
|
final String fileURL) throws CollectorException, IOException {
|
||||||
throws Exception {
|
|
||||||
this.conf = new Configuration();
|
final Configuration conf = new Configuration();
|
||||||
this.conf.set("fs.defaultFS", hdfsNameNode);
|
conf.set("fs.defaultFS", hdfsNameNode);
|
||||||
HttpConnector2 httpConnector = new HttpConnector2();
|
HttpConnector2 httpConnector = new HttpConnector2();
|
||||||
FileSystem fileSystem = FileSystem.get(this.conf);
|
FileSystem fileSystem = FileSystem.get(conf);
|
||||||
Path hdfsWritePath = new Path(hdfsPath);
|
Path hdfsWritePath = new Path(hdfsPath);
|
||||||
FSDataOutputStream fsDataOutputStream = null;
|
|
||||||
if (fileSystem.exists(hdfsWritePath)) {
|
if (fileSystem.exists(hdfsWritePath)) {
|
||||||
fileSystem.delete(hdfsWritePath, false);
|
fileSystem.delete(hdfsWritePath, false);
|
||||||
}
|
}
|
||||||
fsDataOutputStream = fileSystem.create(hdfsWritePath);
|
FSDataOutputStream fos = fileSystem.create(hdfsWritePath);
|
||||||
|
|
||||||
this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
|
this.writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8));
|
||||||
this.excelFile = httpConnector.getInputSourceAsStream(fileURL);
|
this.excelFile = httpConnector.getInputSourceAsStream(fileURL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,20 +1,32 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.project.utils;
|
package eu.dnetlib.dhp.actionmanager.project.utils.model;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
import com.opencsv.bean.CsvBindByName;
|
||||||
|
import com.opencsv.bean.CsvIgnore;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The model for the programme csv file
|
* The model for the programme csv file
|
||||||
*/
|
*/
|
||||||
public class CSVProgramme implements Serializable {
|
public class CSVProgramme implements Serializable {
|
||||||
|
|
||||||
private String rcn;
|
@CsvBindByName(column = "code")
|
||||||
private String code;
|
private String code;
|
||||||
|
|
||||||
|
@CsvBindByName(column = "title")
|
||||||
private String title;
|
private String title;
|
||||||
|
|
||||||
|
@CsvBindByName(column = "shortTitle")
|
||||||
private String shortTitle;
|
private String shortTitle;
|
||||||
|
|
||||||
|
@CsvBindByName(column = "language")
|
||||||
private String language;
|
private String language;
|
||||||
|
|
||||||
|
@CsvIgnore
|
||||||
private String classification;
|
private String classification;
|
||||||
|
|
||||||
|
@CsvIgnore
|
||||||
private String classification_short;
|
private String classification_short;
|
||||||
|
|
||||||
public String getClassification_short() {
|
public String getClassification_short() {
|
||||||
|
@ -33,14 +45,6 @@ public class CSVProgramme implements Serializable {
|
||||||
this.classification = classification;
|
this.classification = classification;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getRcn() {
|
|
||||||
return rcn;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setRcn(String rcn) {
|
|
||||||
this.rcn = rcn;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getCode() {
|
public String getCode() {
|
||||||
return code;
|
return code;
|
||||||
}
|
}
|
||||||
|
@ -73,5 +77,4 @@ public class CSVProgramme implements Serializable {
|
||||||
this.language = language;
|
this.language = language;
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
|
||||||
}
|
}
|
|
@ -0,0 +1,46 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.actionmanager.project.utils.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
import com.opencsv.bean.CsvBindByName;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The model for the projects csv file
|
||||||
|
*/
|
||||||
|
public class CSVProject implements Serializable {
|
||||||
|
|
||||||
|
@CsvBindByName(column = "id")
|
||||||
|
private String id;
|
||||||
|
|
||||||
|
@CsvBindByName(column = "programme")
|
||||||
|
private String programme;
|
||||||
|
|
||||||
|
@CsvBindByName(column = "topics")
|
||||||
|
private String topics;
|
||||||
|
|
||||||
|
public String getId() {
|
||||||
|
return id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setId(String id) {
|
||||||
|
this.id = id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getProgramme() {
|
||||||
|
return programme;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setProgramme(String programme) {
|
||||||
|
this.programme = programme;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTopics() {
|
||||||
|
return topics;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTopics(String topics) {
|
||||||
|
this.topics = topics;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.project.utils;
|
package eu.dnetlib.dhp.actionmanager.project.utils.model;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue