From 32e2a8b34053dc8162bf20bb91c977d544d05b94 Mon Sep 17 00:00:00 2001 From: "sandro.labruzzo" Date: Wed, 4 Dec 2024 13:36:21 +0100 Subject: [PATCH 1/2] implemented zenodo dump collector plugin --- .../CollectZenodoDumpCollectorPlugin.java | 96 ++++++++++++++++++ .../plugin/zenodo/ZenodoTarIterator.java | 59 +++++++++++ .../zenodo/ZenodoPluginCollectionTest.java | 35 +++++++ .../dhp/collection/zenodo/zenodo.tar.gz | Bin 0 -> 7412 bytes 4 files changed, 190 insertions(+) create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/zenodo/CollectZenodoDumpCollectorPlugin.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/zenodo/ZenodoTarIterator.java create mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/zenodo/ZenodoPluginCollectionTest.java create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/zenodo/zenodo.tar.gz diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/zenodo/CollectZenodoDumpCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/zenodo/CollectZenodoDumpCollectorPlugin.java new file mode 100644 index 0000000000..3ea29a9b03 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/zenodo/CollectZenodoDumpCollectorPlugin.java @@ -0,0 +1,96 @@ + +package eu.dnetlib.dhp.collection.plugin.zenodo; + +import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration; + +import java.io.IOException; +import java.io.InputStream; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.CompressionCodecFactory; +import org.apache.http.client.config.RequestConfig; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClientBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.collection.ApiDescriptor; +import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; + +public class CollectZenodoDumpCollectorPlugin implements CollectorPlugin { + + final private Logger log = LoggerFactory.getLogger(getClass()); + + private void downloadItem(final String name, final String itemURL, final String basePath, + final FileSystem fileSystem) { + try { + final Path hdfsWritePath = new Path(String.format("%s/%s", basePath, name)); + final FSDataOutputStream fsDataOutputStream = fileSystem.create(hdfsWritePath, true); + final HttpGet request = new HttpGet(itemURL); + final int timeout = 60; // seconds + final RequestConfig config = RequestConfig + .custom() + .setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000) + .setSocketTimeout(timeout * 1000) + .build(); + log.info("Downloading url {} into {}", itemURL, hdfsWritePath.getName()); + try (CloseableHttpClient client = HttpClientBuilder.create().setDefaultRequestConfig(config).build(); + CloseableHttpResponse response = client.execute(request)) { + int responseCode = response.getStatusLine().getStatusCode(); + log.info("Response code is {}", responseCode); + if (responseCode >= 200 && responseCode < 400) { + IOUtils.copy(response.getEntity().getContent(), fsDataOutputStream); + } + } catch (Throwable eu) { + throw new RuntimeException(eu); + } + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + @Override + public Stream collect(ApiDescriptor api, AggregatorReport report) throws CollectorException { + try { + final String zenodoURL = api.getBaseUrl(); + final String hdfsURI = api.getParams().get("hdfsURI"); + final FileSystem fileSystem = FileSystem.get(getHadoopConfiguration(hdfsURI)); + downloadItem("zenodoDump.tar.gz", zenodoURL, "/tmp", fileSystem); + CompressionCodecFactory factory = new CompressionCodecFactory(fileSystem.getConf()); + + Path sourcePath = new Path("/tmp/zenodoDump.tar.gz"); + CompressionCodec codec = factory.getCodec(sourcePath); + InputStream gzipInputStream = null; + try { + gzipInputStream = codec.createInputStream(fileSystem.open(sourcePath)); + return iterateTar(gzipInputStream); + + } catch (IOException e) { + throw new CollectorException(e); + } finally { + log.info("Closing gzip stream"); + org.apache.hadoop.io.IOUtils.closeStream(gzipInputStream); + } + } catch (Exception e) { + throw new CollectorException(e); + } + } + + private Stream iterateTar(InputStream gzipInputStream) throws Exception { + + Iterable iterable = () -> new ZenodoTarIterator(gzipInputStream); + return StreamSupport.stream(iterable.spliterator(), false); + + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/zenodo/ZenodoTarIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/zenodo/ZenodoTarIterator.java new file mode 100644 index 0000000000..8e627683e6 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/zenodo/ZenodoTarIterator.java @@ -0,0 +1,59 @@ + +package eu.dnetlib.dhp.collection.plugin.zenodo; + +import java.io.Closeable; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Iterator; + +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; +import org.apache.commons.io.IOUtils; + +public class ZenodoTarIterator implements Iterator, Closeable { + + private final InputStream gzipInputStream; + private final StringBuilder currentItem = new StringBuilder(); + private TarArchiveInputStream tais; + private boolean hasNext; + + public ZenodoTarIterator(InputStream gzipInputStream) { + this.gzipInputStream = gzipInputStream; + tais = new TarArchiveInputStream(gzipInputStream); + hasNext = getNextItem(); + } + + private boolean getNextItem() { + try { + TarArchiveEntry entry; + while ((entry = tais.getNextTarEntry()) != null) { + if (entry.isFile()) { + currentItem.setLength(0); + currentItem.append(IOUtils.toString(new InputStreamReader(tais))); + return true; + } + } + return false; + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + @Override + public boolean hasNext() { + return hasNext; + } + + @Override + public String next() { + final String data = currentItem.toString(); + hasNext = getNextItem(); + return data; + } + + @Override + public void close() throws IOException { + gzipInputStream.close(); + } +} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/zenodo/ZenodoPluginCollectionTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/zenodo/ZenodoPluginCollectionTest.java new file mode 100644 index 0000000000..9b5cf1850f --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/zenodo/ZenodoPluginCollectionTest.java @@ -0,0 +1,35 @@ + +package eu.dnetlib.dhp.collection.plugin.zenodo; + +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.util.zip.GZIPInputStream; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.collection.ApiDescriptor; +import eu.dnetlib.dhp.common.collection.CollectorException; + +public class ZenodoPluginCollectionTest { + + @Test + public void testZenodoIterator() throws Exception { + + final GZIPInputStream gis = new GZIPInputStream( + getClass().getResourceAsStream("/eu/dnetlib/dhp/collection/zenodo/zenodo.tar.gz")); + try (ZenodoTarIterator it = new ZenodoTarIterator(gis)) { + Assertions.assertTrue(it.hasNext()); + int i = 0; + while (it.hasNext()) { + Assertions.assertNotNull(it.next()); + i++; + } + Assertions.assertEquals(10, i); + + } + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/zenodo/zenodo.tar.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/zenodo/zenodo.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c06bf4e5a5d568aa6c534cc4636796d9938f97e GIT binary patch literal 7412 zcmV44`c0g6Vvm2)J$*9; zQILd$BG?2eTe;eI*ca%jx!CHh+Ka8;CuqOPKFOW~iu$oB*|FrFwjNqPe| zq*``Y6H&he#qs~5p&6x}wEXU+>u-+TGuam=sCmU1!?@QE6MHPb>q~ zJDZ-or$a^p_$O`h7>bL4`aAnIP$;y^3gnay6XugHBMlmjs6P;UrMZXq_xgOq0(F{K zbar>IM~B=WtMf_id!@N+d$iZ%K3BtOALg47J`p`q>qp#6{5XlIPe9F)4-C4<0~&D} zkW-I?LSPc;y!wYA$29Q;IgNn<#N z`YbvD83K1+C)^*f2nsc|rR-TOl+Pbk-@gWHrW3pggB)h<+vS{@EAix~^HRo?W=b&T zvCp#K{WkAw#3Vc*uYrV^G@!yRC9NTg`bbOxr+bVJ)>skT7>Fp5qyd@&=pk(p4OL@= zN^-(RjQXrgI%6O^>2fB?xdd;8P;x2~c={}&;a(|idGZUCgh|Ke5=P(t&q~+tm1bAc zo2i-oE2GiA2@dL{z}(e6nRGrfFP77@{W=FvX~Hz{7$#Szx6kOM2*hyyR%uoV7+*H| zA^1-^eh#Ee7#L)vp{P4W(^B3AyHW5w#B){Bw2Qi1G>oidX;O9Odf)?O{*TuzU@$zO zwIS`Z9h6WK!RVSVgOB^z!h)f@agE%#GIJWzd%(Y}YsaAdj(R)!w7i8&FmY<2AU&Tc zj{}UcTat!e1x6r0s%6tCI!NcWHot%{bTHNax8rw=aO*q8jd60AH?#I%Q!nH>CddxtDQi^GGre91>^jXgNH2{ zc`ssenq7w4)nohFNm^x>NiX6d_C+rIMdr@aUe-D?j;I&!WP0jol&VTlI-o<{Uu=%l~&G= z(AF5W_F}zIjp0Bo8!2am=`L3)_xjP@Oyb#}h5uMdgeRo-#qpUE2a#;#3t#jw>;7 z9dBCF046K7XD-CR@uIR|5(lz z|M?on7XR7eKljCdl-hU<{AbeegW*4`>fapx^CRULUqPuTkn|JSO13mUyk0CnNQ4>} z14L|gjt`4}stJdN{G@Tv+}TIfTZI43V!2u_7cHw?-776Vu^4fJ*@NIJ0et{Au;mF~ z^iylw8VNtaBIuGpgoHQ1)T^h!VxCcc0p8Ut+AL(3CO8ovabOYFnm!E1-Ou9I0%=nW zZk4Yyy><-Yt*7WX6+!=#PZw%`0}jq#k=g3z0Pa>n#3nR5pxj4_m(xbxu&4v>Nirla z&T0oQZ|z6OIN13z`X&p#`Z$%KCch75qESza2Y({jNEeaEyK1243SwO>;!Of$jYY+G zpx2j=vT1?*@%1KIe|T9%)3vI_s_wdvGrT?y2g&zAoM^0@Xz|4=2`te1#h}N*>+_Qq zF%0w;&n}oh0?Duoq5>N5G1I}yA{c?>#pH}p7|ib=T9UCCDvLtdOVW$Pa5`4kMm7*n zco1YGJ2?h2V^mT%AA&Gzz(hD0$28~mJK7I~&qWCFN$&k|6oaQO^KLEjET=@q>-#}5 zrd2fbqVY{bF-F-gmr!;PSzS`CZ0ee+8BYO=fyp~JqP5VcpShaIyj!X@-CDEhRNL*Q z+iq8!reiqmc1y3@7F?^@Pl5iWSQ0oQ#+tBBuIwA&6L#z4B_y7)%Q)ZPGtGyP_g$!& zh&e`Oi*Jw?h3pu%RdY-Ogj@$9*FfIQcCAvb8|7BJp;x{(9ET3%1CFe31&+%ss78(l zA{L7_b11;d1eCnh2>87d&U{+vBI$2uN`W!Go{&uOGyVx&>sBZEv^<+|ayP&KX%!n8 z`L+XGu=HCdf2|jBuNM8pN7wqlP{_mdO=1%4D+CrX&@MNCUeghnvjCGmm*>X5U78D< zJ-(5bBZh;aPSw7JTVKS8r66*xE7DjHqBBK`3Nr=9fvRQ*uAzN0XNoWc`+)^xm|G2j zAm#Iek>C--T#G(ac@|g&^&$cEDWOsVu`&^1D57`*A*eDwX5t1Cej%o0GvrzQpf|G% z0bu{tPA{T6L|shfb{5eFYe5G(S4qFg2ix36Y!Sg)Y$+phb2IN$RdyxTmvGK ztSc-ajjR7%=nmYR^hi|)O_EsP>jM8d_Ue~}k>XsZu6~nP&SzKuiG8AB7;%Xrc?dZD zj&QR5kZ^K}n;pYF;H9E%qnPp=7$G3r8xYk|M+%?a4bVdmkE#(EUGkBZbdmsM#bOQq zFAY=!C8P8p2Rcq1?{yqn0S72kcKhXveJ1_F*XZRQjy@N zn4&Co#wA93arU6sFvxo{i{Of}e~Ffx>7`2@niUngY3Cgz_PhN+ot_Vn>4cjuZ$u=aI^!oHW5FZ*-A^Xr_=S zbk4wad7sDBhiZkNTA*BxNyRcsB<}~xZs6SjN0U_Vim%fG^-52SK>2jQ_Tu~CzAqqb z@YC1H(242O49v62DHtPt5JU@~Mo@0XjYG7BWGFyBv;*cQwaU9WFOk3dk;dLYbutZI zQ6xZ~;$%3dmI;mj_%%lzg3#bURTuo0ei;wM=ki zMr6q`ds&*YDB4vrV(Xj?6{Xgk$=~ z$elbgd9O+8V{nAPE_yhWkQ*Z|`AwNcQ<=d$WdZGgw{~p~D3N%=_hArEQk)n4zj<;b z4xSk1;dH=wBES@0n0T0I%z$z5o6Rcik6-6!36z;}3RragjmH+hf6ODz|JAE{`SJ39 zO{;t}|F^uw|G&nu#s9bX|Go2nQEfa*{%_jw!{z_3sDE?#|A`o~NEAq2fN3Ug=?F>v znGV5e4wj=_OG+P02fO4g%;slkh}9NiV|1IVAP!5WG>d30Ix!uAw_RIp$aYgB7)Wa)fMaOc%&q#i`BZ$uObNnqw#xMU3CBGQPG z&*Ud@DLfJTSAU0?(=|*>FHE0#clFyPQkjZhDxUSl_hUK`0;R4EFIOj5&oSqoslAp; zckx*gLGTQoFUP2sCS4vYkd-7sfNQ70*`Lp|x%LbgTFS_aBMbw&^9eR!G~&SpuokpU zHrW4#lYbBSip<}d#9`H6lKeZzyIhLq1hiK6uer_sT}an5wDMEPo?VswTeobp?bwD< zxAc~6H(GAF)^0bdm4<6#qUckge|h#VxFPz}$^O-=_L|f@Q1-9gcI%aS;LZx5>~g%|G%l_|7pqcHvs@{m;ZlT%l{3t`Xm9sx#j;&5OP&5|8F%~b*EytYt_ah zEdRIH+;dm$MA4aCQhj|%WD@nL+FQAKV~>ID#sUwxv4Bz8ZY*%W zBg_A*SdWJP*`|B*`!DwP`!8SR*y4X%{O`{AU;dk*$AJJ*Wj$;P;AZMSJVo>aoc>)O zjP>r+Z{3BEW!GxyEvssk!EvqFhTVdQqz%zYrEWQJtySLy0aIqK3X|?h$mYP%51fHd z1O5Z2eL)VFe<@>DARnVjE7}B3rJI#txOVS)E@ep;J&RWWwZAIb0gcS~TLBXjL-2}DFWc&`VNlEcK~lr)z^ z(u;+b>`tI6+{ESt71yp5t1hBfE4OEu2mwDSL|R};x(QX=&%YM@>hDr9KLlt0H0cU_ zHC_Pk8}9xk;Smq6eh-C@{jWz9+MB)0ASu+bC zKH`!N6(fwh$#kK&>&mpwB5=+dKoyuV@__~*uH`!}+$fhXSFCgsQgvYP7WH!FmMM1q z%TWz}^o@I>X71~4ycBaNtK>H!!OOkWLX`cbJy>$s3p zzD@@?ofpJ9@^JB^_&#Ahsdb{&5}oVmmjcNrEL*&#UV0oyLPEF(ET!pkHJ@XkNf&ZJ z3gnP7pJQ&w%WSD-N<4npq5(lB?E)(y_JW&XBt~GtB^gmaQLDw`6yD`tx>{7lx<0Od zBybhnL!VB*F4X-9{Jfmn?v|O}J(3q93Q7QI}Zl`Qex7zD${xdvpr~=`>7&+C~ z-GelTjd!7CHE!B4EjRV{nl<>Xx7((RJE+zT@C2L2np+@zG5aQOKyni*D{CfHZkCwM zgYs(-vnD2=LkuGF^0qNDy}8X z2N2Vp$U!i%6nyMxIz&&K*Fkrp!Ks$bQfJ_6=s9V)pWT|Ty4EH{xdoc#mYp?M4cuES zPE{|NhU*mFN?8YkX}BhMWKV<`(8T(>yK8S?z*gPOYyG>?kbW4;G|^86Uq-LGj#d2p zBnGWrDO-;v8r`atq-{7(Redi^*Q<}DWUuqYdc+-)?(v56Uaugpqf!Nu2a!Nrm=|P| z7dWk7?vj6w08YEP9AKAJZ3rs#+tvb9A@)8w+|9t}?xofhsgt=1H2(Hpwmrck%m1%D z;`jf`ZqEMS_V)eX*EqKC|F-Y{*2w>_Jl6OBDi8eqzoqqW^Z&Q`|J(fkZT|mV^Z!j# zo29?SET9s%5(JN1a(Od#al2UHn_4XJE$tSvT`cg8Efy$0#2y%%TrAM2x7*e9TY*)l zS$ErwcEjB~On&=ffy(wtmluh{lP$sHMsNfe4B>Z`NNf z`JiAO=~8L`*{22D8aalL%U@?*!E5NAN`wKgRwIYt&w`7DTY%u!#a_0NHYGjP=Cq-~ zI0R}3;KddQ?$tJ=_&fH~#RUSoHP&z&=hS@wz8nh?QXr1S5Ikwbu*&ZLw|6zmZQDTj zRdBRtL<-DZQJI59BektIP(a0%=m7T=PsqyFn{o$O}e#RSh99O-0`Ou@|!^Xz#YH!z(a=D0zX z|L=KBR{W1Trr|Ef{}lemqcj!&Q}Mrt{1_^DTytHXC6|TFNAvl1^hGk&J&Rp`)l*47CSZ6=o1829@kz5_6r7b z;I;W^K>2O`Km>)A$I*cq66e|hmXW}+CI^>avM6bW`n~$TA&+{2T+`@o3o(Jo0v09+ zYnbM}o(LbEftg)^86lvQhr%F2(`S-S&$5q`B#(+5jM_<&g)c~_c$*>?ZwzMaMOo)1 zCa)*)2Q0z?hS^~h6^=(X5DOMOMPn1t4-pr3E&)NJ~3^4HdQ+ z-%t?h9x)xpHEKL_1X#zVZRTl=KJhpR_1yKR$b1v>)AKd~j{5AJ_^;$yo$L}qMi+iL zc&g)cGK(?$1zr3WI^_5%SnFdUR6&<*`Q=B+cshzd!24bCJ_^StpfIZ6p&z`}Eh%Dc z#8Xfg#VE~-{9D6$TS>jGpo#2WJ=uN)za!B31+gh5|NcE*n-jfkTHwj2W!2syHx+B; zXE*3Lbh#PF3Y=mw;$?n{vuQjlcV=w!CV*%T?=^?raFeg4$G3n(#qxauuJN;ISM`59 ziS>Uhwv7L&@_!zsDgR&j|Etvh@nqHi@itulr-{C*|D)>vsQN#u{?BojSgQWd6Fy|E z>i?+vKdSzZs{bRW{?Ce;SSlEJrDXMzJUn)Bcups?>|>sXrIN&ha4FgORE*|^VR;y? zgxs9h{t@_?rwEy;N&wxx1dy#t0R8w)#Q&)4a!XeH&oC@!IseBoRQ&HTnu`Ca_}|^} zKPhwC@AM>{=?I^JwvOdVi2w?TwZRDB+UVa3{}}`U1uq2tILGq_oLTU{&w`+91s%)Z zcnGo!w#q2PJ-y36QgRd)BLp@ar+M=cvE`oiQLK{mVic=B=oG|50f>%xdsoCuh(2Fq zU?kbJMVRG!0!5E>!YqqtDTWk}zCz?%0068ZH^{=1aC9cZhWMQph1m$L*cP!6<$#&C z<7hUg3EVW^%SoCe)Lea{KB8-sKm8g~~uyAbE${8f_rII)nF$WLTQm;DO*X zoTeuUD3n{nu}fVWa>$VOU04j9=p<=0hpBAAQUR|fCpV!hyB`q94pA%Pd`kZN{F`9d zHEt!&TkxJRiT!%X^=BA*x<1>uT|PTMgPxavor_tUj#AJ-h&}7TBV{(VEsS`Ub9z6` zFaIRNlpH5O6Bp8&=XWwXhbP)Q2j}Hg0k=jlpw|Q;yV$E?UL(b3rhf-(KFH!kJnH=v zZWvedFeKG&HHz7#p7q?RXg`viU7Z()Be)HLsq?akU=_e%cCzsDpLfmcbGc(ZE9RtS z^P+LvV26z3{0uz#*+rVYCvaCW$!EpzqbO)ROitlRjtF}HDan&l+$fo5X@n3#lT%UI z7;~CP5N6}mHv$a#NtFg}!e}P@Zm~_*hvP_HZ`Dxmy%9pcMp>O9a@1mEbvIyv0}anf zf`RIAisvS+E}n@EI>Gv}^HyNrV}rfkI+*oC96o$7>}gLdaDqIOQ^yQ;cio_C8v%Ep z48AI|?!b|#~|o&3^0&<)C5-Q|ww8Is0g!qjcQO*loC$l^yCmWk3a z<~?ARuFeddu9~O6(&Op%eP3ssySl>=-&EpI_Irl|T~3~Gc$P+$n1PioYcyg#<+yu5B?2eIq}lQPZoiLi7}GSF-L@>=W^Kw0YRcjd zPMSgQpk$3nspD)Q&F3L2n!4}_Xai}CvFtX8Tx#2nE?ym(%HY61tSn5hV5qZ!av1kf zOx;1RJ2;wHq}nQ(g2(>(~xOtR_0x~l(-?w5vZ)*qnG|%O@lQfb Date: Wed, 4 Dec 2024 15:03:59 +0100 Subject: [PATCH 2/2] added zenodoDump to enum of CollectorPlugin --- .../main/java/eu/dnetlib/dhp/collection/CollectorWorker.java | 3 +++ .../java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java index f63bfcb48d..4c6d0653eb 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java @@ -7,6 +7,7 @@ import java.io.IOException; import java.util.Optional; import java.util.concurrent.atomic.AtomicInteger; +import eu.dnetlib.dhp.collection.plugin.zenodo.CollectZenodoDumpCollectorPlugin; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; @@ -129,6 +130,8 @@ public class CollectorWorker extends ReportingJob { return new Gtr2PublicationsCollectorPlugin(this.clientParams); case osfPreprints: return new OsfPreprintsCollectorPlugin(this.clientParams); + case zenodoDump: + return new CollectZenodoDumpCollectorPlugin(); case other: final CollectorPlugin.NAME.OTHER_NAME plugin = Optional .ofNullable(this.api.getParams().get("other_plugin_type")) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java index 72e691579b..93e65b6a70 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java @@ -11,7 +11,7 @@ public interface CollectorPlugin { enum NAME { - oai, other, rest_json2xml, file, fileGzip, baseDump, gtr2Publications, osfPreprints; + oai, other, rest_json2xml, file, fileGzip, baseDump, gtr2Publications, osfPreprints, zenodoDump; public enum OTHER_NAME { mdstore_mongodb_dump, mdstore_mongodb