Irish oaipmh exporter #443

Merged
claudio.atzori merged 7 commits from irish-oaipmh-exporter into beta 2024-06-05 10:55:09 +02:00
2 changed files with 13 additions and 25 deletions
Showing only changes of commit c9a327bc50 - Show all commits

View File

@@ -4,6 +4,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
+import java.nio.charset.Charset;
 import java.time.LocalDateTime;
 import java.util.ArrayList;
 import java.util.Optional;
@@ -145,15 +146,14 @@ public class IrishOaiExporterJob {
 protected static byte[] gzip(final String str) {
 if (StringUtils.isBlank(str)) { return null; }
-try {
-final ByteArrayOutputStream obj = new ByteArrayOutputStream();
-final GZIPOutputStream gzip = new GZIPOutputStream(obj);
-gzip.write(str.getBytes("UTF-8"));
-gzip.flush();
-gzip.close();
-return obj.toByteArray();
+try (final ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
+try (final GZIPOutputStream gzip = new GZIPOutputStream(baos)) {
+IOUtils.write(str.getBytes(Charset.defaultCharset()), gzip);
+}
+return baos.toByteArray();
 } catch (final IOException e) {
 throw new RuntimeException("error in gzip", e);
 }
 }
 }

View File

@@ -6,10 +6,9 @@ import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertTrue;
-import java.io.BufferedReader;
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
-import java.io.InputStreamReader;
+import java.nio.charset.Charset;
 import java.util.zip.GZIPInputStream;
 import org.apache.commons.io.IOUtils;
@@ -57,7 +56,7 @@ class IrishOaiExporterJobTest {
 final byte[] bytes = IrishOaiExporterJob.gzip(message);
 assertNotNull(bytes);
 assertTrue(bytes.length > 0);
-assertEquals(message, decompress(bytes));
+assertEquals(message, gunzip(bytes));
 }
 @Test
@@ -66,22 +65,11 @@ class IrishOaiExporterJobTest {
 assertNull(IrishOaiExporterJob.gzip(null));
 }
-private static String decompress(final byte[] compressed) {
-final StringBuilder outStr = new StringBuilder();
+public static String gunzip(final byte[] compressed) {
 if ((compressed == null) || (compressed.length == 0)) { return null; }
-try {
-if (isCompressed(compressed)) {
-final GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream(compressed));
-final BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(gis, "UTF-8"));
-String line;
-while ((line = bufferedReader.readLine()) != null) {
-outStr.append(line);
-}
-} else {
-outStr.append(compressed);
-}
-return outStr.toString();
+if (!isCompressed(compressed)) { return new String(compressed); }
+try (final GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream(compressed))) {
+return IOUtils.toString(gis, Charset.defaultCharset());
 } catch (final IOException e) {
 throw new RuntimeException("error in gunzip", e);
 }