forked from D-Net/dnet-hadoop
refactoring of gzip method
This commit is contained in:
parent
e234848af8
commit
c9a327bc50
|
@@ -4,6 +4,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.nio.charset.Charset;
|
||||||
import java.time.LocalDateTime;
|
import java.time.LocalDateTime;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
@@ -145,15 +146,14 @@ public class IrishOaiExporterJob {
|
||||||
protected static byte[] gzip(final String str) {
|
protected static byte[] gzip(final String str) {
|
||||||
if (StringUtils.isBlank(str)) { return null; }
|
if (StringUtils.isBlank(str)) { return null; }
|
||||||
|
|
||||||
try {
|
try (final ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
||||||
final ByteArrayOutputStream obj = new ByteArrayOutputStream();
|
try (final GZIPOutputStream gzip = new GZIPOutputStream(baos)) {
|
||||||
final GZIPOutputStream gzip = new GZIPOutputStream(obj);
|
IOUtils.write(str.getBytes(Charset.defaultCharset()), gzip);
|
||||||
gzip.write(str.getBytes("UTF-8"));
|
}
|
||||||
gzip.flush();
|
return baos.toByteArray();
|
||||||
gzip.close();
|
|
||||||
return obj.toByteArray();
|
|
||||||
} catch (final IOException e) {
|
} catch (final IOException e) {
|
||||||
throw new RuntimeException("error in gzip", e);
|
throw new RuntimeException("error in gzip", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@@ -6,10 +6,9 @@ import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.ByteArrayInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStreamReader;
|
import java.nio.charset.Charset;
|
||||||
import java.util.zip.GZIPInputStream;
|
import java.util.zip.GZIPInputStream;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
|
@@ -57,7 +56,7 @@ class IrishOaiExporterJobTest {
|
||||||
final byte[] bytes = IrishOaiExporterJob.gzip(message);
|
final byte[] bytes = IrishOaiExporterJob.gzip(message);
|
||||||
assertNotNull(bytes);
|
assertNotNull(bytes);
|
||||||
assertTrue(bytes.length > 0);
|
assertTrue(bytes.length > 0);
|
||||||
assertEquals(message, decompress(bytes));
|
assertEquals(message, gunzip(bytes));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@@ -66,22 +65,11 @@ class IrishOaiExporterJobTest {
|
||||||
assertNull(IrishOaiExporterJob.gzip(null));
|
assertNull(IrishOaiExporterJob.gzip(null));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String decompress(final byte[] compressed) {
|
public static String gunzip(final byte[] compressed) {
|
||||||
final StringBuilder outStr = new StringBuilder();
|
|
||||||
if ((compressed == null) || (compressed.length == 0)) { return null; }
|
if ((compressed == null) || (compressed.length == 0)) { return null; }
|
||||||
try {
|
if (!isCompressed(compressed)) { return new String(compressed); }
|
||||||
if (isCompressed(compressed)) {
|
try (final GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream(compressed))) {
|
||||||
final GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream(compressed));
|
return IOUtils.toString(gis, Charset.defaultCharset());
|
||||||
final BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(gis, "UTF-8"));
|
|
||||||
|
|
||||||
String line;
|
|
||||||
while ((line = bufferedReader.readLine()) != null) {
|
|
||||||
outStr.append(line);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
outStr.append(compressed);
|
|
||||||
}
|
|
||||||
return outStr.toString();
|
|
||||||
} catch (final IOException e) {
|
} catch (final IOException e) {
|
||||||
throw new RuntimeException("error in gunzip", e);
|
throw new RuntimeException("error in gunzip", e);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue