Changed MakeTarArchive to avoid repeated entry names in the archive

This commit is contained in:
Miriam Baglioni 2023-09-18 12:07:54 +02:00
parent 4885d36b3b
commit 9aec98cea0
1 changed files with 11 additions and 0 deletions

View File

@ -10,6 +10,7 @@ import java.util.Optional;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.slf4j.Logger;
@ -20,6 +21,8 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
public class MakeTarArchive implements Serializable {
private static final Logger log = LoggerFactory.getLogger(MakeTarArchive.class);
private static int index = 1;
private static String prevname = new String();
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
@ -156,10 +159,18 @@ public class MakeTarArchive implements Serializable {
String name = pString.substring(pString.lastIndexOf("/") + 1);
if (name.startsWith("part-") & name.length() > 10) {
String tmp = name.substring(0, 10);
if (prevname.equalsIgnoreCase(tmp)) {
tmp = tmp + "_" + index;
index += 1;
} else {
prevname = tmp;
index = 1;
}
if (name.contains(".")) {
tmp += name.substring(name.indexOf("."));
}
name = tmp;
}
if (rename) {
if (name.endsWith(".txt.gz"))