Removed the part after "part-x-" in the file names generated by Spark. The names were too long and caused problems when creating the tar entries.

This commit is contained in:
Miriam Baglioni 2021-07-13 17:11:49 +02:00
parent 618d2de2da
commit d418c309f5
1 changed files with 7 additions and 0 deletions

View File

@@ -90,6 +90,13 @@ public class MakeTarArchive implements Serializable {
String p_string = p.toString();
if (!p_string.endsWith("_SUCCESS")) {
String name = p_string.substring(p_string.lastIndexOf("/") + 1);
if (name.startsWith("part-") & name.length() > 10) {
String tmp = name.substring(0, 10);
if (name.contains(".")) {
tmp += name.substring(name.indexOf("."));
}
name = tmp;
}
TarArchiveEntry entry = new TarArchiveEntry(dir_name + "/" + name);
entry.setSize(fileStatus.getLen());
current_size += fileStatus.getLen();